u31889
u31889

Reputation: 351

R make a named list from a data frame

I have a data frame of conditions:

df <- data.frame(Gender=c("Male", "Female", "Other"), AgeCat=c("young", "adult", "old"))
df
  Gender AgeCat
1   Male  young
2 Female  adult
3  Other    old

And I have a "master" data frame from which I need to extract data based on the conditions contained in the previous data frame:

master <- data.frame(Country=c("US", "US", "ENG", "SPN", "MEX", "ARG"), Gender=c("Male", "Male", "Male", "Female", "Female", "Female"), AgeCat=c("young", "young", "old", "adult", "adult", "adult"), Height=c("134", "125", "169", "189", "176", "191"))
master
  Country Gender AgeCat Height
1      US   Male  young    134
2      US   Male  young    125
3     ENG   Male    old    169
4     SPN Female  adult    189
5     MEX Female  adult    176
6     ARG Female  adult    191

Using one of the function from the apply family, I am trying to create a list of subset of master based on the row of df.

In order to obtain a list like myList below:

Male_young <- subset(master, Gender=="Male" & AgeCat=="young")
Male_young
  Country Gender AgeCat Height
1      US   Male  young    134
2      US   Male  young    125

Female_adult <- subset(master, Gender=="Female" & AgeCat=="adult")
Female_adult
  Country Gender AgeCat Height
4     SPN Female  adult    189
5     MEX Female  adult    176
6     ARG Female  adult    191

Other_adult <- subset(master, Gender=="Other" & AgeCat=="old")
Other_adult
[1] Country Gender  AgeCat  Height
<0 rows> (or 0-length row.names)

myList <- list(Male_young=Male_young, Female_adult=Female_adult, Other_old=Other_old)
myList
$Male_young
  Country Gender AgeCat Height
1      US   Male  young    134
2      US   Male  young    125

$Female_adult
  Country Gender AgeCat Height
4     SPN Female  adult    189
5     MEX Female  adult    176
6     ARG Female  adult    191

$Other_old
[1] Country Gender  AgeCat  Height
<0 rows> (or 0-length row.names)

EDIT I realised I wanted to show an example too simple maybe. What I am actually trying to obtain is a data frame (or a list that could be converted as a data frame or matrix) as follows:

    Conditions Individuals Mean_Height
1   Male_young           2       129.5
2 Female_adult           3    185.3333
3    Other_old           0           .

EDIT 2 I keep persisting with this method but I am not sure it is very efficient:

df[c("Conditions", "Individuals", "Mean_Height")] <- t(
        mapply(
            function(X, Y, Z){
                sub_df <- subset(X, Gender==Y & AgeCat==Z)
                c(paste(Y, Z, sep="_"), nrow(sub_df), colMeans(sub_df$Height))
            },
            master,
            df$Gender,
            df$AgeCat
        )
    )

Upvotes: 1

Views: 67

Answers (3)

rdornas
rdornas

Reputation: 652

I am responding for your first question and I'll think about your edit then. I would do that in a way, using and

df <- data.frame(Gender=c("Male", "Female", "Other"),
                 AgeCat=c("young", "adult", "old"),
                 stringsAsFactors = F)

master <- data.frame(Country=c("US", "US", "ENG", "SPN", "MEX", "ARG"),
                     Gender=c("Male", "Male", "Male", "Female", "Female", "Female"),
                     AgeCat=c("young", "young", "old", "adult", "adult", "adult"),
                     Height=c("134", "125", "169", "189", "176", "191"),
                     stringsAsFactors = F)

library(tidyverse)

pre <- master %>% 
  as_tibble(.) %>% 
  bind_rows(., df) %>% 
  complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
  group_by(Gender, AgeCat) %>% 
  nest(.)

NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))

map(names(NEST), function(x){
  z <- NEST[[x]]

  z %>% 
    mutate(Gender = str_split(x, "_", simplify = T)[1],
           AgeCat = str_split(x, "_", simplify = T)[2]) %>% 
    filter(!is.na(Country)) %>% 
    select(Country, Gender, AgeCat, Height)
}) %>% 
  set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))

#> $Female_adult
#> # A tibble: 3 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ARG     Female adult  191   
#> 2 MEX     Female adult  176   
#> 3 SPN     Female adult  189   
#> 
#> $Male_adult
#> # A tibble: 2 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ENG     Male   adult  <NA>  
#> 2 US      Male   adult  <NA>  
#> 
#> $Other_adult
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>
#> 
#> $Female_old
#> # A tibble: 3 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ARG     Female old    <NA>  
#> 2 MEX     Female old    <NA>  
#> 3 SPN     Female old    <NA>  
#> 
#> $Male_old
#> # A tibble: 2 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ENG     Male   old    169   
#> 2 US      Male   old    <NA>  
#> 
#> $Other_old
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>
#> 
#> $Female_young
#> # A tibble: 3 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ARG     Female young  <NA>  
#> 2 MEX     Female young  <NA>  
#> 3 SPN     Female young  <NA>  
#> 
#> $Male_young
#> # A tibble: 3 x 4
#>   Country Gender AgeCat Height
#>   <chr>   <chr>  <chr>  <chr> 
#> 1 ENG     Male   young  <NA>  
#> 2 US      Male   young  134   
#> 3 US      Male   young  125   
#> 
#> $Other_young
#> # A tibble: 0 x 4
#> # … with 4 variables: Country <chr>, Gender <chr>, AgeCat <chr>, Height <chr>

Created on 2020-02-20 by the reprex package (v0.3.0)

EDIT: new answer comprising all the question

pre <- master %>% 
  as_tibble(.) %>% 
  mutate(Height = as.numeric(Height)) %>%
  bind_rows(., df) %>% 
  complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
  group_by(Gender, AgeCat) %>% 
  nest(.)

NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))

DF <- map(names(NEST), function(x){
  z <- NEST[[x]]

  z %>% 
    mutate(Gender = str_split(x, "_", simplify = T)[1],
           AgeCat = str_split(x, "_", simplify = T)[2]) %>% 
    filter(!is.na(Country)) %>% 
    select(Country, Gender, AgeCat, Height)
}) %>% 
  set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat)) %>% 
  bind_rows(., .id = "Conditions")

Count <- DF %>% 
  count(Conditions, name = "Individuals")

DF %>% 
  group_by(Conditions) %>% 
  filter(!is.na(Height)) %>% 
  summarise(Mean_Height = mean(Height, na.rm = T)) %>% 
  left_join(., Count) %>% 
  select(Conditions, Individuals, Mean_Height)

#> Joining, by = "Conditions"
#> # A tibble: 3 x 3
#>   Conditions   Individuals Mean_Height
#>   <chr>              <int>       <dbl>
#> 1 Female_adult           3        185.
#> 2 Male_old               2        169 
#> 3 Male_young             3        130.

Created on 2020-02-20 by the reprex package (v0.3.0)

EDIT2: I made slight changes to the resulting df in order to show the number of individuals in this case. As mean of NA doesn't exist it returns NaN

pre <- master %>% 
  as_tibble(.) %>% 
  mutate(Height = as.numeric(Height)) %>%
  bind_rows(., df) %>% 
  complete(AgeCat, nesting(Country, Gender), fill = list(Height = NA)) %>%
  group_by(Gender, AgeCat) %>% 
  nest(.)

NEST <- set_names(x = pre$data, nm = sprintf("%s_%s", pre$Gender, pre$AgeCat))

DF <- map(names(NEST), function(x){
  z <- NEST[[x]]

  z %>% 
    mutate(Gender = str_split(x, "_", simplify = T)[1],
           AgeCat = str_split(x, "_", simplify = T)[2]) %>% 
    filter(!is.na(Country)) %>% 
    select(Country, Gender, AgeCat, Height)
}) %>% 
  set_names(., nm = sprintf("%s_%s", pre$Gender, pre$AgeCat)) %>% 
  bind_rows(., .id = "Conditions")

DF %>% 
  group_by(Conditions) %>% 
  mutate(N = case_when(is.na(Height) ~ 0,
                       TRUE ~ 1)) %>% 
  summarise(Individuals = sum(N), Mean_Height = mean(Height, na.rm = T))

#> # A tibble: 6 x 3
#>   Conditions   Individuals Mean_Height
#>   <chr>              <dbl>       <dbl>
#> 1 Female_adult           3        185.
#> 2 Female_old             0        NaN 
#> 3 Female_young           0        NaN 
#> 4 Male_adult             0        NaN 
#> 5 Male_old               1        169 
#> 6 Male_young             2        130.

Created on 2020-02-20 by the reprex package (v0.3.0)

Upvotes: 0

Georgery
Georgery

Reputation: 8117

Could it be, that you actually want this?

master <- data.frame(Country=c("US", "US", "ENG", "SPN", "MEX", "ARG"), Gender=c("Male", "Male", "Male", "Female", "Female", "Female"), AgeCat=c("young", "young", "old", "adult", "adult", "adult"), Height=c("134", "125", "169", "189", "176", "191"), stringsAsFactors = FALSE)

df <- data.frame(Gender=c("Male", "Female", "Other"), AgeCat=c("young", "adult", "old"), stringsAsFactors = FALSE)

master %>%
    group_by(Gender, AgeCat) %>%
    summarise(mean(as.numeric(Height))) %>%
    full_join(df)

which results in

  Gender AgeCat `mean(as.numeric(Height))`
  <chr>  <chr>                       <dbl>
1 Female adult                        185.
2 Male   old                          169 
3 Male   young                        130.
4 Other  old                           NA 

Upvotes: 1

Georgery
Georgery

Reputation: 8117

library(tidyverse)

df <- df %>%
    mutate_if(is.factor, as.character)

master <- master %>%
    mutate_if(is.factor, as.character)

joinList <- split(df, seq(nrow(df)))

lapply(joinList, function(df, master) master %>% semi_join(df), master = master)

This results in a list of dataframes:

$`1`
  Country Gender AgeCat Height
1      US   Male  young    134
2      US   Male  young    125

$`2`
  Country Gender AgeCat Height
1     SPN Female  adult    189
2     MEX Female  adult    176
3     ARG Female  adult    191

$`3`
[1] Country Gender  AgeCat  Height 
<0 rows> (or 0-length row.names)

Upvotes: 0

Related Questions