Stataq
Stataq

Reputation: 2297

How to bind the column names as the first row of each data.frame in a list

I have a list of lists, where each sublist contains multiple data frames. I would like to bind names(df) to each df as its first row. How can I do that?

Data:

q1<-list(Demographics = list(`101-01-101` = structure(list(SubjectID = "101-01-101", 
    BRTHDTC = "1953-07-07", SEX = "Female"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), `101-02-102` = structure(list(
    SubjectID = "101-02-102", BRTHDTC = "1963-07-02", SEX = "Female"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), `101-03-103` = structure(list(
    SubjectID = "101-03-103", BRTHDTC = "1940-09-11", SEX = "Male"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), `101-04-104` = structure(list(
    SubjectID = "101-04-104", BRTHDTC = "1955-12-31", SEX = "Male"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), `104-05-201` = structure(list(
    SubjectID = "104-05-201", BRTHDTC = "1950-12-04", SEX = "Female"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))), DiseaseStatus = list(
    `101-01-101` = structure(list(SubjectID = "101-01-101", DSDT = "2016-03-14", 
        DSDT_P = NA_character_), row.names = c(NA, -1L), class = c("tbl_df", 
    "tbl", "data.frame")), `101-02-102` = structure(list(SubjectID = "101-02-102", 
        DSDT = "2017-04-04", DSDT_P = NA_character_), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), `101-03-103` = structure(list(
        SubjectID = "101-03-103", DSDT = NA_character_, DSDT_P = "UN-UNK-2015"), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), `101-04-104` = structure(list(
        SubjectID = "101-04-104", DSDT = "2016-05-02", DSDT_P = NA_character_), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), `104-05-201` = structure(list(
        SubjectID = "104-05-201", DSDT = "2018-07-06", DSDT_P = NA_character_), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame"))))

My code doesn't work:

q2<-imap(q1, ~ map(.x, ~
 .x %>%
 map(~ bind_rows(names(.x), .x) )))

My expected outcome is something that looks like this:

enter image description here

Upvotes: 1

Views: 83

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 389105

Base R using lapply -

# Walk both list levels; rbind() places the column names above each data
# frame's existing rows.
lapply(q1, function(sub_list) {
  lapply(sub_list, function(df) {
    rbind(names(df), df)
  })
})

#$Demographics
#$Demographics$`101-01-101`
# A tibble: 2 x 3
#  SubjectID  BRTHDTC    SEX   
#  <chr>      <chr>      <chr> 
#1 SubjectID  BRTHDTC    SEX   
#2 101-01-101 1953-07-07 Female

#$Demographics$`101-02-102`
# A tibble: 2 x 3
#  SubjectID  BRTHDTC    SEX   
#  <chr>      <chr>      <chr> 
#1 SubjectID  BRTHDTC    SEX   
#2 101-02-102 1963-07-02 Female
#...
#...

Your attempt was close, but it needs a few changes -

  • You don't need 3 map calls
  • bind_rows needs a data frame or tibble to combine. names(.x) is a character vector, so we can use rbind instead.
library(purrr)
# Two levels of map() are enough: the outer one visits each sub-list, the
# inner one visits each data frame and puts its column names on top.
map(q1, function(sub_list) {
  map(sub_list, function(df) rbind(names(df), df))
})

Upvotes: 4

Sinh Nguyen
Sinh Nguyen

Reputation: 4497

Here is a try.

library(purrr)
library(dplyr)

# For every data frame in every sub-list, build a one-row tibble that holds the
# column names as values, then stack it on top of the original rows.
# map() is enough here: the element names that imap() would pass are never used.
q2 <- map(q1, function(sub_list) {
  map(sub_list, function(x) {
    # pivot_wider() lives in tidyr, which is not attached above, so it is
    # called with an explicit namespace.
    name_df <- tibble(a = names(x), b = names(x)) %>%
      tidyr::pivot_wider(names_from = a, values_from = b)
    # Return the combined tibble directly; ending on an assignment would
    # return it invisibly.
    # NOTE: the header row is character, so this assumes every column of x is
    # character (true for q1) -- TODO confirm for other data.
    bind_rows(name_df, x)
  })
})
q2
#> $Demographics
#> $Demographics$`101-01-101`
#> # A tibble: 2 x 3
#>   SubjectID  BRTHDTC    SEX   
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  BRTHDTC    SEX   
#> 2 101-01-101 1953-07-07 Female
#> 
#> $Demographics$`101-02-102`
#> # A tibble: 2 x 3
#>   SubjectID  BRTHDTC    SEX   
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  BRTHDTC    SEX   
#> 2 101-02-102 1963-07-02 Female
#> 
#> $Demographics$`101-03-103`
#> # A tibble: 2 x 3
#>   SubjectID  BRTHDTC    SEX  
#>   <chr>      <chr>      <chr>
#> 1 SubjectID  BRTHDTC    SEX  
#> 2 101-03-103 1940-09-11 Male 
#> 
#> $Demographics$`101-04-104`
#> # A tibble: 2 x 3
#>   SubjectID  BRTHDTC    SEX  
#>   <chr>      <chr>      <chr>
#> 1 SubjectID  BRTHDTC    SEX  
#> 2 101-04-104 1955-12-31 Male 
#> 
#> $Demographics$`104-05-201`
#> # A tibble: 2 x 3
#>   SubjectID  BRTHDTC    SEX   
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  BRTHDTC    SEX   
#> 2 104-05-201 1950-12-04 Female
#> 
#> 
#> $DiseaseStatus
#> $DiseaseStatus$`101-01-101`
#> # A tibble: 2 x 3
#>   SubjectID  DSDT       DSDT_P
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  DSDT       DSDT_P
#> 2 101-01-101 2016-03-14 <NA>  
#> 
#> $DiseaseStatus$`101-02-102`
#> # A tibble: 2 x 3
#>   SubjectID  DSDT       DSDT_P
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  DSDT       DSDT_P
#> 2 101-02-102 2017-04-04 <NA>  
#> 
#> $DiseaseStatus$`101-03-103`
#> # A tibble: 2 x 3
#>   SubjectID  DSDT  DSDT_P     
#>   <chr>      <chr> <chr>      
#> 1 SubjectID  DSDT  DSDT_P     
#> 2 101-03-103 <NA>  UN-UNK-2015
#> 
#> $DiseaseStatus$`101-04-104`
#> # A tibble: 2 x 3
#>   SubjectID  DSDT       DSDT_P
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  DSDT       DSDT_P
#> 2 101-04-104 2016-05-02 <NA>  
#> 
#> $DiseaseStatus$`104-05-201`
#> # A tibble: 2 x 3
#>   SubjectID  DSDT       DSDT_P
#>   <chr>      <chr>      <chr> 
#> 1 SubjectID  DSDT       DSDT_P
#> 2 104-05-201 2018-07-06 <NA>

Created on 2021-05-27 by the reprex package (v2.0.0)

To explain further why your code does not work:

  • imap applies your map call to each sub-list
  • the innermost map is applied to each column of each df in each sub-list - and that causes the errors, because the code maps the bind_rows function over the records of each column (a vector) rather than over the whole df

Here is what your code does if you replace bind_rows with print. It prints each record of each column in each df within each list.

q2<-imap(q1, ~ map(.x, ~
    .x %>%
    map(~ print(.x))))
#> [1] "101-01-101"
#> [1] "1953-07-07"
#> [1] "Female"
#> [1] "101-02-102"
#> [1] "1963-07-02"
#> [1] "Female"
#> [1] "101-03-103"
#> [1] "1940-09-11"
#> [1] "Male"
#> [1] "101-04-104"
#> [1] "1955-12-31"
#> [1] "Male"
#> [1] "104-05-201"
#> [1] "1950-12-04"
#> [1] "Female"
#> [1] "101-01-101"
#> [1] "2016-03-14"
#> [1] NA
#> [1] "101-02-102"
#> [1] "2017-04-04"
#> [1] NA
#> [1] "101-03-103"
#> [1] NA
#> [1] "UN-UNK-2015"
#> [1] "101-04-104"
#> [1] "2016-05-02"
#> [1] NA
#> [1] "104-05-201"
#> [1] "2018-07-06"
#> [1] NA

Created on 2021-05-27 by the reprex package (v2.0.0)

Upvotes: 1

Related Questions