Shahin
Shahin

Reputation: 1316

turn a list of dataframes into a dataframe

I have made a list of data frames by querying an SQL database. dput(list_df[1:6]) gives

DATA

# Example input: a list of six elements mixing the string "NOT FOUND" with
# data.frames that have five columns (associated_gene, refsnp_id, allele,
# chrom_start, chrom_end) and either one or two rows.
list_df <- list("NOT FOUND",
structure(list(associated_gene = NA, refsnp_id = "rs778387354", 
               allele = "GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG", 
               chrom_start = 42692955L, chrom_end = 42692989L), row.names = 2L, 
               class = "data.frame"), 
structure(list(associated_gene = "", refsnp_id = "rs771507737", 
               allele = "AT/-", chrom_start = 42693404L, chrom_end = 42693405L), row.names = 2L, 
               class = "data.frame"), 
"NOT FOUND",
structure(list(associated_gene = "A4GALT", refsnp_id = "rs387906280", 
               allele = "GGGG/GGGGG", chrom_start = 42692923L, chrom_end = 42692926L),
               row.names = 1L, class = "data.frame"), 
structure(list(associated_gene = c(NA, NA), refsnp_id = c("rs1296945362", 
               "rs775626055"), allele = c("GGGTGGGGTGGGG/GGGTGGGGTGGGGTGGGG", 
               "GGGGGG/GGGG/GGGGGGG"), chrom_start = c(42693742L, 42693751L),
               chrom_end = c(42693754L, 42693756L)), row.names = c(1L, 4L),
               class = "data.frame"))

The elements of list_df have one of three kinds of dimensions (as returned by dim()):

1 5
2 5
NULL

I would like to bind the data frames in list_df that have exactly 1 row and 5 columns into a new data frame, new_df, such that new_df[i, ] corresponds to list_df[[i]].

For elements that have no rows or more than 1 row, a row of NAs would do.

Upvotes: 1

Views: 111

Answers (3)

Ronak Shah
Ronak Shah

Reputation: 388807

If you want to keep only the list elements that have the specified dimensions, we can use Filter:

# Keep only the elements that are data.frames with exactly one row and five
# columns, then stack the survivors into a single data.frame.
do.call(
  rbind,
  Filter(
    function(el) {
      # Strings such as "NOT FOUND" are rejected before nrow()/ncol() are used.
      if (!is.data.frame(el)) {
        return(FALSE)
      }
      nrow(el) == 1 && ncol(el) == 5
    },
    list_df
  )
)

#   associated_gene   refsnp_id                                        allele chrom_start chrom_end
#2             <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#21                 rs771507737                                          AT/-    42693404  42693405
#1           A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926

If you want a row of NAs wherever the criteria are not met, we can use lapply:

# Map each element to itself when it is a 1-row, 5-column data.frame and to a
# one-cell NA placeholder otherwise, then bind everything by row so the result
# has one row per list element.
dplyr::bind_rows(lapply(list_df, function(el) {
  matches <- is.data.frame(el) && nrow(el) == 1 && ncol(el) == 5
  if (!matches) {
    return(data.frame(associated_gene = NA))
  }
  el
}))

#  associated_gene   refsnp_id                                        allele chrom_start chrom_end
#1            <NA>        <NA>                                          <NA>          NA        NA
#2            <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#3                 rs771507737                                          AT/-    42693404  42693405
#4            <NA>        <NA>                                          <NA>          NA        NA
#5          A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926
#6            <NA>        <NA>                                          <NA>          NA        NA

Upvotes: 1

akrun
akrun

Reputation: 886938

We can replace every element that is not a one-row data.frame with a one-column NA data.frame, and then bind everything by row

library(purrr)
# Substitute a one-cell NA placeholder for every element that is not a
# one-row data.frame, then row-bind the results.
# `||` is the scalar, short-circuiting operator expected inside `if`: nrow()
# is only evaluated once the element is known to be a data.frame, so no
# data.frame() coercion of the "NOT FOUND" strings is needed.
map_dfr(list_df, ~ if (!is.data.frame(.x) || nrow(.x) != 1) {
  data.frame(associated_gene = NA)
} else {
  .x
})
#  associated_gene   refsnp_id                                        allele chrom_start chrom_end
#1            <NA>        <NA>                                          <NA>          NA        NA
#2            <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#3                 rs771507737                                          AT/-    42693404  42693405
#4            <NA>        <NA>                                          <NA>          NA        NA
#5          A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926
#6            <NA>        <NA>                                          <NA>          NA        NA

If we want to keep only the elements that are data.frames with exactly one row

library(dplyr)
# Discard everything that is not a one-row data.frame and row-bind what is
# left (keep() is provided by purrr, which must already be attached).
bind_rows(
  keep(list_df, function(el) is.data.frame(el) && nrow(el) == 1)
)

Upvotes: 2

ngwalton
ngwalton

Reputation: 383

plyr::rbind.fill would also work:

library(plyr)

# Coerce every element to a data.frame, so the "NOT FOUND" strings become
# data.frames too. Note that this overwrites list_df.
list_df <- lapply(list_df, as.data.frame)
# Row-bind all elements. Nothing is filtered here, so data.frames with more
# than one row are bound as they are.
df <- rbind.fill(list_df)
# The drop below is positional: it assumes the "NOT FOUND" column comes first,
# which presumably holds because the first list element is "NOT FOUND".
df <- df[, -1]  # remove column X[[i]] containing "NOT FOUND"

Upvotes: 1

Related Questions