Moses
Moses

Reputation: 1484

Importing specific files from different directories and appending them

I have related files in different zipped directories: File 1 in Directory 1 corresponds to File 1 in Directory 2, File 2 in Directory 1 corresponds to File 2 in Directory 2, and so on. I am able to use `map_dfr`, but that imports all the files within each directory and appends them together, which is undesirable. Below is the code, along with the expected result:

library(tidyverse)
library(zip)

# Create the output directory for the example CSV files.
# recursive = TRUE also creates the parent "data" directory when it is
# missing, and showWarnings = FALSE keeps re-runs quiet when the directory
# already exists.
dir.create("data/test", recursive = TRUE, showWarnings = FALSE)

# Example data: df1 has five columns, df2 has the same first four columns
# (no var5). Both tables hold the same four rows.
df1 <- tibble(
  var1 = c(10, 6, 7, 10),
  var2 = c(20, 20, 8, 20),
  var3 = c(23, 30, 30, 30),
  var4 = c("Male", "Female", "Female", "Male"),
  var5 = c("No", "Yes", "No", "Yes")
)

df2 <- tibble(
  var1 = c(10, 6, 7, 10),
  var2 = c(20, 20, 8, 20),
  var3 = c(23, 30, 30, 30),
  var4 = c("Male", "Female", "Female", "Male")
)

# Write the two example tables to disk as CSV files.
write_csv(df1, "data/test/df1.csv")
write_csv(df2, "data/test/df2.csv")

# Bundle the same pair of CSV files into two separate archives, so that
# file k in the first archive has a counterpart file k in the second.
walk(
  c("data/zip1.zip", "data/zip2.zip"),
  zip,
  files = c("data/test/df1.csv", "data/test/df2.csv")
)


# Find every zip archive below "data".
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the file name; a bare ".zip" would also match names
# such as "unzip_notes.txt".
all_zips <- list.files(
  "data",
  pattern = "\\.zip$",
  recursive = TRUE,
  full.names = TRUE
)

# One list element per archive, named by the archive's own path, so that
# results can later be looked up as x[["data/zip1.zip"]].
all_files <- setNames(as.list(all_zips), all_zips)

# Extract every archive and keep the paths of the extracted files, one
# character vector per archive (names are carried over from all_files).
# Each archive gets its own fresh extraction directory: the archives store
# the same relative file paths, so extracting them all into the working
# directory would let a later archive overwrite the files of an earlier one,
# and every element of toimport would then point at the same files.
toimport <- lapply(all_files, function(zip_path) {
  utils::unzip(zip_path, exdir = tempfile(pattern = "unzipped_"))
})


# Undesirable: this stacks all files of the first archive into one table.
map_dfr(toimport[[1]], read_csv)

# Desirable: one table per file position, built by stacking the file at that
# position from the first archive on top of its counterpart from the second.
file_1 <- bind_rows(
  read_csv(toimport[["data/zip1.zip"]][[1]]),
  read_csv(toimport[["data/zip2.zip"]][[1]])
)

file_2 <- bind_rows(
  read_csv(toimport[["data/zip1.zip"]][[2]]),
  read_csv(toimport[["data/zip2.zip"]][[2]])
)

Upvotes: 0

Views: 52

Answers (1)

lroha
lroha

Reputation: 34406

You can transpose and simplify the list of files to read in:

library(purrr)
library(readr)

# Regroup the paths by file position instead of by archive, then read and
# row-bind each group of corresponding files into a single data frame.
toimport %>%
  transpose() %>%
  simplify_all() %>%
  map(map_df, read_csv, show_col_types = FALSE)

[[1]]                                                                                                                                                                        
# A tibble: 8 x 5
   var1  var2  var3 var4   var5 
  <dbl> <dbl> <dbl> <chr>  <chr>
1    10    20    23 Male   No   
2     6    20    30 Female Yes  
3     7     8    30 Female No   
4    10    20    30 Male   Yes  
5    10    20    23 Male   No   
6     6    20    30 Female Yes  
7     7     8    30 Female No   
8    10    20    30 Male   Yes  

[[2]]
# A tibble: 8 x 4
   var1  var2  var3 var4  
  <dbl> <dbl> <dbl> <chr> 
1    10    20    23 Male  
2     6    20    30 Female
3     7     8    30 Female
4    10    20    30 Male  
5    10    20    23 Male  
6     6    20    30 Female
7     7     8    30 Female
8    10    20    30 Male  

or using data.table::transpose():

# Same idea, with data.table::transpose() doing the regrouping by position.
toimport %>%
  data.table::transpose() %>%
  map(map_df, read_csv, show_col_types = FALSE)

Upvotes: 1

Related Questions