Loop to include different folders and patterns

Question

My code below is very inefficient and I would like to improve it. Each df is a vector of the file names in one year's folder that match a certain pattern, as described by the corresponding term (e.g. term6 for df6).

Can someone show me the best way to turn this into efficient and easily understandable code? Would a loop be best?

# Regular expressions (alternatives separated by "|") used to pick files.
# Each termN is paired with the dfN call below that searches one year folder.
term6 <- "Casos de Malaria|MALARIA8|Casos según Entidades"
term7 <- "Casos según Entidades|MALARIA8"
term8 <- "Distrito Capital10|hasta"
term9 <- "NA12"
term10 <- "hasta la semana|NA12|NA16|Entidades Federales16"
term11 <- "NA19|Cuadro|Malaria16"

# One list.files() call per year folder; each dfN is a character vector of the
# matching file names (without the directory part, since full.names = FALSE).
#
# Paths are written with forward slashes: a single backslash inside an R
# string starts an escape sequence, so "J:\GBD2017" does not even parse.
# R accepts "/" as the path separator on Windows.
df6 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2006", pattern = term6,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)
df7 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2007", pattern = term7,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)
df8 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2008", pattern = term8,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)
df9 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2009", pattern = term9,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)
df10 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2010", pattern = term10,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)
df11 <- list.files(
  path = "J:/GBD2017/Venezuela/xlsx/2011", pattern = term11,
  recursive = FALSE, full.names = FALSE, ignore.case = TRUE
)

hrbrmstr · Accepted Answer

# Search patterns (regular expressions), one per folder in `paths` below.
terms <- c(
  "Casos de Malaria|MALARIA8|Casos según Entidades",
  "Casos según Entidades|MALARIA8",
  "Distrito Capital10|hasta",
  "NA12",
  "hasta la semana|NA12|NA16|Entidades Federales16",
  "NA19|Cuadro|Malaria16"
)

# Folders to search, in the same order as `terms`.
# Forward slashes are used because a single backslash inside an R string
# starts an escape sequence ("\G" is a parse error); R accepts "/" on Windows.
paths <- c(
  "J:/GBD2017/Venezuela/xlsx/2006",
  "J:/GBD2017/Venezuela/xlsx/2007",
  "J:/GBD2017/Venezuela/xlsx/2008",
  "J:/GBD2017/Venezuela/xlsx/2009",
  "J:/GBD2017/Venezuela/xlsx/2010",
  "J:/GBD2017/Venezuela/xlsx/2011"
)

# The two vectors are paired element by element, so they must line up.
stopifnot(length(terms) == length(paths))

# Walk the (path, term) pairs, list the matching files in each folder, and
# flatten the per-folder results into a single unnamed character vector.
unlist(
  Map(
    function(path, term) {
      list.files(
        path = path,
        pattern = term,
        recursive = FALSE,
        # FALSE returns bare file names, as in the question; TRUE is usually
        # the better choice because it keeps the directory with each file.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    paths,
    terms
  ),
  use.names = FALSE
)

However, I'd recommend keeping the terms and paths together. That way you can store the search metadata in a TSV file (tab-separated rather than comma-separated), and each path is guaranteed to have exactly one term because the two columns of a table always have the same length.

It's just as easy to use:

# Search metadata kept together: one row per folder, holding the regular
# expression (`term`) to apply to the files in that folder (`path`).
# Forward slashes are used because a single backslash inside an R string
# starts an escape sequence ("\G" is a parse error); R accepts "/" on Windows.
file_search_df <- data.frame(
  term = c(
    "Casos de Malaria|MALARIA8|Casos según Entidades",
    "Casos según Entidades|MALARIA8",
    "Distrito Capital10|hasta",
    "NA12",
    "hasta la semana|NA12|NA16|Entidades Federales16",
    "NA19|Cuadro|Malaria16"
  ),
  path = c(
    "J:/GBD2017/Venezuela/xlsx/2006",
    "J:/GBD2017/Venezuela/xlsx/2007",
    "J:/GBD2017/Venezuela/xlsx/2008",
    "J:/GBD2017/Venezuela/xlsx/2009",
    "J:/GBD2017/Venezuela/xlsx/2010",
    "J:/GBD2017/Venezuela/xlsx/2011"
  ),
  stringsAsFactors = FALSE
)

# Walk the rows as (path, term) pairs, list the matching files in each folder,
# and flatten the per-folder results into a single unnamed character vector.
# Iterating over the columns (rather than 1:nrow()) is also safe for 0 rows.
unlist(
  Map(
    function(path, term) {
      list.files(
        path = path,
        pattern = term,
        recursive = FALSE,
        # FALSE returns bare file names, as in the question; TRUE is usually
        # the better choice because it keeps the directory with each file.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    file_search_df$path,
    file_search_df$term
  ),
  use.names = FALSE
)

And, if you use the TSV approach, even more compact:

# Load the search metadata from a tab-separated file. The file is expected to
# have a header row with (at least) the columns `path` and `term`, which are
# the two columns used below.
# read.delim() is read.csv() with sep = "\t" as its default, which avoids
# embedding a literal tab character in the source code.
file_search_df <- read.delim(
  "path-to-metadata-file.tsv",
  stringsAsFactors = FALSE
)

# Walk the rows as (path, term) pairs, list the matching files in each folder,
# and flatten the per-folder results into a single unnamed character vector.
# Iterating over the columns (rather than 1:nrow()) is also safe for 0 rows.
unlist(
  Map(
    function(path, term) {
      list.files(
        path = path,
        pattern = term,
        recursive = FALSE,
        # FALSE returns bare file names, as in the question; TRUE is usually
        # the better choice because it keeps the directory with each file.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    file_search_df$path,
    file_search_df$term
  ),
  use.names = FALSE
)

Loop to include different folders and patterns

Answers (2)

Related Questions