Reputation: 265
I have a directory with identically names CSVs in multiple subdirectories. I am trying to combine the like-named CSVs into 1 data frame and add the subdirectory name as a column. With the example below, I would have one data frame named 'data' and one named 'name' that would contained the observations from both Run 1 and Run 2 with a column named Run added to each data frame. It would be ideal if the solution was agnostic to the names of the CSVs, but any solution would be very helpful.
In this question the person has the same problem, but I need an R solution: Combining files with same name in r and writing them into different files in R
dir <- getwd()
subDir <- 'temp'
dir.create(subDir)
setwd(file.path(dir, subDir))
dir.create('Run1')
dir.create('Run2')
employeeID <- c('123','456','789')
salary <- c(21000, 23400, 26800)
startdate <- as.Date(c('2010-11-1','2008-3-25','2007-3-14'))
employeeID <- c('123','456','789')
first <- c('John','Jane','Tom')
last <- c('Doe','Smith','Franks')
data <- data.frame(employeeID,salary,startdate)
name <- data.frame(employeeID,first,last)
write.csv(data, file = "Run1/data.csv",row.names=FALSE, na="")
write.csv(name, file = "Run1/name.csv",row.names=FALSE, na="")
employeeID <- c('465','798','132')
salary <- c(100000, 500000, 300000)
startdate <- as.Date(c('2000-11-1','2001-3-25','2003-3-14'))
employeeID <- c('465','798','132')
first <- c('Jay','Susan','Tina')
last <- c('Jones','Smith','Thompson')
data <- data.frame(employeeID,salary,startdate)
name <- data.frame(employeeID,first,last)
write.csv(data, file = "Run2/data.csv",row.names=FALSE, na="")
write.csv(name, file = "Run2/name.csv",row.names=FALSE, na="")
# list files in all directories to read
files <- list.files(recursive = TRUE)
# Read csvs into a list
list <- lapply(files, read.csv)
# Name each dataframe with the run and filename
names <- sub("\\..*$", "", files)
names(list) <- sub("\\..*$", "", files)
# And add .id = 'run' so that the run number is one of the columns
# This would work if all of the files were the same, but I don't know how to subset the dataframes based on name.
all_dat <- list %>%
bind_rows(.id = 'run')
Upvotes: 1
Views: 1171
Reputation: 265
files_to_df <- function(pattern){
# pattern <- "data"
filenames <- list.files(recursive = TRUE, pattern = pattern)
df_list <- lapply(filenames, read.csv, header = TRUE)
# Name each dataframe with the run and filename
names(df_list) <- str_sub(filenames, 1, 4)
# Create combined dataframe
df <- df_list %>%
bind_rows(.id = 'run')
# Assign dataframe to the name of the pattern
assign(pattern, df)
# Return the dataframe
return(data.frame(df))
}
name_df <- files_to_df('name')
Upvotes: 1