Luker354
Luker354

Reputation: 669

Read and process multiple files in R

I have more than 2000 .txt files, each with 5 columns and each larger than 30 MB. I would like to work through them one at a time: read a file in, process it on its own, write its output, and then move on to the next file. I can't read them all in at once and process them together, because the files are too big. But something in my code is not working correctly.

        # `pattern` is a regular expression, not a shell glob: "*.txt" would
        # also match names such as "a_txt.csv" or "a.txt.bak". Anchor on a
        # literal ".txt" extension instead.
        files <- list.files(
          path = "Path/to/my/Directory/",
          pattern = "\\.txt$",
          full.names = TRUE
        )


# Process one input file: read it, derive the x21/x22 columns and write the
# table back out. Called once per element of `files` by the loop below, so
# the `files` argument here is a single path.
FUN = function(files) {

# fread is called unqualified -- presumably data.table::fread, which needs
# library(data.table) to be attached; TODO confirm.
CSA_input_data<-fread(files)

# x21, step 1: half of CASE_ALLELE_CT minus A1_CASE_CT
CSA_input_data[,'x21_CT'] = ((CSA_input_data[,'CASE_ALLELE_CT']/2) - CSA_input_data[,'A1_CASE_CT'])
# x21, step 2: the count from step 1 divided by CASE_ALLELE_CT
CSA_input_data[,'x21'] = CSA_input_data[,'x21_CT']/CSA_input_data[,'CASE_ALLELE_CT']

# x22: the same two steps, using the CTRL columns
# x22, step 1: half of CTRL_ALLELE_CT minus A1_CTRL_CT
CSA_input_data[,'x22_CT'] = ((CSA_input_data[,'CTRL_ALLELE_CT']/2) - CSA_input_data[,'A1_CTRL_CT'])
# x22, step 2: the count from step 1 divided by CTRL_ALLELE_CT
CSA_input_data[,'x22'] = CSA_input_data[,'x22_CT']/CSA_input_data[,'CTRL_ALLELE_CT']



# NOTE(review): the second argument is the same string that list.files() was
# given as the input directory -- it names a directory, not a file. This
# looks like the call behind the "cannot open the connection" error.
write.table(CSA_input_data, "Path/to/my/Directory/", sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)

}

# seq_along() gives an empty sequence when no files were found, whereas
# 1:length(files) would count down over c(1, 0) and call FUN with NA and
# with an empty vector.
for (i in seq_along(files)) {
  FUN(files[i])
}

I get the error:

Error in file(file, ifelse(append, "a", "w")) : 
  cannot open the connection 

Upvotes: 0

Views: 478

Answers (1)

Ronak Shah
Ronak Shah

Reputation: 389235

You are passing only a directory name to `write.table()`, so it has no file to open for writing. Give it a full file path instead by changing the function to:

# `pattern` is a regular expression, not a shell glob: "*.txt" would also
# match names such as "a_txt.csv" or "a.txt.bak". Anchor on a literal
# ".txt" extension instead.
files <- list.files(
  path = "Path/to/my/Directory/",
  pattern = "\\.txt$",
  full.names = TRUE
)
# Results are written into this same directory as "result_<input name>", so
# drop them here; otherwise a second run would process its own output.
files <- files[!startsWith(basename(files), "result_")]


#' Add the x21/x22 columns to one input file and write the result to disk.
#'
#' @param files Path to a single input file (one element of the `files`
#'   vector, despite the plural name). The file must contain the columns
#'   CASE_ALLELE_CT, A1_CASE_CT, CTRL_ALLELE_CT and A1_CTRL_CT.
#' @param out_dir Existing directory that receives the output file.
#' @return The path of the written file ("result_" followed by the input
#'   file name, inside `out_dir`), invisibly.
FUN <- function(files, out_dir = "Path/to/my/Directory") {
  stopifnot(
    is.character(files), length(files) == 1L, !is.na(files),
    is.character(out_dir), length(out_dir) == 1L, !is.na(out_dir)
  )
  # Check the target before the expensive read: when the directory is
  # missing, write.table() only reports "cannot open the connection".
  if (!dir.exists(out_dir)) {
    stop("Output directory does not exist: ", out_dir, call. = FALSE)
  }

  CSA_input_data <- data.table::fread(files)

  required <- c("CASE_ALLELE_CT", "A1_CASE_CT", "CTRL_ALLELE_CT", "A1_CTRL_CT")
  absent <- setdiff(required, names(CSA_input_data))
  if (length(absent) > 0L) {
    stop(
      "Missing column(s) in ", files, ": ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # `:=` is data.table's way of adding a column by reference; each ratio is
  # computed after the count it depends on.
  CSA_input_data[, x21_CT := CASE_ALLELE_CT / 2 - A1_CASE_CT]
  CSA_input_data[, x21 := x21_CT / CASE_ALLELE_CT]
  CSA_input_data[, x22_CT := CTRL_ALLELE_CT / 2 - A1_CTRL_CT]
  CSA_input_data[, x22 := x22_CT / CTRL_ALLELE_CT]

  # write.table() needs a file path, not a directory: build one per input.
  out_path <- file.path(out_dir, paste0("result_", basename(files)))
  utils::write.table(
    CSA_input_data,
    out_path,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )

  invisible(out_path)
}

and then call it on every file with `lapply` or a `for` loop.

# FUN is called for its side effect (writing one result file per input);
# invisible() stops the console from printing a list element for every file.
invisible(lapply(files, FUN))

Upvotes: 1

Related Questions