Reputation: 669
I have more than 2000 txt files (each having 5 columns), each larger than 30 MB. I would like to read in one file at a time, process it separately, write its output, and then move on to the next file. I can't read them all in at once and process them together, since the files are too big. But something in my code is not working correctly.
# Collect the full paths of the input files. `pattern` is a regular
# expression, not a shell glob, so anchor on a literal ".txt" suffix.
files <- list.files(
  path = "Path/to/my/Directory/",
  pattern = "\\.txt$",
  full.names = TRUE
)
# Read one input file, derive the x21/x22 count and ratio columns from the
# CASE/CTRL allele-count columns, and write the table out as tab-separated
# text.
#
# Args:
#   files: Path of the single file to read with fread().
FUN <- function(files) {
  allele_tbl <- fread(files)

  # x21: half of CASE_ALLELE_CT minus A1_CASE_CT, then that count divided by
  # CASE_ALLELE_CT.
  case_total <- allele_tbl[, "CASE_ALLELE_CT"]
  allele_tbl[, "x21_CT"] <- (case_total / 2) - allele_tbl[, "A1_CASE_CT"]
  allele_tbl[, "x21"] <- allele_tbl[, "x21_CT"] / case_total

  # x22: the same two steps on the CTRL columns.
  ctrl_total <- allele_tbl[, "CTRL_ALLELE_CT"]
  allele_tbl[, "x22_CT"] <- (ctrl_total / 2) - allele_tbl[, "A1_CTRL_CT"]
  allele_tbl[, "x22"] <- allele_tbl[, "x22_CT"] / ctrl_total

  # Output target is kept exactly as posted.
  write.table(
    allele_tbl,
    "Path/to/my/Directory/",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )
}
# Process the files one at a time. Iterating over the vector itself is safe
# when `files` is empty, whereas 1:length(files) would yield c(1, 0) and
# call FUN on NA.
for (input_file in files) {
  FUN(input_file)
}
I get the error:
Error in file(file, ifelse(append, "a", "w")) :
cannot open the connection
Upvotes: 0
Views: 478
Reputation: 389235
You are passing only the directory name to `write.table`, so it has no file to write to. Give it a full file path instead; change the function to:
# Collect the full paths of the input files. `pattern` is a regular
# expression, not a shell glob, so anchor on a literal ".txt" suffix.
files <- list.files(
  path = "Path/to/my/Directory/",
  pattern = "\\.txt$",
  full.names = TRUE
)
# Read one input file, add the derived x21/x22 columns, and write the result
# as a tab-separated file named "result_<input file name>".
#
# Args:
#   files:   Path of a single input file (one element of the `files`
#            vector).
#   out_dir: Directory the result file is written to. Defaults to the
#            location that was previously hard-coded.
#
# Returns:
#   The path of the written file, invisibly.
FUN <- function(files, out_dir = "Path/to/my/Directory/") {
  stopifnot(is.character(files), length(files) == 1L, !is.na(files))

  allele_tbl <- data.table::fread(files)

  # Fail early with a clear message if an expected column is absent.
  required_cols <- c(
    "CASE_ALLELE_CT", "A1_CASE_CT", "CTRL_ALLELE_CT", "A1_CTRL_CT"
  )
  missing_cols <- setdiff(required_cols, names(allele_tbl))
  if (length(missing_cols) > 0L) {
    stop(
      "Missing column(s) in ", files, ": ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  case_total <- allele_tbl[["CASE_ALLELE_CT"]]
  ctrl_total <- allele_tbl[["CTRL_ALLELE_CT"]]

  # set() adds each column by reference; data.frame-style `[<-` on a
  # data.table copies the whole table on every assignment. The columns are
  # added in the same order as before so the output layout is unchanged.
  x21_ct <- (case_total / 2) - allele_tbl[["A1_CASE_CT"]]
  data.table::set(allele_tbl, j = "x21_CT", value = x21_ct)
  data.table::set(allele_tbl, j = "x21", value = x21_ct / case_total)

  x22_ct <- (ctrl_total / 2) - allele_tbl[["A1_CTRL_CT"]]
  data.table::set(allele_tbl, j = "x22_CT", value = x22_ct)
  data.table::set(allele_tbl, j = "x22", value = x22_ct / ctrl_total)

  # Drop trailing separators so file.path() does not produce a doubled
  # slash; with the default out_dir the path is the same as before.
  out_path <- file.path(
    sub("[/\\\\]+$", "", out_dir),
    paste0("result_", basename(files))
  )
  write.table(
    allele_tbl,
    out_path,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )

  invisible(out_path)
}
and then call it on each file with `lapply` (or a `for` loop):
# FUN is called for its side effect (writing a result file), so suppress the
# top-level auto-print of the list that lapply() returns.
invisible(lapply(files, FUN))
Upvotes: 1