Reputation: 13
Good day, I have several text files with the same layout that I would like to read in with the fread() function (see two sample files here: https://www.dropbox.com/sh/grpai6ppc6oq3ka/AADyECZHz5KW7wtv5xjF5-ena?dl=0 ). The documents are divided into two parts: the first part contains 16 columns, the second 7 columns. I only want the data from the first part, and only columns 1 and 2.
# Keep only columns 1 and 2 of the file and name them "Net" and "Nrp".
# verbose=TRUE asks fread to report its parsing diagnostics while reading.
dat10 <- fread("CalcV10.txt", select = c(1,2), verbose=TRUE, col.names = c("Net", "Nrp"))
> head(dat10)
Net Nrp
1: 225 1
2: 247 1
3: 268 1
4: 287 1
5: 301 12
6: 302 4
This works perfectly for part of my data (e.g. CalcV10), where the first part contains more than one row of data.
For the other file, whose first part contains only one row of data, the first part is skipped and the second part of the document is read instead:
> head(dat3)
Net Nrp
1: 1000 9.9
2: 1000 14.8
3: 1000 12.7
4: 1000 14.8
5: 1000 11.7
6: 1000 14.8
I tried to specify the columns explicitly (colClasses=list(character=1:16)), but that did not help. I would be thankful for any hint!
Best, yakuelin
I'm using 1.10.4 version of data.table, R version 3.3.2, Version 1.0.136 of R Studio (all of it was updated two weeks ago)
edit
I have 40 files with the same name and layout (Calc.txt). They are in 20 folders, named V1 - V20, each of which has two subfolders named after the two sim_types. To read in these text files I created the following function:
# Read the first two columns ("Net", "Nrp") of every `Calc.txt` located at
# <dataDir>/V<i>/<sim_type>/Calc.txt for i = 1..NrV and stack them.
#
# Arguments:
#   NrV      number of "V" folders to visit (V1 ... V<NrV>)
#   sim_type name of the sub-folder inside each V folder
#   FT       label written into the `type` column of every returned row
#
# Returns a data.frame with the columns Net, Nrp and type.
#
# Relies on a variable `dataDir` defined outside this function and on
# data.table being attached (fread, rbindlist).
read.res <- function(NrV, sim_type, FT) {
  # sprintf() over seq_len() yields a zero-length vector for NrV == 0,
  # whereas 1:NrV would count 1, 0 and try to read folders V1 and V0.
  V <- sprintf("V%d", seq_len(NrV))

  # Build full paths instead of calling setwd(), so the caller's working
  # directory is left untouched.
  files <- file.path(dataDir, V, sim_type, "Calc.txt")

  parts <- lapply(files, function(f) {
    dat0 <- fread(f, select = c(1, 2), col.names = c("Net", "Nrp"))
    dat0$type <- FT
    dat0
  })

  # Bind once at the end rather than growing the result inside the loop.
  dat <- as.data.frame(rbindlist(parts))
  return(dat)
}
# Load both simulation types (20 V-folders each), then stack the two results
# into a single table.
Forest    <- read.res(NrV = 20, sim_type = sim_F,  FT = "F")
nonForest <- read.res(NrV = 20, sim_type = sim_nF, FT = "nF")
data      <- rbind(Forest, nonForest)
@Sathish's answer works fine for reading one file at a time, but it would be great to automate this step for all files. I am struggling to include Sathish's suggestion in my function. Any ideas?
Upvotes: 1
Views: 548
Reputation: 12703
library('data.table')
fn1 <- "CalcV3.txt"
fn2 <- "CalcV10.txt"

# For each file: find the first line that mentions 'Sim_data' and take the
# row count of the leading section as that line number minus 5.
# NOTE(review): the offset of 5 presumably accounts for the header line and
# the lines separating the two sections - confirm against the file layout.
# The match is checked explicitly so that a file without the marker fails
# here with a clear error instead of passing an empty `nrows` to fread.
marker1 <- grep('Sim_data', readLines(fn1), fixed = TRUE)
stopifnot(length(marker1) >= 1L)
n1 <- marker1[1L] - 5
# skip = 1 drops the first line of the file; with sep = '\t' the rows are
# read without splitting on spaces, and the code below uses column V1 as the
# raw line text.
x1 <- fread(fn1, nrows = n1, header = FALSE, skip = 1, sep = '\t', strip.white = FALSE, stringsAsFactors = FALSE) # get file contents for the n1 rows

marker2 <- grep('Sim_data', readLines(fn2), fixed = TRUE)
stopifnot(length(marker2) >= 1L)
n2 <- marker2[1L] - 5
x2 <- fread(fn2, nrows = n2, header = FALSE, skip = 1, sep = '\t', strip.white = FALSE, stringsAsFactors = FALSE)
# Parse space-separated numeric lines and keep a range of their fields.
#
# Arguments:
#   x     character vector, one element per line of text
#   from  index of the first field to keep
#   to    index of the last field to keep
#
# Returns a numeric matrix with one row per element of `x` (row names
# V1, V2, ...) and one column per kept field.
my_func <- function(x, from, to) {
  # Splitting on a single space turns runs of spaces into empty tokens.
  tokens <- strsplit(x, '\ ')

  # Drop the empty tokens, convert the rest to numbers, keep fields from:to.
  keep_fields <- function(parts) {
    values <- as.numeric(parts[nzchar(parts)])
    values[from:to]
  }
  per_line <- lapply(tokens, keep_fields)

  # rbindlist() receives the per-line vectors as columns (one column per
  # line), so the result is transposed to get one row per line.
  by_column <- rbindlist(l = list(per_line))
  t(by_column)
}
# x1 holds a single data line, so the result is a one-row matrix.
my_func(x1$V1, 1, 16) # all columns
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
# V1 1000 2100 7 10 11 12 0.9 1.9 2 2.2 12.3 14.8 17.1 42.1 -52.1 -40.1
# `from` and `to` select a range of fields; the output columns are renumbered
# starting at 1.
my_func(x1$V1, 2, 4) # columns from 2 to 4
# [,1] [,2] [,3]
# V1 2100 7 10
# x2 holds several data lines; each line becomes one row (V1, V2, ...).
my_func(x2$V1, 1, 16) # all columns
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
# V1 225 1 773.1 773.1 773.1 773.1 0.5 0.5 0.5 0.5 21.5 21.5 34.7 34.7 -42.5 -42.5
# V2 247 1 833.5 833.5 833.5 833.5 0.6 0.6 0.6 0.6 20.2 20.2 40.9 40.9 -15.4 -15.4
# V3 268 1 704.4 704.4 704.4 704.4 1.8 1.8 1.8 1.8 20.6 20.6 32.8 32.8 -42.9 -42.9
# V4 287 1 325.1 325.1 325.1 325.1 0.9 0.9 0.9 0.9 14.0 14.0 25.0 25.0 -42.1 -42.1
# V5 301 12 170.8 325.8 437.8 437.8 0.5 0.8 5.9 5.9 9.8 16.3 17.2 27.2 -32.2 -20.2
# V6 302 4 85.0 218.0 218.0 218.0 0.5 0.5 0.5 0.5 6.8 14.9 8.1 15.1 -38.4 -34.4
# V7 303 3 70.5 85.5 85.5 85.5 0.5 0.5 0.5 0.5 6.2 6.4 11.4 12.4 -26.9 -17.9
# V8 316 56 499.1 689.1 728.1 772.1 0.6 1.3 1.8 1.9 15.9 20.9 28.9 36.9 -38.6 -31.6
# V9 317 772 367.5 569.5 618.5 705.5 0.5 0.7 0.9 1.0 13.7 17.9 27.3 35.3 -26.6 -14.6
# V10 318 52 304.2 445.2 511.2 615.2 0.6 1.3 1.8 2.0 12.5 17.8 23.5 34.5 -21.6 0.4
# V11 319 4 412.3 527.3 527.3 527.3 0.6 0.7 0.7 0.7 15.1 20.9 21.9 33.9 -25.8 -4.8
# V12 330 14 107.7 264.7 421.7 421.7 0.5 0.8 1.3 1.3 8.2 14.4 14.7 27.7 -45.7 -27.7
# V13 331 872 229.3 406.3 468.3 531.3 0.5 1.0 1.5 2.3 11.7 17.1 19.2 28.2 -47.5 -37.5
# V14 332 35 428.1 690.1 728.1 774.1 1.1 3.2 4.1 4.8 17.0 22.6 22.6 35.6 -51.3 -35.3
# V15 333 4 452.0 523.0 523.0 523.0 0.7 1.0 1.0 1.0 15.8 17.1 28.5 29.5 -45.9 -38.9
# V16 1000 2100 143.6 200.6 215.6 232.6 1.2 2.1 2.3 2.4 12.4 14.8 8.1 17.1 -52.1 -41.1
EDIT:
# Parse space-separated numeric lines and keep a range of their fields.
#
# Arguments:
#   x     character vector, one element per line of text
#   from  index of the first field to keep
#   to    index of the last field to keep
#
# Returns a numeric matrix with one row per element of `x` (row names
# V1, V2, ...) and one column per kept field.
my_func <- function(x, from, to) {
  # Splitting on a single space turns runs of spaces into empty tokens.
  tokens <- strsplit(x, '\ ')

  # Drop the empty tokens, convert the rest to numbers, keep fields from:to.
  keep_fields <- function(parts) {
    values <- as.numeric(parts[nzchar(parts)])
    values[from:to]
  }
  per_line <- lapply(tokens, keep_fields)

  # rbindlist() receives the per-line vectors as columns (one column per
  # line), so the result is transposed to get one row per line.
  by_column <- rbindlist(l = list(per_line))
  t(by_column)
}
# Load the package once, up front. library() stops with an error if the
# package is missing, whereas require() only returns FALSE and carries on.
library('data.table')

root_path <- "temp" # Set `root_path` variable to a desired location

# One entry per <root_path>/V<i>/sim_types<j> folder, in the order
# V1/sim_types1, V1/sim_types2, V2/sim_types1, ...
fdirs <- file.path(rep(file.path(root_path, paste0('V', 1:20)), each = 2),
                   paste0('sim_types', 1:2))

all_dfs <- list() # parsed Net/Nrp table of every file, keyed by file path
for (i in fdirs) {
  fn <- file.path(i, 'Calc.txt')

  if (!file.exists(fn)) {
    warning(paste0('The file ', fn, ' does not exist!'))
    next
  }

  # Line number of the first line that mentions 'Sim_data'. An empty or
  # differently laid out file has no such line; skip it with a warning
  # instead of handing an empty `nrows` to fread.
  marker <- grep('Sim_data', readLines(fn), fixed = TRUE)
  if (length(marker) == 0L) {
    warning(paste0('The file ', fn, ' has no Sim_data line; skipped.'))
    next
  }
  n1 <- marker[1L] - 5 # rows of the leading section: marker line minus 5

  # get file contents for the n1 rows; column V1 is used below as the raw
  # line text
  x1 <- fread(fn, nrows = n1, header = FALSE, skip = 1, sep = '\t', strip.white = FALSE, stringsAsFactors = FALSE)

  df <- my_func(x1$V1, 1, 2) # keep only the first two fields of each line
  colnames(df) <- c('Net', 'Nrp')
  all_dfs[[fn]] <- df
}
warnings()
# 38: The file temp/V20/sim_types2/Calc.txt does not exist!
all_dfs
# $`temp/V1/sim_types1/Calc.txt`
# Net Nrp
# V1 1000 2100
#
# $`temp/V1/sim_types2/Calc.txt`
# Net Nrp
# V1 1000 2100
#
# $`temp/V2/sim_types1/Calc.txt`
# Net Nrp
# V1 1000 2100
#
# $`temp/V2/sim_types2/Calc.txt`
# Net Nrp
# V1 1000 2100
If you want to experiment with the files and directories, try this reproducible example, which creates the directories and (empty) files. Set the root_path
variable to the desired location.
# reproducible example
# Create <root_path>/V1..V20, each with the sub-folders sim_types1 and
# sim_types2, and put an empty Calc.txt into every sub-folder.
root_path <- "temp"
dirs <- file.path(root_path, paste0('V', 1:20))
for (fpath in dirs) {
  sub_dirs <- file.path(fpath, paste0('sim_types', 1:2))
  for (sfpath in sub_dirs) {
    # recursive = TRUE also creates `fpath` and `root_path` when needed;
    # showWarnings = FALSE keeps a second run quiet when the folders exist.
    dir.create(path = sfpath, recursive = TRUE, showWarnings = FALSE)

    # file.create() truncates an existing file, so only create Calc.txt when
    # it is missing; this protects real data if `root_path` points at it.
    calc_file <- file.path(sfpath, 'Calc.txt')
    if (!file.exists(calc_file)) {
      file.create(calc_file)
    }
  }
}
Upvotes: 1