Reputation: 1291
I really need to learn how to do this, because I've been doing it the terrible, terrible painful way for too long. I have a list of dataframes that I want to:
What's the best way to do these things automatically, so that I'm not rewriting the same code for each data frame? The one tricky part for me is accessing the names in the habitat_names vector while operating on the list of data.tables. Here's my existing code:
# Load each IUCN habitat export as a data.table (needs the data.table package).
iucn_1_4 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.4 Temperate Forest.csv"))
iucn_1_6 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.6 Subtropical_Tropical Moist Lowland Forest.csv"))
iucn_1_7 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.7 Subtropical_Tropical Mangrove Forest Vegetation Above High Tide Level.csv"))
# NOTE(review): iucn_1_8 is read from the "1.9 ... Moist Montane Forest" file
# but is labelled below with habitat_names[4] ("1.8 ... Swamp Forest") --
# confirm which file/label pair is intended.
iucn_1_8 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/1.9 Subtropical_Tropical Moist Montane Forest.csv"))
iucn_4_4 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/4.4 Temperate Grassland.csv"))
iucn_4_5 <- data.table(read.csv("~/Projects/ClimateChange/Random/IUCN/4.5 Subtropical_Tropical Dry Lowland Grassland_Colombia_Venezuela and Bolivia.csv"))

# Habitat labels, in the same order as the tables above; each becomes the name
# of a logical flag column in the matching table.
habitat_names <- c(
  "1.4 Temperate Forest",
  "1.6 Subtropical/Tropical Moist Lowland Forest",
  "1.7 Subtropical/Tropical Mangrove Forest Vegetation Above High Tide Level",
  "1.8 Subtropical/Tropical Swamp Forest",
  "4.4 Temperate Grassland",
  "4.5 Subtropical/Tropical Dry Lowland Grassland"
)

# For every table: build SCI_NAME as "Genus Species", add a TRUE column named
# after the habitat, and keep only those two columns in the *.out table.
# paste() already separates its arguments with a single space; passing " " as
# a third argument (as before) appended two trailing spaces to every name.
# Wrapping the left-hand side of := in parentheses makes data.table evaluate
# it to obtain the column name.
iucn_1_4[, SCI_NAME := paste(Genus, Species)]
iucn_1_4[, (habitat_names[1]) := TRUE]
iucn_1_4.out <- iucn_1_4[, c("SCI_NAME", habitat_names[1]), with = FALSE]

iucn_1_6[, SCI_NAME := paste(Genus, Species)]
iucn_1_6[, (habitat_names[2]) := TRUE]
iucn_1_6.out <- iucn_1_6[, c("SCI_NAME", habitat_names[2]), with = FALSE]

iucn_1_7[, SCI_NAME := paste(Genus, Species)]
iucn_1_7[, (habitat_names[3]) := TRUE]
iucn_1_7.out <- iucn_1_7[, c("SCI_NAME", habitat_names[3]), with = FALSE]

iucn_1_8[, SCI_NAME := paste(Genus, Species)]
iucn_1_8[, (habitat_names[4]) := TRUE]
iucn_1_8.out <- iucn_1_8[, c("SCI_NAME", habitat_names[4]), with = FALSE]

iucn_4_4[, SCI_NAME := paste(Genus, Species)]
iucn_4_4[, (habitat_names[5]) := TRUE]
iucn_4_4.out <- iucn_4_4[, c("SCI_NAME", habitat_names[5]), with = FALSE]

iucn_4_5[, SCI_NAME := paste(Genus, Species)]
iucn_4_5[, (habitat_names[6]) := TRUE]
iucn_4_5.out <- iucn_4_5[, c("SCI_NAME", habitat_names[6]), with = FALSE]
Upvotes: 4
Views: 385
Reputation: 14336
You can also use Map to avoid a for loop. Example code:
library(data.table)

# Two toy tables standing in for the real inputs (e.g. iucn_1_4 and iucn_1_6).
dt1 <- data.table(v1 = 1:10, v2 = rep(c("a", "b"), times = 5))
dt2 <- data.table(v1 = 1:10, v2 = rep(c("a", "b", "c", "d", "e"), times = 2))
names <- c("name1", "name2") # replace by habitat_names

# Adds SCI_NAME and a TRUE flag column called `name` to `dt` (by reference, so
# the table passed in is modified too), then returns just those two columns.
f <- function(dt, name) {
  dt[, SCI_NAME := paste0(v1, v2)] # replace v1, v2 by Genus, Species
  dt[, (name) := TRUE]
  keep <- c("SCI_NAME", name)
  dt[, keep, with = FALSE]
}

# Pair each table with its name and apply f to every pair.
res <- Map(f, list(dt1, dt2), names)
Then you can call join_all (from the plyr package) on res, if I understood correctly that this was your aim.
Upvotes: 4
Reputation: 418
A good practice in R is that if you're doing something more than once, you should write a function to handle the repeated elements.
For instance:
# Habitat labels, one per input file, in the order the files are processed.
habitat_names <- c(
  "1.4 Temperate Forest",
  "1.6 Subtropical/Tropical Moist Lowland Forest",
  "1.7 Subtropical/Tropical Mangrove Forest Vegetation Above High Tide Level"
  # etc. -- add the remaining habitat names here
)

# Read one habitat CSV and reduce it to a two-column data.table.
#
# path:  path to a CSV file that has Genus and Species columns.
# index: position in `Names` of the label that belongs to this file.
# Names: character vector of habitat labels.
#
# Returns a data.table with SCI_NAME ("Genus Species") and a logical column,
# named Names[index], that is TRUE for every row.
getSpecies <- function(path, index, Names) {
  species <- data.table(read.csv(path))
  habitat <- Names[index]
  # paste() already separates its arguments with a single space.
  species[, SCI_NAME := paste(Genus, Species)]
  # Parenthesised left-hand side: use the value of `habitat` as the column name.
  species[, (habitat) := TRUE]
  species[, c("SCI_NAME", habitat), with = FALSE]
}

# Call the function; argument names are case-sensitive, so it must be `Names`.
iucn_1_4 <- getSpecies(
  path = "~/Projects/ClimateChange/Random/IUCN/1.4 Temperate Forest.csv",
  index = 1,
  Names = habitat_names
)
Upvotes: 0
Reputation: 55340
# CSV files to load, in the same order as the labels in habitat_names.
files <- c("file1.csv", "file2.csv") # etc. -- list every CSV file here

# Files and labels are paired by position, so the counts have to agree.
stopifnot(length(files) == length(habitat_names))

DT.list <- lapply(files, fread)

# Preallocate the result list rather than growing it inside the loop.
DT.out <- vector("list", length(DT.list))
for (i in seq_along(DT.list)) {
  # paste() already separates its arguments with a single space.
  DT.list[[i]][, SCI_NAME := paste(Genus, Species)]
  # Parenthesised left-hand side: the i-th habitat label is the column name.
  DT.list[[i]][, (habitat_names[i]) := TRUE]
  DT.out[[i]] <- DT.list[[i]][, c("SCI_NAME", habitat_names[i]), with = FALSE]
}
then,
# Stack the tables, matching columns by name. Each table has its own habitat
# column, so fill = TRUE leaves NA where a species is absent from a habitat.
rbindlist(DT.out, use.names = TRUE, fill = TRUE)
or
# merge() joins two tables at a time, so fold it over the list; all = TRUE
# keeps species that occur in only some of the habitats.
Reduce(function(x, y) merge(x, y, by = "SCI_NAME", all = TRUE), DT.out)
For more details, see the Data.table meta-programming question
Upvotes: 1
Reputation: 12875
Untested code but something like this maybe?
# File names of the habitat exports to read.
namestoread <- c(
  "1.4 Temperate Forest.csv",
  "1.6 Subtropical_Tropical Moist Lowland Forest.csv",
  "1.7 Subtropical_Tropical Mangrove Forest Vegetation Above High Tide Level.csv",
  "1.9 Subtropical_Tropical Moist Montane Forest.csv",
  "4.4 Temperate Grassland.csv",
  "4.5 Subtropical_Tropical Dry Lowland Grassland_Colombia_Venezuela and Bolivia.csv"
)

# Preallocate one slot per file.
listofdatasets <- vector(mode = "list", length = length(namestoread))
for (i in seq_along(namestoread)) {
  # read.csv() returns a plain data.frame; convert it so that := works.
  dataset <- data.table(
    read.csv(paste0("~/Projects/ClimateChange/Random/IUCN/", namestoread[i]))
  )
  # paste() already separates its arguments with a single space.
  dataset[, SCI_NAME := paste(Genus, Species)]
  # Flag column named after the file (the name keeps the ".csv" suffix).
  dataset[, (namestoread[i]) := TRUE]
  listofdatasets[[i]] <- dataset
}

# join_all() comes from the plyr package.
plyr::join_all(listofdatasets)
Upvotes: 0