user1491868
user1491868

Reputation: 326

Add variable to nested list

Using base R, I would like to add a variable to every data frame in a nested list, where the value of the variable changes from one nested list element to the next. Below is an example. Thank you.

#CREATE EXAMPLE DATAFRAME
# One row per NAME/DATE combination: three names crossed with every day from
# 2014-01-01 through 2018-12-31.
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
# Outer level: i = 1, 2, 3. Inner level: t = 2014, 2015. Each leaf holds the
# rows whose DATE lies between January 1 of year t and December 31 of year
# t + i, so the result is a list of 3 lists of 2 data frames.
# Note that the name DF is reused: the data frame is replaced by the list.
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
# Show the first and last few rows of every leaf data frame.
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I WOULD LIKE TO SIMPLIFY THIS PART
# Add a constant GROUP column to each leaf by hand: the first data frame of
# every inner list gets 2014, the second gets 2015.
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)

DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)

DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I AM SURPRISED THE FOLLOWING DOES NOT WORK
# Rebuild the example data frame and the nested list from scratch.
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))
# Here x is one element of the outer list, i.e. a list of two data frames.
# within() is called on that whole list once per year t; no individual data
# frame is ever indexed out of x.
DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

Upvotes: 12

Views: 2488

Answers (6)

Rui Barradas
Rui Barradas

Reputation: 76402

OK, I think I've got it. At least all.equal returns TRUE, though identical does not. Here it goes. Note that I've changed your df names a bit.

#CREATE EXAMPLE DATAFRAME
# Three names crossed with every day from 2014-01-01 through 2018-12-31.
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
# List of 3 (i = 1:3) lists of 2 (t = 2014, 2015) data frames; each leaf holds
# the rows dated from January 1 of t through December 31 of t + i.
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

# Reference result: the manual version from the question, kept verbatim so the
# simplified code below can be compared against it.
DF2 <- DF
#I WOULD LIKE TO SIMPLIFY THIS PART
DF2[[1]][[1]] <- within(DF2[[1]][[1]], GROUP <- 2014)
DF2[[1]][[2]] <- within(DF2[[1]][[2]], GROUP <- 2015)

DF2[[2]][[1]] <- within(DF2[[2]][[1]], GROUP <- 2014)
DF2[[2]][[2]] <- within(DF2[[2]][[2]], GROUP <- 2015)

DF2[[3]][[1]] <- within(DF2[[3]][[1]], GROUP <- 2014)
DF2[[3]][[2]] <- within(DF2[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
lapply(DF2, lapply, function(x) rbind(head(x), tail(x)))

### New code
# Pair every inner data frame with its year by position. This avoids turning
# the year into a list index through a hard-coded offset (t - 2013), which
# silently breaks as soon as the set of years changes.
group_years <- 2014:2015
DF3 <- lapply(DF, function(x) {
        Map(function(df, yr) within(df, GROUP <- yr), x, group_years)
    })
# identical() is FALSE while all.equal() is TRUE: among other things, GROUP is
# an integer here (2014:2015) but a double in DF2 (the literals 2014 and 2015).
identical(DF2, DF3)
all.equal(DF2, DF3)

Upvotes: 2

Damian
Damian

Reputation: 1433

I think the problem may be the second lapply:

DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))

The lapply calls don't extract the desired component from the large list object. The first lapply iterates over the top level of the list, extracting a two-element list object, x, each time. The second lapply then iterates over a vector of years, providing a single value, t, each time. So, the next part gets the whole two-element list (x) each time instead of the desired (unnamed) data frame.

Solution

If the object has already been created, you can iterate over the elements directly instead of indexing the list elements.

#CREATE NESTED LIST
# DF must still be the example data frame (with a DATE column) at this point.
# The result is a list of 3 lists (i = 1:3), each holding one data frame per
# start year t (2014, 2015) with the rows dated from January 1 of t through
# December 31 of t + i.
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

edit_level2 <- function(df) {
    # Add a GROUP column holding the calendar year of the earliest DATE in df.
    #
    # Args:
    #   df: a data frame with a Date column named DATE.
    #
    # Returns:
    #   df with an extra integer column GROUP. Missing dates are ignored when
    #   looking for the earliest one; GROUP is NA if no date is available.
    #   A zero-row data frame is returned with a zero-length GROUP column.
    dates <- df$DATE[!is.na(df$DATE)]
    year <- if (length(dates) > 0L) {
        as.integer(format(min(dates), "%Y"))
    } else {
        # min() of an empty vector would warn and return Inf
        NA_integer_
    }
    # rep() sizes the value to the data explicitly; assigning a length-one
    # value to a zero-row data frame is an error.
    df$GROUP <- rep(year, nrow(df))
    df
}

# descend through *both* list levels: the outer lapply hands each inner list
# to the inner lapply, which calls edit_level2 on every data frame in it
DF <- lapply(DF, lapply, edit_level2)

Note: this is similar to the solution presented by @Consistency in the comments--extracting the data frame is the issue.

Alternative

If you can alter the code that produces the list object, I suggest assigning the variable while creating the list object instead of modifying it afterwards (my original suggestion before edits).

#CREATE EXAMPLE DATAFRAME
# every NAME paired with every calendar day from 2014-01-01 to 2018-12-31
DF <- expand.grid(
    NAME = c("FRANK", "TONY", "ED"),
    DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day")
)

#CREATE NESTED LIST
# Build each leaf and tag it with its start year in the same pass, so no
# second traversal of the nested list is needed afterwards.
DF <- lapply(1:3, function(span) {
    lapply(2014:2015, function(yr) {
        # rows dated from January 1 of yr through December 31 of yr + span
        in_window <- DF$DATE >= as.Date(sprintf("%d-01-01", yr)) &
            DF$DATE <= as.Date(sprintf("%d-12-31", yr + span))

        leaf <- DF[in_window, ]
        leaf$GROUP <- yr
        leaf
    })
})

Upvotes: 2

Sotos
Sotos

Reputation: 51582

This can also be achieved using Map to cbind the years onto the data frames, i.e.,

# For each inner list, pair its data frames with the years by position and
# bind the matching year on as an extra column.
lapply(DF, function(inner) {
  Map(cbind, inner, c(2014, 2015))
})

#or to set the name of that column to 'id',

lapply(DF, function(inner) {
  Map(function(df, yr) cbind(df, id = yr), inner, c(2014, 2015))
})

However, if you don't mind flattening that list and keeping an extra id variable, you can use tidyverse to flatten it and create the year id by grabbing the year from the first element of DATE in each data frame, i.e.

library(tidyverse)

# Flatten the nested list into a single data frame; list_id records which of
# the flattened data frames each row came from.
# NOTE(review): flatten_df() appears to be deprecated in recent purrr
# releases -- confirm against the installed version before reusing this.
new_df <- DF %>% 
  flatten_df(.id = 'list_id') %>% 
  group_by(list_id) %>% 
  # id is the text before the first '-' of the group's first DATE, i.e. its
  # year as a character string; the result is left grouped by list_id
  mutate(id = sub('-.*', '', DATE[1]))

#which will give,

# A tibble: 19,725 x 4
# Groups:   list_id [6]
#   list_id   NAME       DATE    id
#     <chr> <fctr>     <date> <chr>
# 1       1  FRANK 2014-01-01  2014
# 2       1   TONY 2014-01-01  2014
# 3       1     ED 2014-01-01  2014
# 4       1  FRANK 2014-01-02  2014
# 5       1   TONY 2014-01-02  2014
# 6       1     ED 2014-01-02  2014
# 7       1  FRANK 2014-01-03  2014
# 8       1   TONY 2014-01-03  2014
# 9       1     ED 2014-01-03  2014
#10       1  FRANK 2014-01-04  2014
# ... with 19,715 more rows

Upvotes: 3

Probel
Probel

Reputation: 195

How about this? I had to use the dplyr package, since its mutate family of functions makes adding a new variable to the data frames easier.

library(dplyr) # if not installed, install with install.packages("dplyr")

# example data: three names crossed with every day from 2014 through 2018
DF <- expand.grid(
  NAME = c("FRANK", "TONY", "ED"),
  DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day")
)

#CREATE NESTED LIST
# list of 3 (i) lists of 2 (t) data frames, each covering January 1 of year t
# through December 31 of year t + i
DF <- lapply(1:3, function(i) {
  lapply(2014:2015, function(t) {
    DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE &
      DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]
  })
})

# loop over the first list with lapply and then loop over the nested lists
# and the desired GROUP values with mapply
# mutate() replaces the deprecated mutate_(); SIMPLIFY is spelled out as FALSE
# because F is an ordinary variable that can be reassigned
DF <- lapply(DF, function(x) {
  mapply(
    FUN = function(df, number) mutate(df, GROUP = number),
    x, 2014:2015,
    SIMPLIFY = FALSE
  )
})

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

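As to why your way was not working: think about what function(t) within(x, GROUP <- t) does. Here x is a list of two data frames, so within() is applied to that list rather than to a single data frame, and the function does not return a data frame.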

Upvotes: 4

Consistency
Consistency

Reputation: 2922

#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
# list of 3 (i) lists of 2 (t) data frames, each covering January 1 of year t
# through December 31 of year t + i
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I WOULD LIKE TO SIMPLIFY THIS PART
# manual version from the question, kept verbatim as the reference result
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)

DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)

DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
DF1 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF1

#CORRECTED VERSION OF THE ATTEMPT THAT DID NOT WORK
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))

# one GROUP value per position in each inner list
GROUPS <- 2014:2015

# xs is one inner list; t is a position in it, so xs[[t]] is a single data
# frame and GROUPS[t] is its year. seq_along(GROUPS) keeps the positions in
# step with GROUPS instead of hard-coding 1:2.
DF <- lapply(DF, function(xs) lapply(seq_along(GROUPS), function(t) within(xs[[t]], GROUP <- GROUPS[t])))

DF2 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF2

# compare the head/tail summaries of the manual and the simplified result
all.equal(DF1, DF2)

Upvotes: 7

user_123
user_123

Reputation: 60

This should do it

# Build final_list: a copy of the nested list DF in which every data frame has
# an extra GROUP column holding the year that belongs to its inner position.
group_years <- c(2014, 2015)

# preallocate instead of growing the lists inside the loops
final_list <- vector("list", length(DF))
for (i in seq_along(DF)) {

  new_list <- vector("list", length(DF[[i]]))

  for (j in seq_along(DF[[i]])) {

    # Assign the column on the data frame itself. Wrapping the data frame in
    # list(df, GROUP = j) would nest it one level deeper and store the
    # position (1, 2) rather than the year.
    leaf <- DF[[i]][[j]]
    leaf$GROUP <- group_years[j]
    new_list[[j]] <- leaf

  }
  final_list[[i]] <- new_list
}

Upvotes: 7

Related Questions