navac
navac

Reputation: 87

Add conditional columns to list of nested data frames using lapply in R

I'm attempting to simplify the code below. I would like to cast a list of data frames from long to wide and then add several variables to each nested data frame, conditional on variables contained in that data frame. The following code produces my preferred output; I would like help understanding how to reduce the number of steps and, if possible, do this all in one lapply call. I've made several attempts at combining "with" statements, to no avail.

# Example input: a list of three data frames with an identical layout.
# Each has 20 rows: ID is 1 for the first ten rows and 2 for the last ten,
# Y alternates between 1 and 2 in runs of five, and b and c hold
# standard-normal draws. No seed is set, so the values differ between runs.
dflist <- replicate(
  3,
  data.frame(
    ID = rep(c(1, 2), each = 10),
    Y = rep(rep(c(1, 2), each = 5), times = 2),
    b = rnorm(20),
    c = rnorm(20)
  ),
  simplify = FALSE
)


library(data.table)  # provides setDT() and dcast()

# Cast every data frame from long to wide: one row per ID, holding the mean of
# b and of c for each level of Y (columns b_1, b_2, c_1 and c_2).
wide_data <- lapply(dflist, function(x) {
  dcast(setDT(x), ID ~ Y, value.var = c("b", "c"), fun.aggregate = mean)
})

# Flag an ID when its mean b is below 0.30 at Y = 1 or above 0.95 at Y = 2.
# NA_character_ keeps the result a character vector even if nothing is flagged.
b_flag <- lapply(wide_data, function(x) {
  with(x, ifelse(b_1 < 0.30 | b_2 > 0.95, "Flag", NA_character_))
})

# Flag an ID when its mean c is negative at both Y levels; each side of the
# `&` must test a different column (c_1 and c_2).
c_flag <- lapply(wide_data, function(x) {
  with(x, ifelse(c_1 < 0 & c_2 < 0, "Flag", NA_character_))
})

# Append the two flag vectors to their matching wide tables as new columns.
wide_data <- Map(cbind, wide_data, b_flag = b_flag)
wide_data <- Map(cbind, wide_data, c_flag = c_flag)
wide_data

Upvotes: 0

Views: 82

Answers (2)

Sathish
Sathish

Reputation: 12713

Here is another way of approaching it, without using dcast.

Also, the condition for the c column is ambiguous in your question. Please check whether it is correct and edit your question if needed.

library(data.table)

# For each data frame: reduce to one row per (ID, Y) with the means of b and c,
# then, within each ID, mark every row with
#   b_flag - "Flag" if the Y == 1 mean of b is below 0.30 or the Y == 2 mean
#            of b is above 0.95;
#   c_flag - "Flag" if all of that ID's c means are negative.
# Rows that are not flagged get NA. The per-data-frame results are stacked
# into one table.
df2 <- rbindlist(lapply(dflist, function(df) {
  means <- setDT(df)[, .(b = mean(b), c = mean(c)), by = .(ID, Y)]
  means[
    ,
    b_flag := ifelse(
      any(b[Y == 1] < 0.30, b[Y == 2] > 0.95), "Flag", NA_character_
    ),
    by = ID
  ][
    ,
    c_flag := ifelse(all(c < 0), "Flag", NA_character_),
    by = ID
  ]
  means
}))
df2
#     ID Y            b           c b_flag c_flag
#  1:  1 1  0.198227292  0.57377712   Flag     NA
#  2:  1 2  0.578991810  0.40128112   Flag     NA
#  3:  2 1  0.578724225  0.30608932     NA     NA
#  4:  2 2  0.619338292  0.35209122     NA     NA
#  5:  1 1  0.321089583 -0.83979393     NA     NA
#  6:  1 2 -0.341194581  0.52508394     NA     NA
#  7:  2 1  0.179836568 -0.02041203   Flag     NA
#  8:  2 2  0.482725899  0.17163968   Flag     NA
#  9:  1 1  0.003591178 -0.30250232   Flag     NA
# 10:  1 2 -0.230479093  0.01971357   Flag     NA
# 11:  2 1 -0.038689389  0.35717286   Flag     NA
# 12:  2 2  0.691364217 -0.37037455   Flag     NA

Upvotes: 1

spadarian
spadarian

Reputation: 1624

I agree with you that a single `lapply` call would be better:

# Cast each data frame to wide format and add both flag columns in one pass.
wide_data <- lapply(dflist, function(df) {
  # One row per ID; columns b_1, b_2, c_1, c_2 hold the means per Y level.
  wide <- dcast(
    setDT(df),
    ID ~ Y,
    value.var = c("b", "c"),
    fun.aggregate = mean
  )

  # b is flagged when either threshold is crossed; c only when the means at
  # both Y levels are negative.
  b_hit <- wide$b_1 < .30 | wide$b_2 > .95
  c_hit <- wide$c_1 < 0 & wide$c_2 < 0

  wide$b_flag <- ifelse(b_hit, "Flag", NA)
  wide$c_flag <- ifelse(c_hit, "Flag", NA)
  wide
})

Upvotes: 1

Related Questions