Captain Tyler
Captain Tyler

Reputation: 594

unnest many columns using non standard evaluation

I have been re-written some scripts to data.table, because I have memory consumption problems with dplyr. I have an intermediate table with nested columns, and need unpack everything. Here's a small example.

library(data.table)

DT <- setDT(list(
  gp = c("A", "B"),
  etc = c("hi", "bye"),
  col1 = list(
    setDT(list(d1 = c(1, 2), d2 = c(2, 2), d3 = c(0.2, 0.2))),
    setDT(list(d1 = c(10, 20, 30), d2 = c(20, 20, 40), d3 = c(0.2, 0.2, 0.4)))
  ),
  col2 = list(
    setDT(list(d1 = c(-1, -2), d2 = c(-2, -2), d3 = c(-0.2, -0.2))),
    setDT(list(d1 = c(-10, -20, -30), d2 = c(-20, -20, -40), d3 = c(-0.2, -0.2, -0.4)))
  )
))

> DT
   gp etc              col1              col2
1:  A  hi <data.table[2x3]> <data.table[2x3]>
2:  B bye <data.table[3x3]> <data.table[3x3]>

I need this result

unnested_DT_manually <- setDT(list(
  gp = c("A", "A", "B", "B", "B"),
  etc = c("hi", "hi", "bye", "bye", "bye"),
  d1 = c(1, 2, 10, 20, 30), 
  d2 = c(2, 2, 20, 20, 40), 
  d3 = c(0.2, 0.2, 0.2, 0.2, 0.4),
  d1 = c(-1, -2, -10, -20, -30), 
  d2 = c(-2, -2, -20, -20, -40), 
  d3 = c(-0.2, -0.2, -0.2, -0.2, -0.4)
))

> unnested_DT
   gp etc d1 d2  d3  d1  d2   d3
1:  A  hi  1  2 0.2  -1  -2 -0.2
2:  A  hi  2  2 0.2  -2  -2 -0.2
3:  B bye 10 20 0.2 -10 -20 -0.2
4:  B bye 20 20 0.2 -20 -20 -0.2
5:  B bye 30 40 0.4 -30 -40 -0.4

A simpler (and guess the "way") solution is

unnested_DT <- DT[, 
  unlist(c(col1, col2), recursive = FALSE), 
  by = c("gp", "etc")
]

But my col's names are variables and need some of non-standard evaluation

I tried

unlist_cols <- function(dt, cols, by){
  stopifnot(is.data.table(dt))
  cols <- substitute(unlist(cols))
  by <- substitute(by)
  dt[, eval(cols), eval(by)]  
}

nestedcols <- c("col1", "col2")
by <- setdiff(colnames(DT), nestedcols)
unnested_DT <- unlist_cols(DT, nestedcols, by)

A good TIP: all nested columns has the same row-lenght per "by" group and they've the same nested- columns-names.

Upvotes: 0

Views: 50

Answers (2)

mt1022
mt1022

Reputation: 17299

Another way with cbind and unlist:

DT[, do.call(cbind, unname(unlist(.SD, recursive = F))),
    by = c('gp', 'etc'), .SDcols = c('col1', 'col2')]
#    gp etc d1 d2  d3  d1  d2   d3
# 1:  A  hi  1  2 0.2  -1  -2 -0.2
# 2:  A  hi  2  2 0.2  -2  -2 -0.2
# 3:  B bye 10 20 0.2 -10 -20 -0.2
# 4:  B bye 20 20 0.2 -20 -20 -0.2
# 5:  B bye 30 40 0.4 -30 -40 -0.4

Upvotes: 1

Ronak Shah
Ronak Shah

Reputation: 389135

This seems too convoluted but works :

library(data.table)

nestedcols <- c("col1", "col2")

DT[, 
   unlist(unname(unlist(.SD, recursive = FALSE)), recursive = FALSE), 
   by = c("gp", "etc"), .SDcols = nestedcols
]

#   gp etc d1 d2  d3  d1  d2   d3
#1:  A  hi  1  2 0.2  -1  -2 -0.2
#2:  A  hi  2  2 0.2  -2  -2 -0.2
#3:  B bye 10 20 0.2 -10 -20 -0.2
#4:  B bye 20 20 0.2 -20 -20 -0.2
#5:  B bye 30 40 0.4 -30 -40 -0.4

Upvotes: 1

Related Questions