Reputation: 594
I have been re-written some scripts to data.table, because I have memory consumption problems with dplyr. I have an intermediate table with nested columns, and need unpack everything. Here's a small example.
library(data.table)
DT <- setDT(list(
gp = c("A", "B"),
etc = c("hi", "bye"),
col1 = list(
setDT(list(d1 = c(1, 2), d2 = c(2, 2), d3 = c(0.2, 0.2))),
setDT(list(d1 = c(10, 20, 30), d2 = c(20, 20, 40), d3 = c(0.2, 0.2, 0.4)))
),
col2 = list(
setDT(list(d1 = c(-1, -2), d2 = c(-2, -2), d3 = c(-0.2, -0.2))),
setDT(list(d1 = c(-10, -20, -30), d2 = c(-20, -20, -40), d3 = c(-0.2, -0.2, -0.4)))
)
))
> DT
gp etc col1 col2
1: A hi <data.table[2x3]> <data.table[2x3]>
2: B bye <data.table[3x3]> <data.table[3x3]>
I need this result
unnested_DT_manually <- setDT(list(
gp = c("A", "A", "B", "B", "B"),
etc = c("hi", "hi", "bye", "bye", "bye"),
d1 = c(1, 2, 10, 20, 30),
d2 = c(2, 2, 20, 20, 40),
d3 = c(0.2, 0.2, 0.2, 0.2, 0.4),
d1 = c(-1, -2, -10, -20, -30),
d2 = c(-2, -2, -20, -20, -40),
d3 = c(-0.2, -0.2, -0.2, -0.2, -0.4)
))
> unnested_DT
gp etc d1 d2 d3 d1 d2 d3
1: A hi 1 2 0.2 -1 -2 -0.2
2: A hi 2 2 0.2 -2 -2 -0.2
3: B bye 10 20 0.2 -10 -20 -0.2
4: B bye 20 20 0.2 -20 -20 -0.2
5: B bye 30 40 0.4 -30 -40 -0.4
A simpler (and guess the "way") solution is
unnested_DT <- DT[,
unlist(c(col1, col2), recursive = FALSE),
by = c("gp", "etc")
]
But my col's names are variables and need some of non-standard evaluation
I tried
unlist_cols <- function(dt, cols, by){
stopifnot(is.data.table(dt))
cols <- substitute(unlist(cols))
by <- substitute(by)
dt[, eval(cols), eval(by)]
}
nestedcols <- c("col1", "col2")
by <- setdiff(colnames(DT), nestedcols)
unnested_DT <- unlist_cols(DT, nestedcols, by)
A good TIP: all nested columns has the same row-lenght per "by" group and they've the same nested- columns-names.
Upvotes: 0
Views: 50
Reputation: 17299
Another way with cbind
and unlist
:
DT[, do.call(cbind, unname(unlist(.SD, recursive = F))),
by = c('gp', 'etc'), .SDcols = c('col1', 'col2')]
# gp etc d1 d2 d3 d1 d2 d3
# 1: A hi 1 2 0.2 -1 -2 -0.2
# 2: A hi 2 2 0.2 -2 -2 -0.2
# 3: B bye 10 20 0.2 -10 -20 -0.2
# 4: B bye 20 20 0.2 -20 -20 -0.2
# 5: B bye 30 40 0.4 -30 -40 -0.4
Upvotes: 1
Reputation: 389135
This seems too convoluted but works :
library(data.table)
nestedcols <- c("col1", "col2")
DT[,
unlist(unname(unlist(.SD, recursive = FALSE)), recursive = FALSE),
by = c("gp", "etc"), .SDcols = nestedcols
]
# gp etc d1 d2 d3 d1 d2 d3
#1: A hi 1 2 0.2 -1 -2 -0.2
#2: A hi 2 2 0.2 -2 -2 -0.2
#3: B bye 10 20 0.2 -10 -20 -0.2
#4: B bye 20 20 0.2 -20 -20 -0.2
#5: B bye 30 40 0.4 -30 -40 -0.4
Upvotes: 1