Reputation: 14370
Hello I have a table with several columns, and I'd like to "group" them all into one column of lists, each row will contain the list all values of the "grouped" columns on the same row.
example (slow)
library(data.table)
library(foreach)
# create table
DT <- data.table(x_1 = 1:101, x_2 = 101:201, x_3 = 201:301, y_1 = 1:101, y_2 = 101:201)[, IDX := .I]
# we want to "condense" all x_[0-9] together and all y_[0-9]together
foreach(prefix = c("x", "y")) %do% {
# get columns matching x_[0-9]
cols_i <- grep(colnames(DT), pattern = sprintf("%s_[0-9]+", prefix), value = TRUE)
# subset table
DT_i <- DT[, c("IDX", cols_i), with = FALSE]
# melt
DT_i <- melt(DT_i, id.vars = "IDX")
setkey(DT_i, IDX, variable)
# reagglomerate
DT_i <- DT_i[, .(value = list(value)), keyby = IDX]
setnames(DT_i, old = "value", new = prefix)
} -> DT
DT <- Reduce(f = function(X, Y) X[Y, on = "IDX"], x = DT)
expected:
IDX x y
1: 1 1,101,201 1,101
2: 2 2,102,202 2,102
3: 3 3,103,203 3,103
4: 4 4,104,204 4,104
5: 5 5,105,205 5,105
---
97: 97 97,197,297 97,197
98: 98 98,198,298 98,198
99: 99 99,199,299 99,199
100: 100 100,200,300 100,200
101: 101 101,201,301 101,201
Upvotes: 1
Views: 255
Reputation: 11255
Here's a way that relies on asplit()
which splits a matrix by rows:
DT[, X := asplit(as.matrix(.SD), 1), .SDcols = grep('x', names(DT))]
DT[, Y := asplit(as.matrix(.SD), 1), .SDcols = grep('y', names(DT))]
It would likely have similar performance to @ThomasIsCoding
Upvotes: 1
Reputation: 1364
You can do the following. Although all variables are kept in this solution, you can of course just subset the final DT
for columns IDX, X and Y
, i.e. DT[, .(IDX, X, Y)]
.
Code
xcols = colnames(DT)[colnames(DT) %like% 'x']
ycols = colnames(DT)[colnames(DT) %like% 'y']
DT[, X := lapply(IDX, function(x){
lapply(xcols, function(y) DT[x, get(y)])
})]
DT[, Y := lapply(IDX, function(x){
lapply(ycols, function(y) DT[x, get(y)])
})]
Output
> dput(head(DT))
structure(list(x_1 = 1:6, x_2 = 101:106, x_3 = 201:206, y_1 = 1:6,
y_2 = 101:106, IDX = 1:6,
X = list(list(1L, 101L, 201L), list(2L, 102L, 202L), list(3L, 103L, 203L), list(4L, 104L, 204L), list(5L, 105L, 205L), list(6L, 106L, 206L)),
Y = list(list(1L, 101L), list(2L, 102L), list(3L, 103L), list(4L, 104L), list(5L, 105L), list(6L, 106L))), row.names = c(NA, -6L),
class = c("data.table", "data.frame"),
.Names = c("x_1", "x_2", "x_3", "y_1", "y_2", "IDX", "X", "Y"))
Upvotes: 0
Reputation: 101403
Here is a solution for data.frame
in base R
, but I believe it can also work for data.table
:
df <- data.frame(IDX = 1:101, x_1 = 1:101, x_2 = 101:201, x_3 = 201:301, y_1 = 1:101, y_2 = 101:201)
res <- data.table(df[,1], Reduce(rbind,apply(df[,-1], 1, function(v) {
data.table(x = list(as.numeric(v[grep("x",names(v))])), y = list(as.numeric(v[grep("y",names(v))])))
})))
such that
> head(res)
IDX x y
1 1 1,101,201 1,101
2 2 2,102,202 2,102
3 3 3,103,203 3,103
4 4 4,104,204 4,104
5 5 5,105,205 5,105
6 6 6,106,206 6,106
DATA:
df <- data.frame(IDX = 1:101, x_1 = 1:101, x_2 = 101:201, x_3 = 201:301, y_1 = 1:101, y_2 = 101:201)
data.table()
, but maybe slow for large datasetres <- data.table(dt[,1], Reduce(rbind,apply(dt[,-1], 1, function(v) {
data.table(x = list(v[grep("x",names(v))]), y = list(v[grep("y",names(v))]))
})))
such that
> res
IDX x y
1: 1 1,101,201 1,101
2: 2 2,102,202 2,102
3: 3 3,103,203 3,103
4: 4 4,104,204 4,104
5: 5 5,105,205 5,105
---
97: 97 97,197,297 97,197
98: 98 98,198,298 98,198
99: 99 99,199,299 99,199
100: 100 100,200,300 100,200
101: 101 101,201,301 101,201
DATA:
dt <- data.table(IDX = 1:101, x_1 = 1:101, x_2 = 101:201, x_3 = 201:301, y_1 = 1:101, y_2 = 101:201)
res <- cbind(dt[,1],
data.table(
x = split(as.matrix(dt[,grep("x",names(dt)), with = F]),seq(nrow(dt))),
y = split(as.matrix(dt[,grep("y",names(dt)), with = F]),seq(nrow(dt)))
)
)
Upvotes: 2