Reputation: 508
I'm trying to create a list of vectors from 2 columns and an arbitrary number of rows of a data table. An example data table that is similar to what I'm working with is:
dt <- data.table(a = c(1,2,3,4,4,4,3,5,7),
b=c(12,13,14,15,15,16,17,22,18),
c=c(1,1,1,1,2,1,1,1,1),
d=c("cat1", "cat2", "cat1", "cat1", "cat2", "cat3", "cat2", "cat1", "cat2"))
a b c d
1: 1 12 1 cat1
2: 2 13 1 cat2
3: 3 14 1 cat1
4: 4 15 1 cat1
5: 4 15 2 cat2
6: 4 16 1 cat3
7: 3 17 1 cat2
8: 5 22 1 cat1
9: 7 18 1 cat2
I'd like to use a by
statement to group the rows, and then create the list using a function. I haven't been able to get everything to work, though. Here's what I have:
create_kv <- function(x,y) {
l <- list()
i <- 1
while (i <= length(x) & i <= length(y)) {
each_c <- c(x[i], y[i])
l[[i]] <- each_c
i = i + 1
}
return(l)
}
for (each_a in unique(dt$a)) {
for (each_b in dt[a == each_a,]$b) {
ranks <- dt[a == each_a & b == each_b,]$c
inv <- dt[a == each_a & b == each_b,]$d
dt[a == each_a & b == each_b, `:=` (inv_flow = create_kv(ranks, inv))]
}
}
Unfortunately, the loop doesn't correctly combine rows, even if I use b="a,b"
. Instead, I'm getting:
a b c d inv_flow
1: 1 12 1 cat1 1,cat1
2: 2 13 1 cat2 1,cat2
3: 3 14 1 cat1 1,cat1
4: 4 15 1 cat1 1,cat1
5: 4 15 2 cat2 2,cat2
6: 4 16 1 cat3 1,cat3
7: 3 17 1 cat2 1,cat2
8: 5 22 1 cat1 1,cat1
9: 7 18 1 cat2 1,cat1
What I'm hoping for is:
a b c d inv_flow
1: 1 12 1 cat1 1,cat1
2: 2 13 1 cat2 1,cat2
3: 3 14 1 cat1 1,cat1
4: 4 15 1 cat1 c(1,cat1), c(2,cat2)
5: 4 16 1 cat3 1,cat3
7: 3 17 1 cat2 1,cat2
8: 5 22 1 cat1 1,cat1
9: 7 18 1 cat2 1,cat1
So that there is one row for every a,b
pair.
Upvotes: 0
Views: 105
Reputation: 92282
Is this is what you need?
dt <- dt[ , inv_flow := paste(c, d, sep=",")][, list(inv_flow = if (.N > 1) list(c(paste0("c(",inv_flow,")"))) else list(c(inv_flow))), by = "a,b"]
# a b inv_flow
#1: 1 12 1,cat1
#2: 2 13 1,cat2
#3: 3 14 1,cat1
#4: 4 15 c(1,cat1),c(2,cat2)
#5: 4 16 1,cat3
#6: 3 17 1,cat2
#7: 5 22 1,cat1
#8: 7 18 1,cat2
str(dt)
# Classes ‘data.table’ and 'data.frame': 8 obs. of 3 variables:
# $ a : num 1 2 3 4 4 3 5 7
# $ b : num 12 13 14 15 16 17 22 18
# $ inv_flow:List of 8
# ..$ : chr "1,cat1"
# ..$ : chr "1,cat2"
# ..$ : chr "1,cat1"
# ..$ : chr "c(1,cat1)" "c(2,cat2)"
# ..$ : chr "1,cat3"
# ..$ : chr "1,cat2"
# ..$ : chr "1,cat1"
# ..$ : chr "1,cat2"
# - attr(*, ".internal.selfref")=<externalptr>
Upvotes: 3
Reputation:
Since it's a small dataset, you can reshape your data as a data.frame
and summarize it using plyr
. Not a complete solution, but should get you started:
> df <- data.frame(a = c(1,2,3,4,4,4,3,5,7),
b=c(12,13,14,15,15,16,17,22,18),
c=c(1,1,1,1,2,1,1,1,1),
e=c("cat1", "cat2", "cat1", "cat1", "cat2", "cat3", "cat2", "cat1", "cat2"))
> df$e <- as.character(df$e)
# create index you'd like to group by
> df$ab <- paste(df$a, df$b)
> library(plyr)
> ddply(df, .(ab), summarize, f = list(e), g = list(c))
ab f g
1 1 12 cat1 1
2 2 13 cat2 1
3 3 14 cat1 1
4 3 17 cat2 1
5 4 15 cat1, cat2 1, 2
6 4 16 cat3 1
7 5 22 cat1 1
8 7 18 cat2 1
Upvotes: 0