Reputation: 263
I have a dataframe with 5 different groups :
id group
1 L1 1
2 L2 1
3 L1 2
4 L3 2
5 L4 2
6 L3 3
7 L5 3
8 L6 3
9 L1 4
10 L4 4
11 L2 5
I would like to know whether it is possible to get the unique ids
from the 1st group, then from the 1st and 2nd groups, then from the 1st, 2nd and 3rd groups, and so on, without writing a for loop. I'm looking for a solution using the dplyr
or data.table
package.
Expected results :
group id
1 1 c("L1", "L2")
2 1,2 c("L1", "L2", "L3", "L4")
3 1,2,3 c("L1", "L2", "L3", "L4", "L5", "L6")
4 1,2,3,4 c("L1", "L2", "L3", "L4", "L5", "L6")
5 1,2,3,4,5 c("L1", "L2", "L3", "L4", "L5", "L6")
Data :
# Example data: one row per (id, group) observation. `id` is a character
# column and `group` is a factor with levels "1" to "5". The object is bound
# to `df`, the name the code in the answers reads from.
df <- data.frame(
  id = c("L1", "L2", "L1", "L3", "L4", "L3", "L5", "L6", "L1", "L4", "L2"),
  group = factor(c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 5L), levels = 1:5),
  # Explicit so that `id` stays character on R versions before 4.0.0 as well.
  stringsAsFactors = FALSE
)
Upvotes: 3
Views: 108
Reputation: 38500
Another method is to use split
and Reduce
to feed the groups to union
with accumulate=TRUE:
# split() yields one vector of ids per group; Reduce() then folds them together
# with union(), and accumulate = TRUE keeps every intermediate result, i.e. the
# distinct ids seen up to and including each group.
Reduce(f = union, x = split(x = df$id, f = df$group), accumulate = TRUE)
[[1]]
[1] "L1" "L2"
[[2]]
[1] "L1" "L2" "L3" "L4"
[[3]]
[1] "L1" "L2" "L3" "L4" "L5" "L6"
[[4]]
[1] "L1" "L2" "L3" "L4" "L5" "L6"
[[5]]
[1] "L1" "L2" "L3" "L4" "L5" "L6"
Upvotes: 4
Reputation: 24074
With base R, you can do:
# Create the "growing" sets of groups: the first group, the first two groups,
# and so on. The distinct groups are computed once (inside local(), so no
# helper variable is left behind) instead of once per iteration.
combi_groups <- local({
  grp_levels <- unique(df$group)
  lapply(seq_along(grp_levels), function(i) grp_levels[seq_len(i)])
})
# Get the unique ids for each set of groups. Each element is named after the
# comma-separated groups it covers; vapply() guarantees the names are always a
# character vector, whatever the input length.
uniq_ID <- setNames(
  lapply(combi_groups, function(x) unique(df$id[df$group %in% x])),
  vapply(combi_groups, paste, FUN.VALUE = character(1), collapse = ",")
)
# $`1`
# [1] "L1" "L2"
# $`1,2`
# [1] "L1" "L2" "L3" "L4"
# $`1,2,3`
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
# $`1,2,3,4`
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
# $`1,2,3,4,5`
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
If you want to format as in your expected output:
# One row per cumulative set of groups: `group` is the ", "-separated label of
# the set, `id` is the text of a c("...", "...") call listing its unique ids.
# vapply() pins both columns to one string per set.
data.frame(
  group = vapply(combi_groups, paste, FUN.VALUE = character(1), collapse = ", "),
  id = vapply(
    uniq_ID,
    function(x) paste0("c(", paste0("\"", x, "\"", collapse = ", "), ")"),
    FUN.VALUE = character(1)
  )
)
# group id
#1 1 c("L1", "L2")
#2 1, 2 c("L1", "L2", "L3", "L4")
#3 1, 2, 3 c("L1", "L2", "L3", "L4", "L5", "L6")
#4 1, 2, 3, 4 c("L1", "L2", "L3", "L4", "L5", "L6")
#5 1, 2, 3, 4, 5 c("L1", "L2", "L3", "L4", "L5", "L6")
Another possibility of formatting:
# Long format: one row per (set of groups, id) pair. lengths() gives the number
# of ids in each set, so each set label is repeated once per id it contains.
data.frame(
  group = rep(names(uniq_ID), lengths(uniq_ID)),
  id = unlist(uniq_ID)
)
Or, if you want to have uniq_ID
in a column:
library(data.table)
# Passing the list uniq_ID as a column stores, for each set of groups, the
# whole vector of unique ids in a single cell of `id`.
data.table(
  group = vapply(combi_groups, paste, FUN.VALUE = character(1), collapse = ", "),
  id = uniq_ID
)
# group id
#1: 1 L1,L2
#2: 1, 2 L1,L2,L3,L4
#3: 1, 2, 3 L1,L2,L3,L4,L5,L6
#4: 1, 2, 3, 4 L1,L2,L3,L4,L5,L6
#5: 1, 2, 3, 4, 5 L1,L2,L3,L4,L5,L6
# Pull the `id` cell of the second row: the result is a list of length one
# that wraps the character vector of ids for the first two groups.
data.table(
  group = vapply(combi_groups, paste, FUN.VALUE = character(1), collapse = ", "),
  id = uniq_ID
)[2, id]
[[1]]
[1] "L1" "L2" "L3" "L4"
Upvotes: 8
Reputation: 67778
In a similar vein to the answer by @Cath, but using Reduce(..., accumulate = TRUE)
to create the expanding window of groups. Then loop over the sets of groups with lapply
to get the unique ids for each window:
# Expanding windows of group labels: the first label, the first two, and so on.
# The labels are converted to character first because c() on factors only
# keeps the labels from R 4.1.0 onwards; before that it returned the integer
# codes, which need not match the labels.
grp <- Reduce(c, as.character(unique(df$group)), accumulate = TRUE)
# For each window, keep the distinct ids of the rows whose group is in it.
lapply(grp, function(x) unique(df$id[df$group %in% x]))
# [[1]]
# [1] "L1" "L2"
#
# [[2]]
# [1] "L1" "L2" "L3" "L4"
#
# [[3]]
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
#
# [[4]]
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
#
# [[5]]
# [1] "L1" "L2" "L3" "L4" "L5" "L6"
For naming and prettification, please refer to the nice answer by @Cath.
Upvotes: 6