Reputation: 675
I have a large dataframe which is effectively combined output from a nested list using do.call(rbind, nested_list)
The output has the same number of rows for each list element (e.g. 5 rows per list) and I need to add a column which has a unique numeric code for each list (or group). How can I write a loop to reproduce the group column I have included in the example below, e.g. the five rows have a group value == 1, rows 6 to 10 have a group value == 2, rows 11 to 15 have a group value == 3
df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- c(1,1,1,1,1,2,2,2,2,2,3,3,3,3,3)
ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1 1 1 10 10 10 2 10 10 10 1
2 2 3 0 12 12 5 0 12 12 1
3 3 5 0 14 14 5 0 14 14 1
4 4 7 0 16 16 8 0 16 16 1
5 5 9 12 10 10 11 12 10 10 1
6 6 11 12 12 12 14 12 12 12 2
7 7 13 12 14 14 15 12 14 14 2
8 8 15 12 16 16 17 12 16 16 2
9 9 17 0 10 10 20 0 10 10 2
10 10 19 14 12 12 21 14 12 12 2
11 11 21 NA 14 14 22 NA 14 14 3
12 12 23 14 16 16 23 14 16 16 3
13 13 25 16 10 10 25 16 10 10 3
14 14 27 16 12 12 25 16 12 12 3
15 15 29 16 14 14 27 16 14 14 3
Upvotes: 3
Views: 261
Reputation: 886
You could use the cut
function with labels = FALSE
to return an integer to use for the group.
n_per_group <- 5
df$group <- cut(x = df$ID, breaks = nrow(df) / n_per_group, labels = FALSE)
df$group
#[1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Upvotes: 1
Reputation: 39737
An option would be to combine cumsum
with rep
.
cumsum(rep_len(c(TRUE, rep(FALSE, 4)), nrow(df)))
#cumsum(rep_len(c(TRUE, FALSE, FALSE, FALSE, FALSE), nrow(df))) #Alternative
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Or making use of auto repeat.
df$Group <- c(TRUE, rep(FALSE, 4))
df$Group <- cumsum(df$Group)
df$Group
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Or create a sequence with length of nrow
and make an integer division %/%
.
0:(nrow(df)-1) %/% 5
#seq(0, nrow(df)-1) %/% 5 #Alternative
#(seq_len(nrow(df))-1) %/% 5 #Alternative
# [1] 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2
Or using rep
:
rep(1:ceiling(nrow(df)/5), each=5, length.out=nrow(df))
# [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Upvotes: 1
Reputation: 71610
I'd use dplyr::mutate
with dplyr::row_number
:
library(dplyr)
df %>%
mutate(Group=ceiling(row_number() / 5))
Output:
ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
1 1 1 10 10 10 2 10 10 10 1
2 2 3 0 12 12 5 0 12 12 1
3 3 5 0 14 14 5 0 14 14 1
4 4 7 0 16 16 8 0 16 16 1
5 5 9 12 10 10 11 12 10 10 1
6 6 11 12 12 12 14 12 12 12 2
7 7 13 12 14 14 15 12 14 14 2
8 8 15 12 16 16 17 12 16 16 2
9 9 17 0 10 10 20 0 10 10 2
10 10 19 14 12 12 21 14 12 12 2
11 11 21 NA 14 14 22 NA 14 14 3
12 12 23 14 16 16 23 14 16 16 3
13 13 25 16 10 10 25 16 10 10 3
14 14 27 16 12 12 25 16 12 12 3
15 15 29 16 14 14 27 16 14 14 3
Upvotes: 1
Reputation: 2021
Without adding an ID or rownums we can do this using nrow and knowledge of the group length.
group_len <- 5
groups <- nrow(df)/group_len
df$group <- rep(1:groups, each = group_len)
# Example:
# rep(1:3, each = 5)
# 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Upvotes: 2
Reputation: 1986
You can use the ceiling
function:
df <- data.frame("ID" = 1:15)
df$Var_A <- c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)
df$Var_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$Var_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Var_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_A <- c(2,5,5,8,11,14,15,17,20,21,22,23,25,25,27)
df$New_B <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16)
df$New_C <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$New_D <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14)
df$Group <- ceiling(as.numeric(df$ID)/5)
df
# ID Var_A Var_B Var_C Var_D New_A New_B New_C New_D Group
# 1 1 1 10 10 10 2 10 10 10 1
# 2 2 3 0 12 12 5 0 12 12 1
# 3 3 5 0 14 14 5 0 14 14 1
# 4 4 7 0 16 16 8 0 16 16 1
# 5 5 9 12 10 10 11 12 10 10 1
# 6 6 11 12 12 12 14 12 12 12 2
# 7 7 13 12 14 14 15 12 14 14 2
# 8 8 15 12 16 16 17 12 16 16 2
# 9 9 17 0 10 10 20 0 10 10 2
# 10 10 19 14 12 12 21 14 12 12 2
# 11 11 21 NA 14 14 22 NA 14 14 3
# 12 12 23 14 16 16 23 14 16 16 3
# 13 13 25 16 10 10 25 16 10 10 3
# 14 14 27 16 12 12 25 16 12 12 3
# 15 15 29 16 14 14 27 16 14 14 3
Upvotes: 3