Reputation: 43
# Sample data: `n` is how many times each row should be repeated and
# `survive` is how many of those repeats should carry a 1.
df <- data.frame(
  n       = c(3, 2, 2),
  survive = c(2, 1, 2),
  a       = c(1, 1, 0),
  b       = c(0, 0, 1)
)
How can I expand the data.frame above so that each row appears the number of times specified in the column 'n', keeping the last two columns as they are, and so that the second column "survive" is turned into binary 0/1 values according to the value of "survive" (that many 1s, with 0s for the remaining repeats)?
In other words:
n survive a b
3 2 1 0
2 1 1 0
2 2 0 1
To this
survive a b
1 1 0
1 1 0
0 1 0
1 1 0
0 1 0
1 0 1
1 0 1
Upvotes: 3
Views: 378
Reputation: 5335
Here's a solution using a split/apply/combine approach in base R:
# Split df into one-row pieces, expand each piece, then stack the pieces.
df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(piece) {
  # `survive` ones followed by `n - survive` zeros for this row
  survive <- rep(c(1, 0), times = c(piece$survive, piece$n - piece$survive))
  # repeat the row's a/b values n times and put the 0/1 column in front
  cbind(survive, piece[rep(1, piece$n), c("a", "b")])
}))
Result:
survive a b
1.1 1 1 0
1.1.1 1 1 0
1.1.2 0 1 0
2.2 1 1 0
2.2.1 0 1 0
3.3 1 0 1
3.3.1 1 0 1
Upvotes: 2
Reputation: 83255
Several alternative solutions:
1) Using base R:
# Row index of df, with each index repeated n times. seq_len() is used
# instead of 1:nrow(df), which would yield c(1, 0) for a zero-row df.
rn <- rep(seq_len(nrow(df)), df$n)
df2 <- df[rn, ]
# ave() numbers the copies of each original row 1, 2, ...; a copy gets 1
# while its number does not exceed the row's `survive` count, else 0.
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))
which gives:
> df2[,-1]
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
2) Using the data.table-package:
library(data.table)
# Turn df into a data.table and tag every original row with its row number.
setDT(df)[, rid := .I]
# For each original row: `survive` ones followed by `n - survive` zeros,
# with that row's a and b values alongside.
df2 <- df[, .(survive = rep(c(1, 0), c(survive, n - survive)), a, b), by = rid]
# The helper id is not part of the wanted output.
df2[, rid := NULL]
which gives:
> df2
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
Or a bit shorter:
# Shorter variant: group directly on a row index instead of adding a helper
# column first; each group yields `survive` ones then `n - survive` zeros.
# NOTE(review): the grouping column created by `by = 1:nrow(df)` is removed
# below under the name `nrow` -- presumably the name data.table auto-assigns
# to that expression; confirm before changing the `by` expression.
df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
][, nrow := NULL]
3) Using the dplyr-package:
library(dplyr)
# Tag rows with an id, repeat each row n times, then rebuild `survive` per id
# as `survive` ones followed by `n - survive` zeros.
df %>%
  mutate(rid = row_number()) %>%
  .[rep(1:nrow(df), times = df$n), ] %>%
  group_by(rid) %>%
  mutate(
    survive = rep(c(1, 0), times = c(unique(survive), unique(n) - unique(survive)))
  ) %>%
  ungroup() %>%
  select(-c(n, rid))
which gives:
# A tibble: 7 × 3
survive a b
<dbl> <dbl> <dbl>
1 1 1 0
2 1 1 0
3 0 1 0
4 1 1 0
5 0 1 0
6 1 0 1
7 1 0 1
Used data:
# Input used by the solutions above: repeat count `n`, number of ones
# `survive`, and the two indicator columns `a` and `b`.
df <- data.frame(
  n = c(3, 2, 2),
  survive = c(2, 1, 2),
  a = c(1, 1, 0),
  b = c(0, 0, 1)
)
Upvotes: 3
Reputation: 887501
We can do this with base R
# Works on the question's data frame `df` (the original snippet read an
# undefined `df1`). Repeat each row n times and drop the first two columns
# (n, survive); seq_len() avoids the c(1, 0) sequence 1:nrow() gives for
# a zero-row input.
df2 <- df[rep(seq_len(nrow(df)), df$n), -(1:2)]
row.names(df2) <- NULL
# For every original row build `survive` ones followed by `n - survive`
# zeros, then bind the flattened vector in front as column `Survive`.
df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1, 0), c(y, x - y)),
                                  df$n, df$survive)), df2)
df2
# Survive a b
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
Or a more vectorized approach is
# Repeat each row n times and drop the first two columns (n, survive).
df1 <- df[rep(seq_len(nrow(df)), df$n), -(1:2)]
# c(1, 0) repeated once per original row, each element repeated by the
# interleaved counts survive[1], n[1] - survive[1], survive[2], ...
# (rbind() stacks the two count vectors; c() reads them column by column).
# seq_len() replaces 1:nrow(df), which yields c(1, 0) for a zero-row df.
df1$survive <- with(df, rep(rep(c(1, 0), times = seq_len(nrow(df)) * 0 + 1)[0],
                            times = numeric(0)))
df1$survive <- with(df, rep(rep(c(1, 0), times = nrow(df)),
                            times = c(rbind(survive, n - survive))))
Upvotes: 3
Reputation: 51592
One solution using splitstackshape to expand rows and dplyr:
library(splitstackshape)
library(dplyr)
# Expand each row n times and recode `survive` to 0/1.
# The groups are keyed on a row id created *before* expansion, so adjacent
# original rows that share the same `survive` value are not merged.
df %>%
  mutate(grp = row_number()) %>%
  expandRows("n") %>%
  group_by(grp) %>%
  # Within a group the first `survive` copies get 1 and the rest get 0; this
  # zeroes every remaining copy, not just one, when n - survive > 1.
  mutate(survive = as.numeric(row_number() <= survive[1])) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 7 × 3
# survive a b
# <dbl> <dbl> <dbl>
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
Upvotes: 3