Cheng
Cheng

Reputation: 43

Replicate each row and change one column into binary values

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

How can I expand the last two columns of the data.frame above, so that each row appears the number of times specified in the column 'n'. And the second column "survive" changes into binary values 0/1 according to the value of "survive"

In other words:

n  survive a  b
3  2       1  0
2  1       1  0
2  2       0  1

To this

survive a  b
1       1  0
1       1  0
0       1  0
1       1  0
0       1  0
1       0  1
1       0  1

Upvotes: 3

Views: 378

Answers (4)

ulfelder
ulfelder

Reputation: 5335

Here's a solution using a split/apply/combine approach in base R:

df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(i) {

  survive = c(rep(1, i$survive), rep(0, i$n - i$survive))

  cbind(survive, i[rep(1, i$n), c("a", "b")])

}))

Result:

      survive a b
1.1         1 1 0
1.1.1       1 1 0
1.1.2       0 1 0
2.2         1 1 0
2.2.1       0 1 0
3.3         1 0 1
3.3.1       1 0 1

Upvotes: 2

Jaap
Jaap

Reputation: 83255

Several alternative solutions:

1) Using base R:

rn <- rep(1:nrow(df), df$n)
df2 <- df[rn,]
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))

which gives:

> df2[,-1]
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

2) Using the data.table-package:

library(data.table)
df2 <- setDT(df)[, rid := .I
                 ][, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b)
                   , by = rid
                   ][, rid := NULL][]

which gives:

> df2
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

Or a bit shorter:

df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
                 ][, nrow := NULL]

3) Using the dplyr-package:

library(dplyr)
df %>% 
  mutate(rid = row_number()) %>% 
  .[rep(1:nrow(df), df$n),] %>% 
  group_by(rid) %>% 
  mutate(survive = c(rep(1, unique(survive)), rep(0, unique(n) - unique(survive))) ) %>% 
  ungroup() %>% 
  select(-n, -rid)

which gives:

# A tibble: 7 × 3
  survive     a     b
    <dbl> <dbl> <dbl>
1       1     1     0
2       1     1     0
3       0     1     0
4       1     1     0
5       0     1     0
6       1     0     1
7       1     0     1

Used data:

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

Upvotes: 3

akrun
akrun

Reputation: 887501

We can do with base R

df2 <- df1[rep(1:nrow(df1), df1$n),-(1:2)]
row.names(df2) <- NULL
df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1,0),
             c(y, x-y)),  df1$n, df1$survive)), df2)
df2
#  Survive a b
#1       1 1 0
#2       1 1 0
#3       0 1 0
#4       1 1 0
#5       0 1 0
#6       1 0 1
#7       1 0 1

Or a more vectorized approach is

df1 <- df[rep(seq_len(nrow(df)), df$n),-(1:2)]
df1$survive <- with(df, rep(rep(c(1,0), nrow(df)), rbind(survive, n - survive)))

Upvotes: 3

Sotos
Sotos

Reputation: 51592

One solution using splitstackshape to expand rows and dplyr,

library(splitstackshape)
library(dplyr)

df %>% 
  mutate(new = 1) %>% 
  expandRows('n') %>% 
  group_by(grp = cumsum(c(1, diff(survive) != 0))) %>% 
  mutate(survive = replace(new, tail(new, n() - survive[1]), 0)) %>% 
  arrange(grp, desc(survive)) %>% 
  ungroup() %>% 
  select(-c(new, grp))

# A tibble: 7 × 3
#  survive     a     b
#    <dbl> <dbl> <dbl>
#1       1     1     0
#2       1     1     0
#3       0     1     0
#4       1     1     0
#5       0     1     0
#6       1     0     1
#7       1     0     1

Upvotes: 3

Related Questions