Reputation: 1441
Does anyone have an idea how to achieve the following? Start from here:
# Example input: a 0/1 indicator column containing three separate runs of 1s.
df <- data.frame(
  var = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1)
)
and achieve this:
# Desired result: newvar numbers each run of 1s in var consecutively
# (1, 2, 3, ...) and is 0 wherever var is 0.
df <- data.frame(
  var    = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1),
  newvar = c(0, 0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 3)
)
Upvotes: 2
Views: 89
Reputation: 49448
Another one for fun:
library(data.table)
# setDT() converts df to a data.table in place, so no reassignment is needed.
setDT(df)
# Running count of the non-zero runs seen so far.
tmp = 0
# rleid(var) gives each stretch of consecutive equal values its own group id.
# A group whose first value is non-zero bumps the counter and takes the new
# count as newvar; a group of zeros gets 0. The trailing [] prints the result.
# NOTE(review): this depends on the tmp assignment made inside j carrying
# over from one group to the next -- confirm with the data.table version in use.
df[, newvar := if(var[1] != 0) tmp <- tmp + 1 else 0, by = rleid(var)][]
And another one:
# Flag the first row of every run of 1s (a step of +1, with an implicit 0
# placed before row 1). The running count of those flags is the run number,
# and multiplying by var resets the rows outside a run to 0.
df[, newvar := {
  run_start <- diff(c(0, var)) == 1
  var * cumsum(run_start)
}]
# or if still a data.frame
within(df, {
  newvar <- var * cumsum(diff(c(0, var)) == 1)
})
Upvotes: 1
Reputation: 371
An efficient solution:
# temp is the step from the previous row; a positive step marks the first row
# of a run of 1s. The value "before" row 1 is taken to be 0 (default = 0), so
# a run that starts in the very first row is counted as well. Using
# df$var[1] as the default would make the first step always 0 and leave such
# a leading run numbered 0.
df %>%
  mutate(temp = var - lag(var, default = 0)) %>%
  # Running count of run starts, zeroed on rows where var is 0.
  mutate(newvar = var * cumsum(temp > 0))
or without the additional column:
# Same idea without a helper column: newvar first holds the step from the
# previous row, then is overwritten with the run number. default = 0 treats
# the value before row 1 as 0, so a run of 1s that starts in the first row is
# counted too (with df$var[1] as the default it would be numbered 0).
df %>%
  mutate(newvar = var - lag(var, default = 0)) %>%
  mutate(newvar = var * cumsum(newvar > 0))
var temp newvar
1 0 0 0
2 0 0 0
3 1 1 1
4 1 0 1
5 0 -1 0
6 0 0 0
7 0 0 0
8 1 1 2
9 1 0 2
10 0 -1 0
11 0 0 0
12 0 0 0
13 0 0 0
14 1 1 3
15 1 0 3
Upvotes: 1
Reputation: 17678
You can try the following:
# n flags every row whose value differs from the previous row (the value
# before row 1 is taken to be 0), n2 keeps only the flags that fall on a 1,
# i.e. the first row of each run of 1s, and res is the running count of those
# run starts, shown only on rows where var is 1.
# Comparing with lag() rather than lead() makes every run contribute exactly
# one to the count, whatever its length; flagging var == lead(var) adds
# (run length - 1) per run and is only right for runs of exactly two rows.
df %>%
  mutate(n = ifelse(var != lag(var, default = 0), 1, 0)) %>%
  mutate(n2 = ifelse(var == 0, 0, n)) %>%
  mutate(res = ifelse(var == 1, cumsum(n2), 0))
var n n2 res
1 0 0 0 0
2 0 0 0 0
3 1 1 1 1
4 1 0 0 1
5 0 1 0 0
6 0 0 0 0
7 0 0 0 0
8 1 1 1 2
9 1 0 0 2
10 0 1 0 0
11 0 0 0 0
12 0 0 0 0
13 0 0 0 0
14 1 1 1 3
15 1 0 0 3
Then use select(var, res) to keep
only the columns you need.
Upvotes: 2
Reputation: 887951
Here is an option with rle:
replace the 'values' that are not 0 with a sequence along those values, and then call inverse.rle
to get the full vector.
# Run-length encode var, renumber the non-zero runs 1, 2, 3, ... in order of
# appearance (zero runs keep the value 0), then expand the encoding back to a
# full-length vector. local() keeps the helper variables out of the workspace.
df$newvar <- local({
  runs <- rle(df$var)
  is_run <- runs$values != 0
  runs$values[is_run] <- seq_along(runs$values[is_run])
  inverse.rle(runs)
})
df
# var newvar
#1 0 0
#2 0 0
#3 1 1
#4 1 1
#5 0 0
#6 0 0
#7 0 0
#8 1 2
#9 1 2
#10 0 0
#11 0 0
#12 0 0
#13 0 0
#14 1 3
#15 1 3
Upvotes: 3