Reputation: 25
here is a table example:
dt <- data.frame(cat = rep(c("A", "B", "C"), c(10, 8, 10)), value=c(0,0,0,3,5,0,8,0,0,0,0,0,2,2,3,0,1,0,0,0,0,0,1,2,3,4,0,0))
dt
cat value
1 A 0
2 A 0
3 A 0
4 A 3
5 A 5
6 A 0
7 A 8
8 A 0
9 A 0
10 A 0
11 B 0
12 B 0
13 B 2
14 B 2
15 B 3
16 B 0
17 B 1
18 B 0
19 C 0
20 C 0
21 C 0
22 C 0
23 C 1
24 C 2
25 C 3
26 C 4
27 C 0
28 C 0
What I would like to do is to flag rows between first and last value which is different than 0 for each group (and also those after last value marked in the other way). So, the final table should look like:
cat value flag
1 A 0 0
2 A 0 0
3 A 0 0
4 A 3 1
5 A 5 1
6 A 0 1
7 A 8 1
8 A 0 2
9 A 0 2
10 A 0 2
11 B 0 0
12 B 0 0
13 B 2 1
14 B 2 1
15 B 3 1
16 B 0 1
17 B 1 1
18 B 0 2
19 C 0 0
20 C 0 0
21 C 0 0
22 C 0 0
23 C 1 1
24 C 2 1
25 C 3 1
26 C 4 1
27 C 0 2
28 C 0 2
Thanks a lot in advance,
Upvotes: 2
Views: 45
Reputation: 109
A base R solution
dt_split = lapply( split(dt, f = dt$cat), function(x){
# Find nonzero elements
flag_tmp = which(x$value!=0)
# Define flags
x$flag =c(rep(0,flag_tmp[1]-1), # The leading zeros
rep(1,tail(flag_tmp, n=1)+1 - flag_tmp[1]), # The nonzero flag
rep(2, nrow(x) -tail(flag_tmp, n=1)) # The trailing zero flag
)
x
})
dt = do.call(rbind, dt_split)
Upvotes: 0
Reputation: 388817
Write a function which assign 0, 1 and 2 value based on condition.
library(dplyr)
assign_flag <- function(x) {
#First non-zero value
first <- match(TRUE, x > 0)
#last non-zero value
last <- which.max(cumsum(x))
case_when(row_number() < first ~ 0,
row_number() <= last ~ 1,
TRUE ~ 2)
}
and apply it for each group.
dt %>%
group_by(cat) %>%
mutate(flag = assign_flag(value)) %>%
ungroup
# cat value flag
#1 A 0 0
#2 A 0 0
#3 A 0 0
#4 A 3 1
#5 A 5 1
#6 A 0 1
#7 A 8 1
#8 A 0 2
#9 A 0 2
#10 A 0 2
#11 B 0 0
#12 B 0 0
#13 B 2 1
#14 B 2 1
#15 B 3 1
#16 B 0 1
#17 B 1 1
#18 B 0 2
#19 C 0 0
#20 C 0 0
#21 C 0 0
#22 C 0 0
#23 C 1 1
#24 C 2 1
#25 C 3 1
#26 C 4 1
#27 C 0 2
#28 C 0 2
Upvotes: 1
Reputation: 11584
Does this work:
library(dplyr)
dt %>% group_by(cat) %>% mutate(c1 = cumsum(value)) %>%
mutate(flat = case_when(c1 == 0 ~ 0,
c1 == max(c1) & value == 0 ~ 2,
TRUE ~ 1)) %>%
select(1,2,4) %>% print(n = 50)
# A tibble: 28 x 3
# Groups: cat [3]
cat value flat
<chr> <dbl> <dbl>
1 A 0 0
2 A 0 0
3 A 0 0
4 A 3 1
5 A 5 1
6 A 0 1
7 A 8 1
8 A 0 2
9 A 0 2
10 A 0 2
11 B 0 0
12 B 0 0
13 B 2 1
14 B 2 1
15 B 3 1
16 B 0 1
17 B 1 1
18 B 0 2
19 C 0 0
20 C 0 0
21 C 0 0
22 C 0 0
23 C 1 1
24 C 2 1
25 C 3 1
26 C 4 1
27 C 0 2
28 C 0 2
Upvotes: 1