Cezary
Cezary

Reputation: 25

How to mark rows in groups between row numbers based on value?

here is a table example:

dt <- data.frame(cat = rep(c("A", "B", "C"), c(10, 8, 10)), value=c(0,0,0,3,5,0,8,0,0,0,0,0,2,2,3,0,1,0,0,0,0,0,1,2,3,4,0,0))

dt
   cat value
1    A     0
2    A     0
3    A     0
4    A     3
5    A     5
6    A     0
7    A     8
8    A     0
9    A     0
10   A     0
11   B     0
12   B     0
13   B     2
14   B     2
15   B     3
16   B     0
17   B     1
18   B     0
19   C     0
20   C     0
21   C     0
22   C     0
23   C     1
24   C     2
25   C     3
26   C     4
27   C     0
28   C     0

What I would like to do is to flag rows between first and last value which is different than 0 for each group (and also those after last value marked in the other way). So, the final table should look like:

   cat value flag
1    A     0    0
2    A     0    0
3    A     0    0
4    A     3    1
5    A     5    1
6    A     0    1
7    A     8    1
8    A     0    2
9    A     0    2
10   A     0    2
11   B     0    0
12   B     0    0
13   B     2    1
14   B     2    1
15   B     3    1
16   B     0    1
17   B     1    1
18   B     0    2
19   C     0    0
20   C     0    0
21   C     0    0
22   C     0    0
23   C     1    1
24   C     2    1
25   C     3    1
26   C     4    1
27   C     0    2
28   C     0    2

Thanks a lot in advance,

Upvotes: 2

Views: 45

Answers (3)

A base R solution

dt_split = lapply( split(dt, f = dt$cat), function(x){
  # Find nonzero elements
  flag_tmp = which(x$value!=0)
  # Define flags
  x$flag =c(rep(0,flag_tmp[1]-1), # The leading zeros
            rep(1,tail(flag_tmp, n=1)+1 - flag_tmp[1]), # The nonzero flag
            rep(2, nrow(x) -tail(flag_tmp, n=1)) # The trailing zero flag
  )
  x
})
dt = do.call(rbind, dt_split)

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 388817

Write a function which assign 0, 1 and 2 value based on condition.

library(dplyr)

assign_flag <- function(x) {
  #First non-zero value
  first <- match(TRUE, x > 0) 
  #last non-zero value
  last <- which.max(cumsum(x))
  case_when(row_number() < first ~ 0, 
            row_number() <= last ~ 1, 
            TRUE ~ 2)
}

and apply it for each group.

dt %>%
  group_by(cat) %>%
  mutate(flag = assign_flag(value)) %>%
  ungroup

#   cat value flag
#1    A     0    0
#2    A     0    0
#3    A     0    0
#4    A     3    1
#5    A     5    1
#6    A     0    1
#7    A     8    1
#8    A     0    2
#9    A     0    2
#10   A     0    2
#11   B     0    0
#12   B     0    0
#13   B     2    1
#14   B     2    1
#15   B     3    1
#16   B     0    1
#17   B     1    1
#18   B     0    2
#19   C     0    0
#20   C     0    0
#21   C     0    0
#22   C     0    0
#23   C     1    1
#24   C     2    1
#25   C     3    1
#26   C     4    1
#27   C     0    2
#28   C     0    2

Upvotes: 1

Karthik S
Karthik S

Reputation: 11584

Does this work:

library(dplyr)
dt %>% group_by(cat) %>% mutate(c1 = cumsum(value)) %>% 
   mutate(flat = case_when(c1 == 0 ~ 0,
                           c1 == max(c1) & value == 0 ~ 2,
                           TRUE ~ 1)) %>% 
   select(1,2,4) %>% print(n = 50)
# A tibble: 28 x 3
# Groups:   cat [3]
   cat   value  flat
   <chr> <dbl> <dbl>
 1 A         0     0
 2 A         0     0
 3 A         0     0
 4 A         3     1
 5 A         5     1
 6 A         0     1
 7 A         8     1
 8 A         0     2
 9 A         0     2
10 A         0     2
11 B         0     0
12 B         0     0
13 B         2     1
14 B         2     1
15 B         3     1
16 B         0     1
17 B         1     1
18 B         0     2
19 C         0     0
20 C         0     0
21 C         0     0
22 C         0     0
23 C         1     1
24 C         2     1
25 C         3     1
26 C         4     1
27 C         0     2
28 C         0     2

Upvotes: 1

Related Questions