Alexander
Alexander

Reputation: 4635

numbering records in grouped data

I need to label the records when the certain condition is matched for example inside the grouped data when sw1=='q'&type=='good' its a record_1 and record_1 need to be repeated until the next hit sw1=='q'&type=='good'.

Here is the what I mean

df <- data.frame(gr = rep(letters[1:2],each=8), 
             sw1=rep(c(letters[17:20],sw2=letters[c(17,21,17,23)]),2),

             type =c(c("good", "bad",'Visky','Wine',"good", "good","Visky","bad"),c("good","Wine","Visky","bad","good","Visky","good","bad")))

    gr sw1  type
1   a   q  good   #record 1
2   a   r   bad
3   a   s Visky
4   a   t  Wine
5   a   q  good   #record 2
6   a   u  good
7   a   q Visky
8   a   w   bad
9   b   q  good   #record 1
10  b   r  Wine
11  b   s Visky
12  b   t   bad
13  b   q  good   #record 2
14  b   u Visky
15  b   q  good   #record 3
16  b   w   bad

So I tried following

library(dplyr)


df%>%
  group_by(gr)%>%
  mutate(label=cummax(type=="good"&sw1=="q"))

which outputs

# A tibble: 16 x 4
# Groups:   gr [2]
   gr    sw1   type  label
   <fct> <fct> <fct> <int>
 1 a     q     good      1
 2 a     r     bad       1
 3 a     s     Visky     1
 4 a     t     Wine      1
 5 a     q     good      1
 6 a     u     good      1
 7 a     q     Visky     1
 8 a     w     bad       1
 9 b     q     good      1
10 b     r     Wine      1
11 b     s     Visky     1
12 b     t     bad       1
13 b     q     good      1
14 b     u     Visky     1
15 b     q     good      1
16 b     w     bad       1

but the expected output is

   gr    sw1   type  label   record
 1 a     q     good      1   record_1
 2 a     r     bad       1   record_1 
 3 a     s     Visky     1   record_1
 4 a     t     Wine      1   record_1
 5 a     q     good      2   record_2 
 6 a     u     good      2   record_2
 7 a     q     Visky     2   record_2
 8 a     w     bad       2   record_2
 9 b     q     good      1   record_1
10 b     r     Wine      1   record_1
11 b     s     Visky     1   record_1
12 b     t     bad       1   record_1
13 b     q     good      2   record_2
14 b     u     Visky     2   record_2
15 b     q     good      3   record_3
16 b     w     bad       3   record_3

forgot to mention I also tried dense_rank but the output is not even close to the expected output.

Upvotes: 1

Views: 39

Answers (2)

PKumar
PKumar

Reputation: 11128

You should use cumsum like below:

library(tidyverse)

df%>%
    group_by(gr)%>%
    mutate(label=cumsum(type=="good"&sw1=="q")+0L,
           record = paste0("record_",label))

Upvotes: 1

MKR
MKR

Reputation: 20095

You can try:

library(dplyr)


df%>%
  group_by(gr)%>%
  mutate(label=cumsum(type=="good"&sw1=="q"))

Upvotes: 1

Related Questions