Rajan Mishra
Rajan Mishra

Reputation: 41

Finding if a value is present in next group in r

I have a dataset containing purchases made by different households across different retailers. For eg

Example Dataset

enter image description here

using dput()

structure(list(Household = c(76, 76, 76, 76, 76, 76, 76, 
76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 126, 
126, 126, 126, 126, 126, 126, 126, 126), Trip ID = c(1032497498L, 
1025776063L, 1029419047L, 1030418100L, 1029502602L, 1034153056L, 
1027035051L, 1027533991L, 1033515804L, 1032998207L, 1032066227L, 
1028192785L, 1033419039L, 1028730296L, 1027388499L, 1030652869L, 
1025638394L, 1034032718L, 1034032718L, 1025678520L, 1029490031L, 
1029898838L, 1028024134L, 1030324171L, 1031983761L, 1031983761L, 
1033767148L, 1023953965L, 1030954113L, 1030954113L, 1027392968L
), purchase_date = structure(c(1L, 2L, 23L, 50L, 52L, 74L, 77L, 
94L, 148L, 158L, 176L, 179L, 196L, 211L, 224L, 246L, 271L, 286L, 
286L, 309L, 329L, 346L, 2L, 9L, 46L, 46L, 50L, 58L, 66L, 66L, 
68L), .Label = c("2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04", 
"2012-01-05", "2012-01-06", "2012-01-07", "2012-01-08", "2012-01-09", 
"2012-01-10", "2012-01-11", "2012-01-12", "2012-01-13", "2012-01-14", 
"2012-01-15", "2012-01-16", "2012-01-17", "2012-01-18", "2012-01-19", 
"2012-01-20", "2012-01-21", "2012-01-22", "2012-01-23", "2012-01-24", 
"2012-01-25", "2012-01-26", "2012-01-27", "2012-01-28", "2012-01-29", 
"2012-01-30", "2012-01-31", "2012-02-01", "2012-02-02", "2012-02-03", 
"2012-02-04", "2012-02-05", "2012-02-06", "2012-02-07", "2012-02-08", 
"2012-02-09", "2012-02-10", "2012-02-11", "2012-02-12", "2012-02-13", 
"2012-02-14", "2012-02-15", "2012-02-16", "2012-02-17", "2012-02-18", 
"2012-02-19", "2012-02-20", "2012-02-21", "2012-02-22", "2012-02-23", 
"2012-02-24", "2012-02-25", "2012-02-26", "2012-02-27", "2012-02-28", 
"2012-02-29", "2012-03-01", "2012-03-02", "2012-03-03", "2012-03-04", 
"2012-03-05", "2012-03-06", "2012-03-07", "2012-03-08", "2012-03-09", 
"2012-03-10", "2012-03-11", "2012-03-12", "2012-03-13", "2012-03-14", 
"2012-03-15", "2012-03-16", "2012-03-17", "2012-03-18", "2012-03-19", 
"2012-03-20", "2012-03-21", "2012-03-22", "2012-03-23", "2012-03-24", 
"2012-03-25", "2012-03-26", "2012-03-27", "2012-03-28", "2012-03-29", 
"2012-03-30", "2012-03-31", "2012-04-01", "2012-04-02", "2012-04-03", 
"2012-04-04", "2012-04-05", "2012-04-06", "2012-04-07", "2012-04-08", 
"2012-04-09", "2012-04-10", "2012-04-11", "2012-04-12", "2012-04-13", 
"2012-04-14", "2012-04-15", "2012-04-16", "2012-04-17", "2012-04-18", 
"2012-04-19", "2012-04-20", "2012-04-21", "2012-04-22", "2012-04-23", 
"2012-04-24", "2012-04-25", "2012-04-26", "2012-04-27", "2012-04-28", 
"2012-04-29", "2012-04-30", "2012-05-01", "2012-05-02", "2012-05-03", 
"2012-05-04", "2012-05-05", "2012-05-06", "2012-05-07", "2012-05-08", 
"2012-05-09", "2012-05-10", "2012-05-11", "2012-05-12", "2012-05-13", 
"2012-05-14", "2012-05-15", "2012-05-16", "2012-05-17", "2012-05-18", 
"2012-05-19", "2012-05-20", "2012-05-21", "2012-05-22", "2012-05-23", 
"2012-05-24", "2012-05-25", "2012-05-26", "2012-05-27", "2012-05-28", 
"2012-05-29", "2012-05-30", "2012-05-31", "2012-06-01", "2012-06-02", 
"2012-06-03", "2012-06-04", "2012-06-05", "2012-06-06", "2012-06-07", 
"2012-06-08", "2012-06-09", "2012-06-10", "2012-06-11", "2012-06-12", 
"2012-06-13", "2012-06-14", "2012-06-15", "2012-06-16", "2012-06-17", 
"2012-06-18", "2012-06-19", "2012-06-20", "2012-06-21", "2012-06-22", 
"2012-06-23", "2012-06-24", "2012-06-25", "2012-06-26", "2012-06-27", 
"2012-06-28", "2012-06-29", "2012-06-30", "2012-07-01", "2012-07-02", 
"2012-07-03", "2012-07-04", "2012-07-05", "2012-07-06", "2012-07-07", 
"2012-07-08", "2012-07-09", "2012-07-10", "2012-07-11", "2012-07-12", 
"2012-07-13", "2012-07-14", "2012-07-15", "2012-07-16", "2012-07-17", 
"2012-07-18", "2012-07-19", "2012-07-20", "2012-07-21", "2012-07-22", 
"2012-07-23", "2012-07-24", "2012-07-25", "2012-07-26", "2012-07-27", 
"2012-07-28", "2012-07-29", "2012-07-30", "2012-07-31", "2012-08-01", 
"2012-08-02", "2012-08-03", "2012-08-04", "2012-08-05", "2012-08-06", 
"2012-08-07", "2012-08-08", "2012-08-09", "2012-08-10", "2012-08-11", 
"2012-08-12", "2012-08-13", "2012-08-14", "2012-08-15", "2012-08-16", 
"2012-08-17", "2012-08-18", "2012-08-19", "2012-08-20", "2012-08-21", 
"2012-08-22", "2012-08-23", "2012-08-24", "2012-08-25", "2012-08-26", 
"2012-08-27", "2012-08-28", "2012-08-29", "2012-08-30", "2012-08-31", 
"2012-09-01", "2012-09-02", "2012-09-03", "2012-09-04", "2012-09-05", 
"2012-09-06", "2012-09-07", "2012-09-08", "2012-09-09", "2012-09-10", 
"2012-09-11", "2012-09-12", "2012-09-13", "2012-09-14", "2012-09-15", 
"2012-09-16", "2012-09-17", "2012-09-18", "2012-09-19", "2012-09-20", 
"2012-09-21", "2012-09-22", "2012-09-23", "2012-09-24", "2012-09-25", 
"2012-09-26", "2012-09-27", "2012-09-28", "2012-09-29", "2012-09-30", 
"2012-10-01", "2012-10-02", "2012-10-03", "2012-10-04", "2012-10-05", 
"2012-10-06", "2012-10-07", "2012-10-08", "2012-10-09", "2012-10-10", 
"2012-10-11", "2012-10-12", "2012-10-13", "2012-10-14", "2012-10-15", 
"2012-10-16", "2012-10-17", "2012-10-18", "2012-10-19", "2012-10-20", 
"2012-10-21", "2012-10-22", "2012-10-23", "2012-10-24", "2012-10-25", 
"2012-10-26", "2012-10-27", "2012-10-28", "2012-10-29", "2012-10-30", 
"2012-10-31", "2012-11-01", "2012-11-02", "2012-11-03", "2012-11-04", 
"2012-11-05", "2012-11-06", "2012-11-07", "2012-11-08", "2012-11-09", 
"2012-11-10", "2012-11-11", "2012-11-12", "2012-11-13", "2012-11-14", 
"2012-11-15", "2012-11-16", "2012-11-17", "2012-11-18", "2012-11-19", 
"2012-11-20", "2012-11-21", "2012-11-22", "2012-11-23", "2012-11-24", 
"2012-11-25", "2012-11-26", "2012-11-27", "2012-11-28", "2012-11-29", 
"2012-11-30", "2012-12-01", "2012-12-02", "2012-12-03", "2012-12-04", 
"2012-12-05", "2012-12-06", "2012-12-07", "2012-12-08", "2012-12-09", 
"2012-12-10", "2012-12-11", "2012-12-12", "2012-12-13", "2012-12-14", 
"2012-12-15", "2012-12-16", "2012-12-17", "2012-12-18", "2012-12-19", 
"2012-12-20", "2012-12-21", "2012-12-22", "2012-12-23", "2012-12-24", 
"2012-12-25", "2012-12-26", "2012-12-27", "2012-12-28", "2012-12-29"
), class = "factor"), Retailer = c(11024, 11024, 11024, 
11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024, 
11024, 11024, 11024, 11024, 2353, 11024, 11024, 2353, 11024, 
11024, 63882, 650, 89960, 89960, 650, 89960, 89960, 89960, 650
), Brand = structure(c(19L, 74L, 19L, 48L, 19L, 48L, 
19L, 19L, 19L, 48L, 48L, 31L, 46L, 31L, 31L, 48L, 74L, 31L, 74L, 
19L, 31L, 19L, 48L, 48L, 31L, 31L, 48L, 31L, 31L, 48L, 48L), .Label = c("ABUNDANCE", 
"ALPEN", "AMERICAN BREAKFAST ", "ANNIE'S HOMEGROWN", "ARWHD MLS", 
"BARBARA'S", "BEAR NAKED", "BEAR RIVER", "BOB'S RED MILL", "BOKOMO COUNTRY", 
"BREAKFAST CHOICE", "BREAKFAST ZONE", "BROOKFARM MACADAMIA ", 
"BRUGGEN", "BUCKEYE HEROES", "CADIA", "CASCADIAN FARM ", "CHOCOLATE SPOONERS", 
"CTL BR", "DORSET", "ENJOY LIFE PERKY'S CRUNCHY FLX", "EREWHON", 
"F-FACTOR", "FAMILIA", "FIELD DAY", "FINAX", "FLEURY FLAKES MARC ANDRE", 
"FOOD FOR LIFE EZEKIEL 50", "FORRELLI ", "GEFEN KING", "GENERAL MILLS", 
"GERONIMO PEYTON HILLIS REDZONE", "GLUCERNA", "GLUTINO", "GOLDEN FOODS", 
"GRANVITA PUFFY'S", "GREENBRIER INT INC-NBL CRN FLK", "HEALTH VALLEY", 
"HODGSON MILL", "HOME FAVORITE", "HOSTESS TOASTED OATS", "HSP", 
"ISABEL'S WAY ", "JASPER", "JUSTIN VERLANDER'S FASTBLL FLK", 
"KASHI", "KAY'S NATURALS BETTER BALANCE", "Kellogg", "KIND", 
"KOZY SHACK READY GRAINS", "KRETSCHMER", "LADY LIBERTY", "LIEBER'S", 
"LIVING INTENTIONS SPRFD CRL", "LOVE GROWN FOODS", "MAIZORO", 
"MANISCHEWITZ", "MILL SELECT ", "MOTHER'S", "MULTIGRAIN SPNRS", 
"NASH BROTHERS", "NATURE'S PATH", "NESTLE", "NEW ENGLAND NATURALS", 
"NEWMAN'S OWN SWEET ENGH WHT PF", "NUTRISYSTEM NOURISH", "NUTRITIOUS LIVING", 
"PAMPA ", "PASKESZ CHOCO RIOS", "PBLC LB BR-NBL HY NT TSD OT DM", 
"PEACE", "POST", "PROTEIN PLUS CORN FLAKE", "QUAKER", "RALSTON", 
"RLSTN", "SALLY'S ", "SCRUMPTIOUS SPELNDID CRLS CBB", "SEITENBACHER MUESLI", 
"SIMPLY FIBER", "SKINNER'S RAISIN BRAN", "SORIANA WHOLE BRAN", 
"STREIT'S FRUIT & NUT MUESLI", "SUNBELT", "SWAD", "T. ABRAHAM'S", 
"TAANUG CORN FLAKES", "TASTY", "TEMMY'S", "THE", "THREE SISTERS", 
"TIKISS SWEETENED PUFFD WHL WHT", "TRU ROOTS", "VOTTO'S", "WEETABIX", 
"WHOLESOME GOODNESS", "WILD ROOTS ANCIENT ORIGINS", "WONDER CORN FLAKES", 
"YOG ACTIVE"), class = "factor")), row.names = c(90609L, 222436L, 
90606L, 688592L, 90607L, 688593L, 90605L, 90604L, 90608L, 668330L, 
321377L, 567447L, 945385L, 567445L, 567443L, 892854L, 583186L, 
567446L, 583185L, 168138L, 567444L, 60086L, 698120L, 698127L, 
3933L, 809409L, 698129L, 15286L, 15284L, 698116L, 319455L), class = "data.frame")

I have to find whether a particular household has bought any brand from anywhere and then bought anything from a particular retailer on the next trip.
So, if, first I chose a retailer, 11024. Then for each trip for eg row 17, I would check whether the next trip is made to retailer 11024. If the next trip is made to the retailer 11024, I would mark row 17 as 1. Then for rows 18 & 19 - they are purchases on the same trip, so I would check row 20, which is the next trip. And, since that trip is not to 11024, I would mark the previous trip (both rows 18 & 19) as 0. So, the resulting output, if I want to check for retailer "11024", would be

Example Output :

enter image description here

In all those cases when the next trip is made to the retailer 11024, the purchase instances of the previous trip is put to 1. And, whenever the next trip is made to some other retailer, it is put to 0. Trips are sorted by purchase date. Now, it is easy when I have only 1 purchase per trip. I do it by the following code using lead()

e1$next_ret = e1$retailer
e1$next_ret = ifelse(e1$next_ret == 6901,1,0)
temp = e1 %>%
  group_by(Household) %>%
  mutate(dummy = lead(next_ret, default = 0))

However, when there is more than 1 purchase in a single trip, it doesn't work. Please Help

Upvotes: 0

Views: 117

Answers (1)

AnilGoyal
AnilGoyal

Reputation: 26218

Do you require this..

#df used

> df
       household_code trip_code_uc purchase_date retailer_code Overall_Brand
90609              76   1032497498    2012-01-01         11024        CTL BR
222436             76   1025776063    2012-01-02         11024        QUAKER
90606              76   1029419047    2012-01-23         11024        CTL BR
688592             76   1030418100    2012-02-19         11024       Kellogg
90607              76   1029502602    2012-02-21         11024        CTL BR
688593             76   1034153056    2012-03-14         11024       Kellogg
90605              76   1027035051    2012-03-17         11024        CTL BR
90604              76   1027533991    2012-04-03         11024        CTL BR
90608              76   1033515804    2012-05-27         11024        CTL BR
668330             76   1032998207    2012-06-06         11024       Kellogg
321377             76   1032066227    2012-06-24         11024       Kellogg
567447             76   1028192785    2012-06-27         11024 GENERAL MILLS
945385             76   1033419039    2012-07-14         11024         KASHI
567445             76   1028730296    2012-07-29         11024 GENERAL MILLS
567443             76   1027388499    2012-08-11         11024 GENERAL MILLS
892854             76   1030652869    2012-09-02         11024       Kellogg
583186             76   1025638394    2012-09-27          2353        QUAKER
567446             76   1034032718    2012-10-12         11024 GENERAL MILLS
583185             76   1034032718    2012-10-12         11024        QUAKER
168138             76   1025678520    2012-11-04          2353        CTL BR
567444             76   1029490031    2012-11-24         11024 GENERAL MILLS
60086              76   1029898838    2012-12-11         11024        CTL BR
698120            126   1028024134    2012-01-02         63882       Kellogg
698127            126   1030324171    2012-01-09           650       Kellogg
3933              126   1031983761    2012-02-15         89960 GENERAL MILLS
809409            126   1031983761    2012-02-15         89960 GENERAL MILLS
698129            126   1033767148    2012-02-19           650       Kellogg
15286             126   1023953965    2012-02-27         89960 GENERAL MILLS
15284             126   1030954113    2012-03-06         89960 GENERAL MILLS
698116            126   1030954113    2012-03-06         89960       Kellogg
319455            126   1027392968    2012-03-08           650       Kellogg

Solution proposed assuming that you want- if a customer has purchased a item next time from same retailer then 1 else 0

library(dplyr)
library(tidyr)

df %>% mutate(rowid = row_number()) %>%
  group_by(household_code, Overall_Brand) %>%
  mutate(cohort_id = dense_rank(paste(purchase_date, trip_code_uc))) %>%
  ungroup(retailer_code) %>% arrange(household_code, Overall_Brand, cohort_id) %>%
  mutate(repeat_pur = ifelse(lead(retailer_code)==retailer_code, 1, 0),
         repeat_pur = replace_na(repeat_pur, 0)) %>%
  ungroup() %>% arrange(rowid) %>% select(-cohort_id, -rowid)

# A tibble: 31 x 6
   household_code trip_code_uc purchase_date retailer_code Overall_Brand repeat_pur
            <dbl>        <int> <date>                <dbl> <chr>              <dbl>
 1             76   1032497498 2012-01-01            11024 CTL BR                 1
 2             76   1025776063 2012-01-02            11024 QUAKER                 0
 3             76   1029419047 2012-01-23            11024 CTL BR                 1
 4             76   1030418100 2012-02-19            11024 Kellogg                1
 5             76   1029502602 2012-02-21            11024 CTL BR                 1
 6             76   1034153056 2012-03-14            11024 Kellogg                1
 7             76   1027035051 2012-03-17            11024 CTL BR                 1
 8             76   1027533991 2012-04-03            11024 CTL BR                 1
 9             76   1033515804 2012-05-27            11024 CTL BR                 0
10             76   1032998207 2012-06-06            11024 Kellogg                1
11             76   1032066227 2012-06-24            11024 Kellogg                1
12             76   1028192785 2012-06-27            11024 GENERAL MILLS          1
13             76   1033419039 2012-07-14            11024 KASHI                  0
14             76   1028730296 2012-07-29            11024 GENERAL MILLS          1
15             76   1027388499 2012-08-11            11024 GENERAL MILLS          1
16             76   1030652869 2012-09-02            11024 Kellogg                0
17             76   1025638394 2012-09-27             2353 QUAKER                 0
18             76   1034032718 2012-10-12            11024 GENERAL MILLS          1
19             76   1034032718 2012-10-12            11024 QUAKER                 0
20             76   1025678520 2012-11-04             2353 CTL BR                 0
21             76   1029490031 2012-11-24            11024 GENERAL MILLS          0
22             76   1029898838 2012-12-11            11024 CTL BR                 0
23            126   1028024134 2012-01-02            63882 Kellogg                0
24            126   1030324171 2012-01-09              650 Kellogg                1
25            126   1031983761 2012-02-15            89960 GENERAL MILLS          1
26            126   1031983761 2012-02-15            89960 GENERAL MILLS          1
27            126   1033767148 2012-02-19              650 Kellogg                0
28            126   1023953965 2012-02-27            89960 GENERAL MILLS          1
29            126   1030954113 2012-03-06            89960 GENERAL MILLS          0
30            126   1030954113 2012-03-06            89960 Kellogg                0
31            126   1027392968 2012-03-08              650 Kellogg                0

If I have not understood correctly, please tell the exact requirement.

Upvotes: 1

Related Questions