Maximilian
Maximilian

Reputation: 121

Count rows until criterion is reached, then start again

My data looks like this:

id = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
time=c(20,30,1100,40,31,32,33,1005,22,23,1001,24,12,13,14,1002)
test <- data.frame(id,time)

I am now trying to count the rows until time > 1000grouped by id . So far I got

library(dplyr) 
test %>%    
group_by(id, idx = cumsum(time >= 1000))
%>%    
mutate(trip_count = row_number()) %>%    
ungroup %>%   
select(-idx)

This works so far but instead of 1 when time > 1000 I want the count to go one further and starting with 1again at the next column. Is this somehow possible?

Upvotes: 1

Views: 366

Answers (2)

Karthik S
Karthik S

Reputation: 11584

Since each group has 4 rows in your data, we can use this:

> test %>% left_join(test %>% filter(time < 1000) %>% group_by(id) %>% mutate(trip_count = row_number())) %>% group_by(id) %>% 
+                                                       mutate(trip_count = replace_na(trip_count, 4))
Joining, by = c("id", "time")
# A tibble: 16 x 3
# Groups:   id [4]
      id  time trip_count
   <dbl> <dbl>      <dbl>
 1     1    20          1
 2     1    30          2
 3     1    40          3
 4     1  1100          4
 5     2    31          1
 6     2    32          2
 7     2    33          3
 8     2  1005          4
 9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4
> 

If your data doesn't have 4 rows per group, can use this:

> id = c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,4)
> time=c(20,30,40,1100,31,32,33,1005,22,23,24,1001,12,13,14,15,1002)
> test <- data.frame(id,time)
> test %>% left_join(test %>% filter(time < 1000) %>% group_by(id) %>% mutate(trip_count = row_number())) %>% group_by(id) %>% 
+               mutate(across(trip_count, ~ replace(., is.na(.), n())))
Joining, by = c("id", "time")
# A tibble: 17 x 3
# Groups:   id [4]
      id  time trip_count
   <dbl> <dbl>      <int>
 1     1    20          1
 2     1    30          2
 3     1    40          3
 4     1  1100          4
 5     2    31          1
 6     2    32          2
 7     2    33          3
 8     2  1005          4
 9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4    15          4
17     4  1002          5
> 

I added additional row to group 4.

Based on new data as shared by OP:

> test %>% 
+   left_join(test %>% group_by(id) %>% filter(row_number() < which(time >= 1000)) %>% 
+   mutate(trip_count = row_number())) %>% 
+   left_join(test %>% group_by(id) %>% filter(row_number() > which(time >= 1000)) %>% mutate(trip_count1 = row_number())) %>% 
+                     mutate(trip_count = coalesce(trip_count, trip_count1)) %>% select(-trip_count1) %>% group_by(id) %>% 
+                       mutate(rowid = row_number()) %>% rowwise() %>%  mutate(trip_count = replace_na(trip_count, rowid)) %>% select(-rowid)
Joining, by = c("id", "time")
Joining, by = c("id", "time")
# A tibble: 16 x 3
# Rowwise:  id
      id  time trip_count
   <dbl> <dbl>      <int>
 1     1    20          1
 2     1    30          2
 3     1  1100          3
 4     1    40          1
 5     2    31          1
 6     2    32          2
 7     2    33          3
 8     2  1005          4
 9     3    22          1
10     3    23          2
11     3  1001          3
12     3    24          1
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4
> 

Upvotes: 1

arg0naut91
arg0naut91

Reputation: 14764

You could use the lag:

library(dplyr)

test %>%    
  group_by(id, idx = cumsum(lag(time, default = 0) >= 1000)) %>%    
  mutate(trip_count = row_number()) %>%    
  ungroup %>%   
  select(-idx)

Output:

# A tibble: 16 x 3
      id  time trip_count
   <dbl> <dbl>      <int>
 1     1    20          1
 2     1    30          2
 3     1    40          3
 4     1  1100          4
 5     2    31          1
 6     2    32          2
 7     2    33          3
 8     2  1005          4
 9     3    22          1
10     3    23          2
11     3    24          3
12     3  1001          4
13     4    12          1
14     4    13          2
15     4    14          3
16     4  1002          4

Upvotes: 0

Related Questions