importm
importm

Reputation: 305

count zeros before a different value by ID in r

I need to count zeros by ID in R, but I need them just the zeros before a different Value. Here's a example of my data.

x<-tibble::tribble(
  ~ID,        ~Date, ~Value,
   1L, "01-01-2016",     0L,
   1L, "01-02-2016",     0L,
   1L, "01-03-2016",     0L,
   1L, "01-04-2016",     0L,
   1L, "01-05-2016",  1000L,
   1L, "01-06-2016",     0L,
   2L, "01-01-2016",     0L,
   2L, "01-02-2016",     0L,
   2L, "01-03-2016",  1500L,
   2L, "01-04-2016",     0L,
   2L, "01-05-2016",     0L,
   2L, "01-06-2016",     0L,
   3L, "01-01-2016",     0L,
   3L, "01-02-2016",  -300L,
   3L, "01-03-2016",     0L,
   3L, "01-04-2016",     0L,
   3L, "01-05-2016",   200L,
   3L, "01-06-2016",     0L
  )
x<-as.data.frame(x)

 head(x)
   #   ID       Date Value
   # 1  1 01-01-2016     0
   # 2  1 01-02-2016     0
   # 3  1 01-03-2016     0
   # 4  1 01-04-2016     0
   # 5  1 01-05-2016  1000
   # 6  1 01-06-2016     0

What I'm looking it's something like this

ID    1 2 3 
Count 4 2 1

Because there's 4 zeros before a different value in ID 1, 2 zeros in ID 2 and 1 zero in ID 3. Also, I'd like to save it in the dataframe as it follows:

new_x<-tibble::tribble(
  ~ID,        ~Date, ~Value, ~Count,
   1L, "01-01-2016",     0L,     4L,
   1L, "01-02-2016",     0L,     4L,
   1L, "01-03-2016",     0L,     4L,
   1L, "01-04-2016",     0L,     4L,
   1L, "01-05-2016",  1000L,     4L,
   1L, "01-06-2016",     0L,     4L,
   2L, "01-01-2016",     0L,     2L,
   2L, "01-02-2016",     0L,     2L,
   2L, "01-03-2016",  1500L,     2L,
   2L, "01-04-2016",     0L,     2L,
   2L, "01-05-2016",     0L,     2L,
   2L, "01-06-2016",     0L,     2L,
   3L, "01-01-2016",     0L,     1L,
   3L, "01-02-2016",  -300L,     1L,
   3L, "01-03-2016",     0L,     1L,
   3L, "01-04-2016",     0L,     1L,
   3L, "01-05-2016",   200L,     1L,
   3L, "01-06-2016",     0L,     1L
  )

Does anyone how to solve it? Thanks!

Upvotes: 1

Views: 156

Answers (2)

arg0naut91
arg0naut91

Reputation: 14764

One way would be:

library(tidyverse)

x %>%
  group_by(ID) %>%
  summarise(n = sum(cumsum(Value != 0) == 0, na.rm = TRUE)) %>%
  pivot_wider(names_from = ID, values_from = n) %>%
  add_column(ID = 'Count', .before = 1) %>%
  as.data.frame

Output:

     ID 1 2 3
1 Count 4 2 1

For saving as column:

x %>%
  group_by(ID) %>%
  mutate(Count = sum(cumsum(Value != 0) == 0, na.rm = TRUE))

There's of course also no need for packages:

transform(x, Count = ave(Value, ID, FUN = function(x) sum(cumsum(x != 0) == 0, na.rm = TRUE)))

Upvotes: 1

RyanFrost
RyanFrost

Reputation: 1428

Here's another way to do it using purrr's detect_index:

library(purrr)
library(dplyr)

x %>%
  group_by(ID) %>%
  mutate(Count = detect_index(Value, ~ .x != 0) - 1)
#> # A tibble: 18 x 4
#> # Groups:   ID [3]
#>       ID Date       Value Count
#>    <int> <chr>      <int> <dbl>
#>  1     1 01-01-2016     0     4
#>  2     1 01-02-2016     0     4
#>  3     1 01-03-2016     0     4
#>  4     1 01-04-2016     0     4
#>  5     1 01-05-2016  1000     4
#>  6     1 01-06-2016     0     4
#>  7     2 01-01-2016     0     2
#>  8     2 01-02-2016     0     2
#>  9     2 01-03-2016  1500     2
#> 10     2 01-04-2016     0     2
#> 11     2 01-05-2016     0     2
#> 12     2 01-06-2016     0     2
#> 13     3 01-01-2016     0     1
#> 14     3 01-02-2016  -300     1
#> 15     3 01-03-2016     0     1
#> 16     3 01-04-2016     0     1
#> 17     3 01-05-2016   200     1
#> 18     3 01-06-2016     0     1

Upvotes: 1

Related Questions