Franky
Franky

Reputation: 751

cumulative sum in R with conditions

The column Signal3 is what I am struggling to code.

enter image description here

Here is the code:

library(tibble)
library(dplyr)
library(lubridate)
# Fix the RNG state so the random signal2 column is reproducible.
set.seed(1234)
df <- tibble(
  # signal1: start trigger -- 1 at rows 3, 10, 15, 22 and 30, 0 elsewhere.
  signal1 = replace(numeric(31), c(3, 10, 15, 22, 30), 1),
  # signal2: 31 random 0/1 draws, each 1 with probability 0.7.
  signal2 = rbinom(n = 31, size = 1, prob = 0.7),
  # signal3: the desired result, written out by hand.
  signal3 = c(
    0, 0, 1, 2, 0, 0, 0, 0, 0, 1,
    2, 3, 4, 0, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 0, 0, 0, 0, 1,
    2
  )
)

Basically, when signal1 == 1 we start counting the consecutive rows in which signal2 == 1; the count stops (and signal3 goes back to 0) as soon as signal2 == 0.

Upvotes: 1

Views: 540

Answers (3)

Stefano Barbi
Stefano Barbi

Reputation: 3184

Here is an approach using purrr::accumulate2 to count elements only when the 'gate' state is open (> 0).

library(dplyr)
library(purrr)
# Running counter driven by two flags:
#   hi -- a start row: signal1 and signal2 are both non-zero
#   lo -- a reset row: signal2 is zero
# accumulate2() carries the previous count forward row by row; `.init = 0`
# seeds it, and `[-1]` drops that seed so the result has one value per row.
df |>
  mutate(
    hi = signal1 & signal2,
    lo = !signal2,
    s3 = unlist(
      accumulate2(
        hi, lo,
        \(count, opens, closes) {
          if (opens) {
            # a start row always (re)starts the count at 1
            1
          } else if (closes) {
            # signal2 == 0 resets the count
            0
          } else if (count) {
            # already counting: extend the run
            count + 1
          } else {
            # not counting and no start seen: stay at 0
            0
          }
        },
        .init = 0
      )[-1]
    )
  )

##>    signal1 signal2 signal3    hi    lo s3
##> 1        0       1       0 FALSE FALSE  0
##> 2        0       1       0 FALSE FALSE  0
##> 3        1       1       1  TRUE FALSE  1
##> 4        0       1       2 FALSE FALSE  2
##> 5        0       0       0 FALSE  TRUE  0
##> 6        0       1       0 FALSE FALSE  0
##> 7        0       1       0 FALSE FALSE  0
##> 8        0       1       0 FALSE FALSE  0
##> 9        0       1       0 FALSE FALSE  0
##> 10       1       1       1  TRUE FALSE  1
##> 11       0       1       2 FALSE FALSE  2
##> 12       0       1       3 FALSE FALSE  3
##> 13       0       1       4 FALSE FALSE  4
##> 14       0       0       0 FALSE  TRUE  0
##> 15       1       1       1  TRUE FALSE  1
##> 16       0       0       0 FALSE  TRUE  0
##> 17       0       1       0 FALSE FALSE  0
##> 18       0       1       0 FALSE FALSE  0
##> 19       0       1       0 FALSE FALSE  0
##> 20       0       1       0 FALSE FALSE  0
##> 21       0       1       0 FALSE FALSE  0
##> 22       1       1       1  TRUE FALSE  1
##> 23       0       1       2 FALSE FALSE  2
##> 24       0       1       3 FALSE FALSE  3
##> 25       0       1       4 FALSE FALSE  4
##> 26       0       0       0 FALSE  TRUE  0
##> 27       0       1       0 FALSE FALSE  0
##> 28       0       0       0 FALSE  TRUE  0
##> 29       0       0       0 FALSE  TRUE  0
##> 30       1       1       1  TRUE FALSE  1
##> 31       0       1       2 FALSE FALSE  2

Upvotes: 1

Chris
Chris

Reputation: 2286

Imagining that $signal3 starts out life populated by 0s, one could use base R and a feckless series of for loops to progressively collect the values needed to populate $signal3:

set.seed(1234)
# Example data as a plain data.frame: signal1 marks the start rows, signal2 is
# the 0/1 gate (stored as integer), and signal3 starts out as all zeros.
df_sig <- data.frame(
  signal1 = c(
    0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
    0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
    0
  ),
  signal2 = c(
    1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L,
    1L
  ),
  signal3 = numeric(31)
)
# i.e. df_sig <- structure(list(...

First, find the rows where a count starts (signal1 == 1):

# Row indices where a count begins, i.e. where signal1 equals 1.
# (Compare with `==` and spell the column name `signal1` in full; the `=`
# form inside the call does not parse.)
starts <- which(df_sig$signal1 == 1)
 starts
[1]  3 10 15 22 30

find ends

# For each start row, find the last row of the run of identical signal2 values
# that begins at that start row. rle() on the tail of signal2 gives the length
# of that first run; start + length - 1 is therefore the run's last row.
# NOTE(review): this measures the first run whatever its value, so it assumes
# signal2 is 1 at every start row -- confirm for other data.
# `ends` must exist before it can be filled by index, so preallocate it, and
# use seq_along() so that an empty `starts` skips the loop entirely.
ends <- numeric(length(starts))
for (i in seq_along(starts)) {
  run_lengths <- rle(df_sig$signal2[starts[i]:length(df_sig$signal2)])$lengths
  ends[i] <- starts[i] + run_lengths[1] - 1
}
ends
[1]  4 13 15 25 31

create sequences

# Build one counter 1, 2, ..., k per start/end pair, where k is the number of
# rows from the start row to the end row inclusive.
# The list is preallocated rather than grown, and seq_along() is used so the
# loop is skipped (instead of running for i = 1, 0) when `ends` is empty.
counts_seq <- vector("list", length(ends)) # to receive sequences
# a further feckless for loop
for (i in seq_along(ends)) {
  counts_seq[[i]] <- seq(1, ends[i] - starts[i] + 1, 1)
}
counts_seq
[[1]]
[1] 1 2

[[2]]
[1] 1 2 3 4

[[3]]
[1] 1

[[4]]
[1] 1 2 3 4

[[5]]
[1] 1 2

populate $signal3

# the last for loop
# Write each counter into signal3 over the rows from its start to its end.
# seq_along() keeps the loop from running (for i = 1, 0) when there are no
# start rows at all.
for (i in seq_along(starts)) {
  df_sig$signal3[starts[i]:ends[i]] <- counts_seq[[i]]
}
df_sig
   signal1 signal2 signal3
1        0       1       0
2        0       1       0
3        1       1       1
4        0       0       2
5        0       1       0
6        0       1       0
7        0       1       0
8        0       1       0
9        0       1       0
10       1       0       1
11       0       1       2
12       0       0       3
13       0       1       4
14       0       1       0
15       1       1       1
16       0       1       0
17       0       1       0
18       0       1       0
19       0       0       0
20       0       1       0
21       0       1       0
22       1       1       1
23       0       1       2
24       0       0       3
25       0       1       4
26       0       1       0
27       0       1       0
28       0       0       0
29       0       1       0
30       1       0       1
31       0       1       2

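which looks like it's going in the right direction. (Note: the signal2 column printed above does not match the signal2 values in the df_sig definition, so it was presumably printed from a different random draw; the signal3 column is what the code above produces for df_sig as defined.)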

Upvotes: 1

lroha
lroha

Reputation: 34291

You can create groups based on encountering the start or end condition, then on the grouped data return the cumulative sum of signal2 when the cumulative sum of signal1 is greater than 0, else return 0.

library(dplyr)

# A new group opens at every start row (signal1 == 1) and at every reset row
# (signal2 == 0). Within a group, cumsum(signal1) > 0 holds only when the group
# was opened by a start row; in that case the running sum of signal2 is the
# count, otherwise the value is 0. The helper column `grp` is dropped at the end.
df |>
  group_by(grp = cumsum(signal1 == 1 | signal2 == 0)) |>
  mutate(
    signal3 = ifelse(
      cumsum(signal1) > 0,
      cumsum(signal2),
      0
    )
  ) |>
  ungroup() |>
  select(-grp)

# A tibble: 31 × 3
   signal1 signal2 signal3
     <dbl>   <int>   <dbl>
 1       0       1       0
 2       0       1       0
 3       1       1       1
 4       0       1       2
 5       0       0       0
 6       0       1       0
 7       0       1       0
 8       0       1       0
 9       0       1       0
10       1       1       1
# … with 21 more rows
# ℹ Use `print(n = ...)` to see more rows

Upvotes: 3

Related Questions