Gerry
Gerry

Reputation: 117

Resetting a cumsum to a value in a variable

I have a simple dataset, with date, sample, and application. Every day the value in the sample should decrease by 1 and increase by whatever is applied. When we take another sample we start over from the new value. The result is given in the variable estimate. I'm trying to calculate the variable estimate in R.

Here's my dataset:

structure(
    list(
        date = structure(c(17591, 17592, 17593, 17594, 
          17595, 17596, 17597, 17598, 17599, 17600, 17601, 17602, 17603, 
          17604, 17605, 17606, 17607, 17608, 17609, 17610, 17611, 17612, 
          17613, 17614), class = "Date"),
        sample = c(30, NA, NA, NA, NA, 20, NA, NA, 40, NA, NA, 
          NA, NA, NA, 35, NA, NA, 15, NA, NA, NA, 
          NA, 5, NA), 
        applied = c(NA, NA, 10, NA, NA, NA, NA, 10, NA, NA, 
          15, NA, NA, NA, 10, NA, NA, NA, 15, NA, NA, 10, NA, NA), 
        estimate = c(30, 29, 38, 37, 36, 20, 19, 28, 40, 39, 53, 52, 
          51, 50, 35, 34, 33, 15, 29, 28, 27, 36, 5, 4)
        ), 
    class = "data.frame", row.names = c(NA, -24L)
)

Upvotes: 0

Views: 73

Answers (3)

Mouad_Seridi
Mouad_Seridi

Reputation: 2716

df <-  structure(list(date = structure(c(17591, 17592, 17593, 17594, 
                                         17595, 17596, 17597, 17598, 17599, 17600, 17601, 17602, 17603, 
                                         17604, 17605, 17606, 17607, 17608, 17609, 17610, 17611, 17612, 
                                         17613, 17614), class = "Date"),
                      sample = c(30, NA, NA, NA, NA, 
                                 20, NA, NA, 40, NA, NA, NA, NA, NA, 35, NA, NA, 15, NA, NA, NA, 
                                 NA, 5, NA), 
                      applied = c(NA, NA, 10, NA, NA, NA, NA, 10, NA, NA, 
                                  15, NA, NA, NA, 10, NA, NA, NA, 15, NA, NA, 10, NA, NA), 
                      estimate = c(30,29, 38, 37, 36, 20, 19, 28, 40, 39, 53, 52, 51, 50, 35, 34, 33, 
                                   15, 29, 28, 27, 36, 5, 4)), class = "data.frame", 
                 row.names = c(NA,-24L))




require(dplyr)

## create asubset without "estimate"
df2 <- df[,1:3]


df2 %>% arrange(date) %>%  
  mutate(sample_id = cumsum(!is.na(sample))) %>%                      #create an id for each sample 
  mutate(applied = if_else(is.na(applied),0, applied)) %>%            # turn NA applied to 0
  mutate(applied = if_else(is.na(sample), applied -1, applied)) %>%   # turn applied to -1 when sample is NA
  mutate(estimate0 = if_else(is.na(sample), 0, sample)) %>%           # duplicate sample column 
  mutate(estimate0 = estimate0 + applied) %>%                         # horizontal calculation sample + applied
  group_by(sample_id) %>%                                             # group by to restrict the cumsum
  arrange(date)  %>%                                                  # necessary order for the cumsum 
  mutate(estimate = cumsum(estimate0)) %>%                            # cumsum 
  ungroup                  %>% 
  select(date, sample, applied, estimate)   %>% 
  print(n = 24)


# A tibble: 24 x 4
   date       sample applied estimate
   <date>      <dbl>   <dbl>    <dbl>
 1 2018-03-01     30       0       30
 2 2018-03-02     NA      -1       29
 3 2018-03-03     NA       9       38
 4 2018-03-04     NA      -1       37
 5 2018-03-05     NA      -1       36
 6 2018-03-06     20       0       20
 7 2018-03-07     NA      -1       19
 8 2018-03-08     NA       9       28
 9 2018-03-09     40       0       40
10 2018-03-10     NA      -1       39
11 2018-03-11     NA      14       53
12 2018-03-12     NA      -1       52
13 2018-03-13     NA      -1       51
14 2018-03-14     NA      -1       50
15 2018-03-15     35      10       45
16 2018-03-16     NA      -1       44
17 2018-03-17     NA      -1       43
18 2018-03-18     15       0       15
19 2018-03-19     NA      14       29
20 2018-03-20     NA      -1       28
21 2018-03-21     NA      -1       27
22 2018-03-22     NA       9       36
23 2018-03-23      5       0        5
24 2018-03-24     NA      -1        4

Upvotes: 0

d.b
d.b

Reputation: 32548

ave(df1$sample,
    cumsum(!is.na(df1$sample)),
    FUN = function(x) max(x, na.rm = TRUE) + -1 * (seq_along(x) - 1)) +
    ave(replace(df1$applied, is.na(df1$applied), 0),
        cumsum(!is.na(df1$sample)),
        FUN = cumsum)
# [1] 30 29 38 37 36 20 19 28 40 39 53 52 51 50 45 44 43 15 29 28 27 36  5  4

Upvotes: 1

Gregor Thomas
Gregor Thomas

Reputation: 145775

This could probably be cleaned up a little, but it works:

library(dplyr)
df %>% 
  mutate(group = cumsum(!is.na(sample))) %>%
  group_by(group) %>%
  mutate(
    to_add = ifelse(!is.na(sample), 0, ifelse(is.na(applied), -1, applied - 1)),
    result = first(sample) + cumsum(to_add)
  )
# # A tibble: 24 x 7
# # Groups:   group [6]
#    date       sample applied estimate group to_add result
#    <date>      <dbl>   <dbl>    <dbl> <int>  <dbl>  <dbl>
#  1 2018-03-01     30      NA       30     1      0     30
#  2 2018-03-02     NA      NA       29     1     -1     29
#  3 2018-03-03     NA      10       38     1      9     38
#  4 2018-03-04     NA      NA       37     1     -1     37
#  5 2018-03-05     NA      NA       36     1     -1     36
#  6 2018-03-06     20      NA       20     2      0     20
#  7 2018-03-07     NA      NA       19     2     -1     19
#  8 2018-03-08     NA      10       28     2      9     28
#  9 2018-03-09     40      NA       40     3      0     40
# 10 2018-03-10     NA      NA       39     3     -1     39
# # ... with 14 more rows

Upvotes: 2

Related Questions