Mel
Mel

Reputation: 71

Cumulative sum for variables with similar names in R

df_test <- data.frame(MONTH_NUM = c(7,7,8,8,8,10,11,12,1,2,3,4,4,5,5,5,5,NA)
                      , YEAR = c(2018,2018,2018,2018,2019,2019,2019,2019,2019,2018,2018,2019,2018,2018,2018,2018,2018,NA)
                      , Sys_Indicator = c(1,0,0,1,0,0,0,0,1,1,0,1,0,1,1,1,1,1)
                      , lbl_Indicator = c(1,1,1,1,0,1,0,0,1,1,0,1,1,1,1,1,1,0)
                      , Pk_Indicator=c(1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1))

I want to find the cumulative sum of each indicator for each month+year combination. I'm currently using dplyr to achieve this but I was wondering if there was an easier way to do this and to do it for all variables that have and Indicator in their names? I want all my variable with Indicator in them to have cumulative sum.

df_test %>% 
 group_by(YEAR,MONTH_NUM) %>% 
 summarize(Sys_sum=sum(Sys_Indicator),lbl_Sum=sum(lbl_Indicator),Pk_Sum=sum(Pk_Indicator)) %>%
    arrange(MONTH_NUM,YEAR) %>%
    ungroup() %>% 
mutate(Sys_cum=cumsum(Sys_sum),Cum_lbl=cumsum(lbl_Sum),Pk_sum=cumsum(Pk_Sum))

Upvotes: 0

Views: 250

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 388907

You could use the _at variants in dplyr to apply this for multiple columns :

library(dplyr)

df_test %>% 
  arrange(MONTH_NUM,YEAR) %>%
  group_by(YEAR,MONTH_NUM) %>% 
  summarize_at(vars(ends_with('Indicator')), sum) %>%
  ungroup() %>% 
  mutate_at(vars(ends_with('Indicator')), list(cs = ~cumsum(.)))


#      YEAR MONTH_NUM Sys_Indicator lbl_Indicator Pk_Indicator Sys_Indicator_cs lbl_Indicator_cs Pk_Indicator_cs
#   <dbl>     <dbl>         <dbl>         <dbl>        <dbl>            <dbl>            <dbl>           <dbl>
# 1  2018         2             1             1            1                1                1               1
# 2  2018         3             0             0            0                1                1               1
# 3  2018         4             0             1            0                1                2               1
# 4  2018         5             4             4            1                5                6               2
# 5  2018         7             1             2            1                6                8               3
# 6  2018         8             1             2            2                7               10               5
# 7  2019         1             1             1            1                8               11               6
# 8  2019         4             1             1            1                9               12               7
# 9  2019         8             0             0            0                9               12               7
#10  2019        10             0             1            1                9               13               8
#11  2019        11             0             0            0                9               13               8
#12  2019        12             0             0            0                9               13               8
#13    NA        NA             1             0            1               10               13               9

Upvotes: 2

Ian Campbell
Ian Campbell

Reputation: 24790

I think I understand what you want. Here is a data.table approach.

library(data.table)
setDT(df_test)[ ,sapply(names(df_test)[grep("Indicator",names(df_test))],paste0,"_cumsum") := lapply(.SD[,grep("Indicator",names(df_test))],cumsum)]
df_test
    MONTH_NUM YEAR Sys_Indicator lbl_Indicator Pk_Indicator Sys_Indicator_cumsum lbl_Indicator_cumsum Pk_Indicator_cumsum
 1:         7 2018             1             1            1                    1                    1                   1
 2:         7 2018             0             1            0                    1                    2                   1
 3:         8 2018             0             1            1                    1                    3                   2
 4:         8 2018             1             1            1                    2                    4                   3
 5:         8 2019             0             0            0                    2                    4                   3
 6:        10 2019             0             1            1                    2                    5                   4
 7:        11 2019             0             0            0                    2                    5                   4
 8:        12 2019             0             0            0                    2                    5                   4
 9:         1 2019             1             1            1                    3                    6                   5
10:         2 2018             1             1            1                    4                    7                   6
11:         3 2018             0             0            0                    4                    7                   6
12:         4 2019             1             1            1                    5                    8                   7
13:         4 2018             0             1            0                    5                    9                   7
14:         5 2018             1             1            0                    6                   10                   7
15:         5 2018             1             1            0                    7                   11                   7
16:         5 2018             1             1            0                    8                   12                   7
17:         5 2018             1             1            1                    9                   13                   8
18:        NA   NA             1             0            1                   10                   13                   9

Upvotes: 0

Related Questions