create a new column in R by summing up specific columns based on multiple conditions

Question

I am trying to create a new column called period from the dataframe below

structure(list(fw01 = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 
24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 
30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 
19.7, 15, 21.4), fw02 = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 
8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), 
   fw03 = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 
   167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, 
   71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, 
   301, 121), fw04 = c(110, 110, 93, 110, 175, 105, 245, 62, 
   95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 
   150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), fw05 = c(3.9, 
   3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 
   3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 
   3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
   ), fw06 = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 
   3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 
   1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 
   1.513, 3.17, 2.77, 3.57, 2.78), fw07 = c(16.46, 17.02, 18.61, 
   19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, 
   18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 
   17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
   ), fw08 = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
   0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), fw09 = c(1, 
   1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
   0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), fw10 = c(4, 4, 4, 3, 
   3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 
   3, 3, 4, 5, 5, 5, 5, 5, 4), fw11 = c(4, 4, 1, 1, 2, 1, 4, 
   2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 
   2, 2, 4, 6, 8, 2)), class = "data.frame", row.names = c(NA, 
-32L))

these are the periods that I have defined based on the requirement

p1 <- c("fw01","fw02","fw03","fw04")
p2 <- c("fw05","fw06","fw07","fw08")
p3 <- c("fw09","fw10","fw11","fw12","fw13")
p4 <- c("fw14","fw15","fw16","fw17")
p5 <- c("fw18","fw19","fw20","fw21")
p6 <- c("fw22","fw23","fw24","fw25","fw26")
p7 <- c("fw27","fw28","fw29","fw30")
p8 <- c("fw31","fw32","fw33","fw34")
p9 <- c("fw35","fw36","fw37","fw38","fw39")
p10 <- c("fw40","fw41","fw42","fw43")
p11 <- c("fw44","fw45","fw46","fw47")
p12 <- c("fw48","fw49","fw50","fw51","fw52")

my requirement is that the period column should be a summation of the weeks of 1st period i.e fw01+fw02+fw03+fw04 if the week number in the last column of the table belongs belongs to 2nd period and so on. In the above example, the last column is week 11(fw11) that belongs to 3rd period which is p3 as per the vectors defined. in this case i want the period column to be a summation of the weeks in period2 which is fw05+fw06+fw07+fw08. For this I coded 2 different logic

1st approach is using dplyr::case_when

dplyr::mutate(df2,
                     prev_per = case_when(rev(names(df2))[1] %in% p2 ~ fw01+fw02+fw03+fw04,
                                        rev(names(df2))[1] %in% p3 ~ fw05+fw06+fw07+fw08,
                                        rev(names(df2))[1] %in% p4 ~ fw09+fw10+fw11+fw12+fw13,
                                        rev(names(df2))[1] %in% p5 ~ fw14+fw15+fw16+fw17,
                                        rev(names(df2))[1] %in% p6 ~ fw18+fw19+fw20+fw21,
                                        rev(names(df2))[1] %in% p7 ~ fw22+fw23+fw24+fw25+fw26,
                                        rev(names(df2))[1] %in% p8 ~ fw27+fw28+fw29+fw30,
                                        rev(names(df2))[1] %in% p9 ~ fw31+fw32+fw33+fw34,
                                        rev(names(df2))[1] %in% p10 ~ fw35+fw36+fw37+fw38+fw39,
                                        rev(names(df2))[1] %in% p11 ~ fw40+fw41+fw42+fw43,
                                        rev(names(df2))[1] %in% p12 ~ fw44+fw45+fw46+fw47))

the problem with the above is, the other columns are not there in the data frame at all. ideally the condition gets satisfied in the 2nd case_when itself, but the operation is not breaking which leads to an error saying fw12 not found

sample of the 2nd approach is using ifelse function from base package.

df7<- dplyr::mutate(df2,
                    prev_per = ifelse(rev(names(df2))[1] %in% p2, fw01+fw02+fw03+fw04,
                               ifelse(rev(names(df2))[1] %in% p3, fw05+fw06+fw07+fw08,
                               ifelse(rev(names(df2))[1] %in% p4, fw09+fw10+fw11+fw12+fw13))))

here, the operation is breaking properly when the condition is satisfied, but its returning the same number in all the rows of the column period as below

structure(list(fw01 = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 
24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 
30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 
19.7, 15, 21.4), fw02 = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 
8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), 
    fw03 = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 
    167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, 
    71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, 
    301, 121), fw04 = c(110, 110, 93, 110, 175, 105, 245, 62, 
    95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 
    150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), fw05 = c(3.9, 
    3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 
    3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 
    3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
    ), fw06 = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 
    3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 
    1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 
    1.513, 3.17, 2.77, 3.57, 2.78), fw07 = c(16.46, 17.02, 18.61, 
    19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, 
    18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 
    17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
    ), fw08 = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
    0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), fw09 = c(1, 
    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), fw10 = c(4, 4, 4, 3, 
    3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 
    3, 3, 4, 5, 5, 5, 5, 5, 4), fw11 = c(4, 4, 1, 1, 2, 1, 4, 
    2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 
    2, 2, 4, 6, 8, 2), prev_per = c(22.98, 22.98, 22.98, 22.98, 
    22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 
    22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 
    22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 22.98, 
    22.98)), class = "data.frame", row.names = c(NA, -32L))

Please suggest how to fix this issue.

AHart · Accepted Answer

For a truly awful solution, you can take advantage of the fact that case_when ignores NULL inputs and if without an else will return NULL.

So move the conditions in an if and follow with TRUE to force the evaluation.

df %>%
  mutate(
    prev_per = case_when(
      if (rev(names(df))[1] %in% p2) TRUE ~ fw01+fw02+fw03+fw04,
      if (rev(names(df))[1] %in% p3) TRUE ~ fw05+fw06+fw07+fw08,
      if (rev(names(df))[1] %in% p4) TRUE ~ fw09+fw10+fw11+fw12+fw13,
      if (rev(names(df))[1] %in% p5) TRUE ~ fw14+fw15+fw16+fw17,
      if (rev(names(df))[1] %in% p6) TRUE ~ fw18+fw19+fw20+fw21
    )
  )

I hate this answer, but it seems to work. Output:

   fw01 fw02  fw03 fw04 fw05  fw06  fw07 fw08 fw09 fw10 fw11 prev_per
1  21.0    6 160.0  110 3.90 2.620 16.46    0    1    4    4   22.980
2  21.0    6 160.0  110 3.90 2.875 17.02    0    1    4    4   23.795
3  22.8    4 108.0   93 3.85 2.320 18.61    1    1    4    1   25.780
4  21.4    6 258.0  110 3.08 3.215 19.44    1    0    3    1   26.735
5  18.7    8 360.0  175 3.15 3.440 17.02    0    0    3    2   23.610

create a new column in R by summing up specific columns based on multiple conditions

Answers (2)

Related Questions