MYaseen208
MYaseen208

Reputation: 23898

tidyverse: append rows of totals in summary output

I want append rows of totals in the output of summarise used with group_by.

Data <-
  structure(list(CT = c("1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2"), SCT = c("1", "1", "1", "1", "1", "1", "2", "2", "2", "2", 
"2", "2", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2"), 
    SSCT = c("1", "2", "3", "1", "2", "3", "1", "2", "1", "2", 
    "1", "2", "1", "2", "1", "2", "1", "2", "3", "1", "2", "3"
    ), Category = c(111L, 112L, 113L, 111L, 112L, 113L, 121L, 
    122L, 121L, 122L, 121L, 122L, 211L, 212L, 211L, 212L, 221L, 
    222L, 223L, 221L, 222L, 223L), Y = c(10L, 12L, 15L, 11L, 
    10L, 13L, 21L, 22L, 20L, 25L, 23L, 24L, 31L, 33L, 36L, 39L, 
    32L, 31L, 36L, 41L, 44L, 45L)), row.names = c(NA, -22L), class = "data.frame")
head(Data)
  CT SCT SSCT Category  Y
1  1   1    1      111 10
2  1   1    2      112 12
3  1   1    3      113 15
4  1   1    1      111 11
5  1   1    2      112 10
6  1   1    3      113 13

Here

CT: Category, SCT: Sub Category, SSCT: Sub Sub Category

Data %>%
  group_by(CT) %>% 
  summarise(Total = sum(Y))

# A tibble: 2 x 2
  CT    Total
  <chr> <int>
1 1       206
2 2       368


Data %>%
  group_by(CT, SCT) %>% 
  summarise(Total = sum(Y))

# A tibble: 4 x 3
# Groups:   CT [2]
  CT    SCT   Total
  <chr> <chr> <int>
1 1     1        71
2 1     2       135
3 2     1       139
4 2     2       229

Data %>%
  group_by(CT, SCT, SSCT) %>% 
  summarise(Total = sum(Y))
# A tibble: 10 x 4
# Groups:   CT, SCT [4]
   CT    SCT   SSCT  Total
   <chr> <chr> <chr> <int>
 1 1     1     1        21
 2 1     1     2        22
 3 1     1     3        28
 4 1     2     1        64
 5 1     2     2        71
 6 2     1     1        67
 7 2     1     2        72
 8 2     2     1        73
 9 2     2     2        75
10 2     2     3        81

Required Output

Need output something following:

CT    SCT   SSCT  Total
Total               206
Total                71
1     1     1        21
1     1     2        22
1     1     3        28
Total               135
1     2     1        64
1     2     2        71
Total               368
Total               139
2     1     1        67
2     1     2        72
Total               229
2     2     1        73
2     2     2        75
2     2     3        81

Upvotes: 1

Views: 784

Answers (3)

IceCreamToucan
IceCreamToucan

Reputation: 28675

You can use rollup in the data.table package

library(data.table)
setDT(Data)

group_vars <-  c('CT', 'SCT', 'SSCT')
piv <- 
  rollup(Data, j = .(Total = sum(Y)), by = group_vars)
               
setorderv(piv, group_vars)[-1]
#>     CT  SCT SSCT Total
#>  1:  1 <NA> <NA>   206
#>  2:  1    1 <NA>    71
#>  3:  1    1    1    21
#>  4:  1    1    2    22
#>  5:  1    1    3    28
#>  6:  1    2 <NA>   135
#>  7:  1    2    1    64
#>  8:  1    2    2    71
#>  9:  2 <NA> <NA>   368
#> 10:  2    1 <NA>   139
#> 11:  2    1    1    67
#> 12:  2    1    2    72
#> 13:  2    2 <NA>   229
#> 14:  2    2    1    73
#> 15:  2    2    2    75
#> 16:  2    2    3    81

Created on 2021-06-05 by the reprex package (v2.0.0)

Upvotes: 1

G. Grothendieck
G. Grothendieck

Reputation: 269346

1) We can use adorn_totals from the janitor package. In janitor the totals normally come after the group that is totaled but we can use the name "0" in place of Total and sort so that the totals sort first and then at the end replace "0" with the word Total. The filter removes rows that have multiple fields with the word Total.

library(dplyr)
library(janitor)

Data %>% 
  group_by(CT, SCT, SSCT) %>%
  summarize(Y = sum(Y), .groups = "drop") %>%
  group_by(CT) %>%
  group_modify(~ adorn_totals(., name = "0")) %>%
  group_by(CT, SCT) %>%
  group_modify(~ adorn_totals(., name = "0")) %>%
  ungroup %>%
  filter(rowSums(across() == "0") <= 1) %>%
  arrange(CT, SCT, SSCT) %>%
  mutate(across(where(is.character), ~ replace(., . == "0", "Total")))

giving:

# A tibble: 16 x 4
   CT    SCT   SSCT      Y
   <chr> <chr> <chr> <int>
 1 1     Total -       206
 2 1     1     Total    71
 3 1     1     1        21
 4 1     1     2        22
 5 1     1     3        28
 6 1     2     Total   135
 7 1     2     1        64
 8 1     2     2        71
 9 2     Total -       368
10 2     1     Total   139
11 2     1     1        67
12 2     1     2        72
13 2     2     Total   229
14 2     2     1        73
15 2     2     2        75
16 2     2     3        81

2) If it is ok to have the totals after the group instead of before then it can be simplified.

Data %>% 
  group_by(CT, SCT, SSCT) %>%
  summarize(Y = sum(Y), .groups = "drop") %>%
  group_by(CT) %>%
  group_modify(~ adorn_totals(.)) %>%
  group_by(CT, SCT) %>%
  group_modify(~ adorn_totals(.)) %>%
  ungroup %>%
  filter(rowSums(across() == "Total") <= 1)

giving:

# A tibble: 16 x 4
   CT    SCT   SSCT      Y
   <chr> <chr> <chr> <int>
 1 1     1     1        21
 2 1     1     2        22
 3 1     1     3        28
 4 1     1     Total    71
 5 1     2     1        64
 6 1     2     2        71
 7 1     2     Total   135
 8 1     Total -       206
 9 2     1     1        67
10 2     1     2        72
11 2     1     Total   139
12 2     2     1        73
13 2     2     2        75
14 2     2     3        81
15 2     2     Total   229
16 2     Total -       368

Upvotes: 1

zimia
zimia

Reputation: 932

if you want the final columns to be numeric instead of character, you will have to accept NA instead of ""

bind_rows(
  Data %>% 
    group_by(CT) %>% 
    summarise(Total = sum(Y)) %>% 
    mutate(SCT = "Total", SSCT = "Total"),
  
  Data %>%
    group_by(CT, SCT) %>% 
    summarise(Total = sum(Y)) %>% 
    mutate(SSCT = "Total"),
  
  
  Data %>%
    group_by(CT, SCT, SSCT) %>% 
    summarise(Total = sum(Y))
) %>% 
  mutate(across(c("SCT","SSCT"), ~ fct_relevel(., "Total"))) %>% 
  arrange(CT, SCT, SSCT) %>% 
  mutate(CT = ifelse(SSCT == "Total", "Total", CT),
         SCT = ifelse(SSCT == "Total", "", as.numeric(SCT)-1),
         SSCT = gsub("Total", "", SSCT)) %>%  
  select(CT, SCT, SSCT, Total) 
#  A tibble: 16 x 4
    CT    SCT   SSCT  Total
   <chr> <chr> <chr> <int>
 1 Total ""    ""      206
 2 Total ""    ""       71
 3 1     "1"   "1"      21
 4 1     "1"   "2"      22
 5 1     "1"   "3"      28
 6 Total ""    ""      135
 7 1     "2"   "1"      64
 8 1     "2"   "2"      71
 9 Total ""    ""      368
10 Total ""    ""      139
11 2     "1"   "1"      67
12 2     "1"   "2"      72
13 Total ""    ""      229
14 2     "2"   "1"      73
15 2     "2"   "2"      75
16 2     "2"   "3"      81

Upvotes: 1

Related Questions