Mateusz
Mateusz

Reputation: 17

Aggregate by multiple groups and keep the result for each level in R

Is there any possibility to get expected result in simplest way (without intermediate steps, as I did in my code)? I was looking for it and couldn't find any solution...

So, for example:

df <- data.frame(   A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
                    B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
                    C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
                    D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
              value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36), 
              value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
              value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))

I would like to summarise values for each level (A,B,C,D) and combination of them to show the sum of each level in the final table.

A <- df %>%
  group_by(A) %>% 
  summarise(across(value_1:value_3, sum))

B <- df %>% 
  group_by(A,B) %>% 
  summarise(across(value_1:value_3, sum))

C <- df %>% 
  group_by(A,B,C) %>% 
  summarise(across(value_1:value_3, sum))

AD <- df %>% 
  group_by(A,D) %>% 
  summarise(across(value_1:value_3, sum))

BD <- df %>% 
  group_by(A,B,D) %>% 
  summarise(across(value_1:value_3, sum))

result <-bind_rows(A, B, C, AD, BD)
result[is.na(result)] <- "00"
result <- result[,c("A","B","C","D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D),]

My data has much more levels, so how could I do it without making intermediate steps like: B, C.. to get the result like this:

> head(result)
# A tibble: 6 x 7
  A     B     C     D     value_1 value_2 value_3
  <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
1 01    00    00    00         70     109     132
2 01    00    00    1          41      97      92
3 01    00    00    2          29      12      40
4 01    12    00    00         55      62     123
5 01    12    00    1          26      50      92
6 01    12    00    2          29      12      31

Is there any possibility to do it with .groups argument or using aggregate function instead of combination group_by and summarise?

Upvotes: 0

Views: 68

Answers (1)

Yuriy Saraykin
Yuriy Saraykin

Reputation: 8880

df <- data.frame(   A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
                    B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
                    C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
                    D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
                    value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36), 
                    value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
                    value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))

grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))

library(tidyverse)

map_df(grp_vars,
    ~ df %>%
      group_by(across(.x)) %>%
      summarise(across(value_1:value_3, sum), .groups = "drop")) %>%
  mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
  relocate(where(is.numeric), .after = where(is.character))


#> # A tibble: 15 x 6
#>    A     B     C     value_1 value_2 value_3
#>    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
#>  1 01    00    00         70     109     132
#>  2 02    00    00        256     423      16
#>  3 01    12    00         55      62     123
#>  4 01    14    00         15      47       9
#>  5 02    18    00         34     297       4
#>  6 02    22    00        222     126      12
#>  7 01    12    01         39       2      83
#>  8 01    12    02         16      60      40
#>  9 01    14    01          0       0       9
#> 10 01    14    02         15      47       0
#> 11 02    18    01          0      95       4
#> 12 02    18    02         16      14       0
#> 13 02    18    05         18     188       0
#> 14 02    22    01         74      65       8
#> 15 02    22    02        148      61       4

Created on 2022-03-31 by the reprex package (v2.0.1)

if I understood correctly

map_df(
  grp_vars,
  ~ df %>%
    group_by(across(.x)) %>%
    summarise(across(value_1:value_3, sum), .groups = "drop") %>%
    mutate(level = paste0(.x, collapse = ""))
) %>%
  mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
  relocate(level, where(is.numeric), .after = where(is.character))


# A tibble: 15 x 7
   A     B     C     level value_1 value_2 value_3
   <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
 1 01    00    00    A          70     109     132
 2 02    00    00    A         256     423      16
 3 01    12    00    AB         55      62     123
 4 01    14    00    AB         15      47       9
 5 02    18    00    AB         34     297       4
 6 02    22    00    AB        222     126      12
 7 01    12    01    ABC        39       2      83
 8 01    12    02    ABC        16      60      40
 9 01    14    01    ABC         0       0       9
10 01    14    02    ABC        15      47       0
11 02    18    01    ABC         0      95       4
12 02    18    02    ABC        16      14       0
13 02    18    05    ABC        18     188       0
14 02    22    01    ABC        74      65       8
15 02    22    02    ABC       148      61       4

Upvotes: 2

Related Questions