Reputation: 17
Is there any possibility to get expected result in simplest way (without intermediate steps, as I did in my code)? I was looking for it and couldn't find any solution...
So, for example:
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
I would like to summarise values for each level (A,B,C,D) and combination of them to show the sum of each level in the final table.
A <- df %>%
group_by(A) %>%
summarise(across(value_1:value_3, sum))
B <- df %>%
group_by(A,B) %>%
summarise(across(value_1:value_3, sum))
C <- df %>%
group_by(A,B,C) %>%
summarise(across(value_1:value_3, sum))
AD <- df %>%
group_by(A,D) %>%
summarise(across(value_1:value_3, sum))
BD <- df %>%
group_by(A,B,D) %>%
summarise(across(value_1:value_3, sum))
result <-bind_rows(A, B, C, AD, BD)
result[is.na(result)] <- "00"
result <- result[,c("A","B","C","D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D),]
My data has much more levels, so how could I do it without making intermediate steps like: B, C.. to get the result like this:
> head(result)
# A tibble: 6 x 7
A B C D value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 00 70 109 132
2 01 00 00 1 41 97 92
3 01 00 00 2 29 12 40
4 01 12 00 00 55 62 123
5 01 12 00 1 26 50 92
6 01 12 00 2 29 12 31
Is there any possibility to do it with .groups argument or using aggregate function instead of combination group_by and summarise?
Upvotes: 0
Views: 68
Reputation: 8880
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))
library(tidyverse)
map_df(grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop")) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(where(is.numeric), .after = where(is.character))
#> # A tibble: 15 x 6
#> A B C value_1 value_2 value_3
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 01 00 00 70 109 132
#> 2 02 00 00 256 423 16
#> 3 01 12 00 55 62 123
#> 4 01 14 00 15 47 9
#> 5 02 18 00 34 297 4
#> 6 02 22 00 222 126 12
#> 7 01 12 01 39 2 83
#> 8 01 12 02 16 60 40
#> 9 01 14 01 0 0 9
#> 10 01 14 02 15 47 0
#> 11 02 18 01 0 95 4
#> 12 02 18 02 16 14 0
#> 13 02 18 05 18 188 0
#> 14 02 22 01 74 65 8
#> 15 02 22 02 148 61 4
Created on 2022-03-31 by the reprex package (v2.0.1)
if I understood correctly
map_df(
grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop") %>%
mutate(level = paste0(.x, collapse = ""))
) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(level, where(is.numeric), .after = where(is.character))
# A tibble: 15 x 7
A B C level value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 A 70 109 132
2 02 00 00 A 256 423 16
3 01 12 00 AB 55 62 123
4 01 14 00 AB 15 47 9
5 02 18 00 AB 34 297 4
6 02 22 00 AB 222 126 12
7 01 12 01 ABC 39 2 83
8 01 12 02 ABC 16 60 40
9 01 14 01 ABC 0 0 9
10 01 14 02 ABC 15 47 0
11 02 18 01 ABC 0 95 4
12 02 18 02 ABC 16 14 0
13 02 18 05 ABC 18 188 0
14 02 22 01 ABC 74 65 8
15 02 22 02 ABC 148 61 4
Upvotes: 2