Reputation: 4229
Here example and self-explanatory case with the desired solution:
set.seed(123)
df <- data.frame(s=rnorm(100), g1=seq(1,100,,100),g2=seq(5,100,,100),g3=seq(10,1,,100))
agg <- with(df, aggregate(s, data.frame(g1,g2,g3),
FUN = function(x) c("mean" = mean(x), "median" = median(x))))
head(agg,5)
g1 g2 g3 x.mean x.median
1 100 100.00000 1.000000 -1.0264209 -1.0264209
2 99 99.04040 1.090909 -0.2357004 -0.2357004
3 98 98.08081 1.181818 1.5326106 1.5326106
4 97 97.12121 1.272727 2.1873330 2.1873330
5 96 96.16162 1.363636 -0.6002596 -0.6002596
Desired results (the -xxx are just the numerical values calculated or each by group):
g1 g2 g3 g1.x.mean g1.x.median g2.x.mean g2.x.median g3.x.mean g3.x.median
1 100 100.00000 1.000000 -1.0264209 -1.0264209 -xxx -xxxx -xxxx -xxxx
2 99 99.04040 1.090909 -0.2357004 -0.2357004 -xxx -xxxx -xxxx -xxxx
3 98 98.08081 1.181818 1.5326106 1.5326106 -xxx -xxxx -xxxx -xxxx
4 97 97.12121 1.272727 2.1873330 2.1873330 -xxx -xxxx -xxxx -xxxx
5 96 96.16162 1.363636 -0.6002596 -0.6002596 -xxx -xxxx -xxxx -xxxx
Upvotes: 1
Views: 54
Reputation: 887651
If the grouping should be separate, we can use lapply
to loop over the 'g' columns, apply the aggregate
separately and combine them with cbind
nm1 <- paste0('g', 1:3)
out1 <- do.call(cbind, lapply(nm1, function(g) {
out <- do.call(data.frame, aggregate(s ~ ., df[c('s', g)],
FUN = function(x) c(mean(x), median(x))))
names(out)[-1] <- paste0(g, c('.x.mean', '.x.median'))
out}))
out1 <- out1[c(nm1, setdiff(names(out1), nm1))]
-output
head(out1)
# g1 g2 g3 g1.x.mean g1.x.median g2.x.mean g2.x.median g3.x.mean g3.x.median
#1 1 5.000000 1.000000 -0.56047565 -0.56047565 -0.56047565 -0.56047565 -1.0264209 -1.0264209
#2 2 5.959596 1.090909 -0.23017749 -0.23017749 -0.23017749 -0.23017749 -0.2357004 -0.2357004
#3 3 6.919192 1.181818 1.55870831 1.55870831 1.55870831 1.55870831 1.5326106 1.5326106
#4 4 7.878788 1.272727 0.07050839 0.07050839 0.07050839 0.07050839 2.1873330 2.1873330
#5 5 8.838384 1.363636 0.12928774 0.12928774 0.12928774 0.12928774 -0.6002596 -0.6002596
#6 6 9.797980 1.454545 1.71506499 1.71506499 1.71506499 1.71506499 1.3606524 1.3606524
Or as mentioned in the tidyverse
, if the number of unique
groups are not same for each of them, an approach is with ave
to create columns and then apply unique
out1 <- unique(cbind(df[nm1], do.call(cbind, lapply(nm1, function(g) {
mean <- with(df, ave(s, df[[g]]))
median <- with(df, ave(s, df[[g]], FUN = median))
setNames(data.frame(mean, median),
paste0(g, c('.x.mean', '.x.median')))
}))))
-output
head(out1)
# g1 g2 g3 g1.x.mean g1.x.median g2.x.mean g2.x.median g3.x.mean g3.x.median
#1 1 5.000000 10.000000 -0.56047565 -0.56047565 -0.56047565 -0.56047565 -0.56047565 -0.56047565
#2 2 5.959596 9.909091 -0.23017749 -0.23017749 -0.23017749 -0.23017749 -0.23017749 -0.23017749
#3 3 6.919192 9.818182 1.55870831 1.55870831 1.55870831 1.55870831 1.55870831 1.55870831
#4 4 7.878788 9.727273 0.07050839 0.07050839 0.07050839 0.07050839 0.07050839 0.07050839
#5 5 8.838384 9.636364 0.12928774 0.12928774 0.12928774 0.12928774 0.12928774 0.12928774
#6 6 9.797980 9.545455 1.71506499 1.71506499 1.71506499 1.71506499 1.71506499 1.71506499
Or using tidyerse
library(dplyr) # version >= 1.0
library(purrr)
library(stringr)
map_dfc(nm1, ~ df %>%
group_by(across(all_of(.x))) %>%
summarise(!! str_c(.x, ".x.mean") := mean(s),
!! str_c(.x, ".x.median") := median(s), .groups = 'drop')) %>%
select(all_of(nm1), everything())
-output
# A tibble: 100 x 9
# g1 g2 g3 g1.x.mean g1.x.median g2.x.mean g2.x.median g3.x.mean g3.x.median
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 5 1 -0.560 -0.560 -0.560 -0.560 -1.03 -1.03
# 2 2 5.96 1.09 -0.230 -0.230 -0.230 -0.230 -0.236 -0.236
# 3 3 6.92 1.18 1.56 1.56 1.56 1.56 1.53 1.53
# 4 4 7.88 1.27 0.0705 0.0705 0.0705 0.0705 2.19 2.19
# 5 5 8.84 1.36 0.129 0.129 0.129 0.129 -0.600 -0.600
# 6 6 9.80 1.45 1.72 1.72 1.72 1.72 1.36 1.36
# 7 7 10.8 1.55 0.461 0.461 0.461 0.461 -0.628 -0.628
# 8 8 11.7 1.64 -1.27 -1.27 -1.27 -1.27 0.239 0.239
# 9 9 12.7 1.73 -0.687 -0.687 -0.687 -0.687 0.548 0.548
#10 10 13.6 1.82 -0.446 -0.446 -0.446 -0.446 0.994 0.994
# … with 90 more rows
If the grouping columns have different number of unique elements, an option would be to mutate/transmute
new columns and then take the distinct
at the end
map_dfc(nm1, ~ df %>%
group_by(across(all_of(.x))) %>%
transmute(!! str_c(.x, ".x.mean") := mean(s),
!! str_c(.x, ".x.median") := median(s))) %>%
select(all_of(nm1), everything()) %>%
ungroup %>%
distinct
-output
# A tibble: 100 x 9
# g1 g2 g3 g1.x.mean g1.x.median g2.x.mean g2.x.median g3.x.mean g3.x.median
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 5 10 -0.560 -0.560 -0.560 -0.560 -0.560 -0.560
# 2 2 5.96 9.91 -0.230 -0.230 -0.230 -0.230 -0.230 -0.230
# 3 3 6.92 9.82 1.56 1.56 1.56 1.56 1.56 1.56
# 4 4 7.88 9.73 0.0705 0.0705 0.0705 0.0705 0.0705 0.0705
# 5 5 8.84 9.64 0.129 0.129 0.129 0.129 0.129 0.129
# 6 6 9.80 9.55 1.72 1.72 1.72 1.72 1.72 1.72
# 7 7 10.8 9.45 0.461 0.461 0.461 0.461 0.461 0.461
# 8 8 11.7 9.36 -1.27 -1.27 -1.27 -1.27 -1.27 -1.27
# 9 9 12.7 9.27 -0.687 -0.687 -0.687 -0.687 -0.687 -0.687
#10 10 13.6 9.18 -0.446 -0.446 -0.446 -0.446 -0.446 -0.446
# … with 90 more rows
Upvotes: 3
Reputation: 39613
It can also be done with dplyr
:
library(dplyr)
#Code
new <- df %>% group_by(s) %>%
summarise_all(.funs = list("x.mean"=mean,"x.median"=median))
Output:
# A tibble: 100 x 7
s g1_x.mean g2_x.mean g3_x.mean g1_x.median g2_x.median g3_x.median
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 -2.31 72 73.1 3.55 72 73.1 3.55
2 -1.97 18 21.3 8.45 18 21.3 8.45
3 -1.69 26 29.0 7.73 26 29.0 7.73
4 -1.55 57 58.7 4.91 57 58.7 4.91
5 -1.27 43 45.3 6.18 43 45.3 6.18
6 -1.27 8 11.7 9.36 8 11.7 9.36
7 -1.22 78 78.9 3 78 78.9 3
8 -1.14 29 31.9 7.45 29 31.9 7.45
9 -1.12 46 48.2 5.91 46 48.2 5.91
10 -1.07 65 66.4 4.18 65 66.4 4.18
# ... with 90 more rows
Upvotes: 1