Reputation: 33
I have a variable and I would like to obtain the means within each group where the group is listed for each observation in a column and I have many such columns. I would then like to associate the group means to the appropriate observation so that if I start with a matrix of m obs x n different groupings I obtain an m x n matrix of means. For example:
> var <- round(runif(10),digits=2)
> var
[1] 0.47 0.21 0.80 0.65 0.32 0.72 0.29 0.93 0.77 0.64
> groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE))
> groupings
[,1] [,2] [,3]
[1,] 3 1 5
[2,] 1 1 5
[3,] 2 1 5
[4,] 3 2 3
[5,] 2 3 1
[6,] 1 1 1
[7,] 2 3 1
[8,] 1 2 1
[9,] 3 1 5
[10,] 1 3 2
I can obtain the means within each group separately with the following (for example)
> means.1 <- sapply(split(var, groupings[,1]), function(x) mean(x))
> means.2 <- sapply(split(var, groupings[,2]), function(x) mean(x))
> means.3 <- sapply(split(var, groupings[,3]), function(x) mean(x))
> means.1
1 2 3
0.625 0.470 0.630
> means.2
1 2 3
0.5940000 0.7900000 0.4166667
> means.3
1 2 3 5
0.5650 0.6400 0.6500 0.5625
But not only are these separate calls inefficient, they still don't get me what I want, which is the following
[,1] [,2] [,3]
[1,] 0.630 0.5940000 0.5625
[2,] 0.625 0.5940000 0.5625
[3,] 0.470 0.5940000 0.5625
[4,] 0.630 0.7900000 0.6500
[5,] 0.470 0.4166667 0.5650
[6,] 0.625 0.5940000 0.5650
[7,] 0.470 0.4166667 0.5650
[8,] 0.625 0.7900000 0.5650
[9,] 0.630 0.5940000 0.5625
[10,] 0.625 0.4166667 0.6400
Upvotes: 2
Views: 44
Reputation: 214957
Another option, you can use apply
(because you already have a matrix) to loop through columns( with Margin set to 2) and pass the column to ave
function as group variable, you can either explicitly specify FUN parameter to be mean or not specify it as mean is the default function used:
apply(groupings, 2, ave, x = var) # pass the var as a named parameter since it is the
# parameter at the first position of ave function, if not
# ave will treat the column as the first position parameter
# which you don't want to
# [,1] [,2] [,3]
#[1,] 0.630 0.5940000 0.5625
#[2,] 0.625 0.5940000 0.5625
#[3,] 0.470 0.5940000 0.5625
#[4,] 0.630 0.7900000 0.6500
#[5,] 0.470 0.4166667 0.5650
#[6,] 0.625 0.5940000 0.5650
#[7,] 0.470 0.4166667 0.5650
#[8,] 0.625 0.7900000 0.5650
#[9,] 0.630 0.5940000 0.5625
#[10,] 0.625 0.4166667 0.6400
Or with dplyr
, you can use the mutate_all()
function:
library(dplyr)
mutate_all(as.data.frame(groupings), funs(ave(var, .)))
# V1 V2 V3
#1 0.630 0.5940000 0.5625
#2 0.625 0.5940000 0.5625
#3 0.470 0.5940000 0.5625
#4 0.630 0.7900000 0.6500
#5 0.470 0.4166667 0.5650
#6 0.625 0.5940000 0.5650
#7 0.470 0.4166667 0.5650
#8 0.625 0.7900000 0.5650
#9 0.630 0.5940000 0.5625
#10 0.625 0.4166667 0.6400
Upvotes: 2
Reputation: 8413
library(dplyr)
set.seed(1000)
var <- round(runif(10),digits=2)
groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE), var)
df = data.frame(groupings)
df %>%
group_by(V1)%>% mutate(x1 =mean(var))%>% ungroup(V1) %>%
group_by(V2) %>% mutate(x2=mean(var)) %>% ungroup(V2) %>%
group_by(V3) %>% mutate(x3=mean(var)) %>% ungroup(V3)
# V1 V2 V3 var x1 x2 x3
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2 1 3 0.33 0.4775000 0.322 0.250
#2 3 3 1 0.76 0.6566667 0.470 0.750
#3 1 1 3 0.11 0.1333333 0.322 0.250
#4 3 1 5 0.69 0.6566667 0.322 0.635
#5 3 2 3 0.52 0.6566667 0.630 0.250
#6 1 3 3 0.07 0.1333333 0.470 0.250
#7 2 2 1 0.74 0.4775000 0.630 0.750
#8 2 3 5 0.58 0.4775000 0.470 0.635
#9 1 1 3 0.22 0.1333333 0.322 0.250
#10 2 1 2 0.26 0.4775000 0.322 0.260
# you can simply subset the columns
Upvotes: 3
Reputation: 38510
Here is one method using ave
along with split
and sapply
sapply(split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings))),
function(x) ave(var, x, FUN=mean))
1 2 3
[1,] 0.4566667 0.5550000 0.3925000
[2,] 0.6200000 0.5550000 0.3925000
[3,] 0.4816667 0.5550000 0.3925000
[4,] 0.4566667 0.5550000 0.6200000
[5,] 0.4816667 0.5550000 0.4350000
[6,] 0.4566667 0.5133333 0.6066667
[7,] 0.4816667 0.0100000 0.4350000
[8,] 0.4816667 0.5133333 0.3925000
[9,] 0.4816667 0.5133333 0.6066667
[10,] 0.4816667 0.5550000 0.6066667
The entry to sapply
, split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings)))
produces a list where each list item is a column of the matrix groupings. Each element of this list is fed to sapply
which applies ave
using the vector to group the results.
data
set.seed(1234)
var <- round(runif(10),digits=2)
groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE))
Upvotes: 1