aggregating data by weighted average for each group in R

Question

sample data

# Sample data: a 26-row data.frame (the literal looks like dput() output).
# Columns:
#   PLSVYD.N.16.6         - numeric; the question uses it as the weights
#   ZAPVYD.N.16.6, zap,
#   POLNOT, x1 ... x7     - the variables the question wants to aggregate
#   cluster               - integer grouping variable (values 1 and 2)
mydata=structure(list(PLSVYD.N.16.6 = c(60, 3.5, 7.5, 3.4, 2.4, 14, 
5.4, 3.1, 2.6, 4.1, 5.4, 8.1, 2.5, 4.5, 4.4, 5.4, 3.3, 8.7, 2.9, 
6.4, 9.9, 16, 3.2, 3.7, 11, 3.6), ZAPVYD.N.16.6 = c(1020L, 63L, 
143L, 68L, 58L, 280L, 92L, 62L, 52L, 94L, 124L, 178L, 40L, 59L, 
48L, 70L, 43L, 96L, 46L, 77L, 129L, 192L, 61L, 63L, 187L, 50L
), zap = c(170, 180, 190.6666667, 200, 241.6666667, 200, 170.3703704, 
200, 200, 229.2682927, 229.6296296, 219.7530864, 160, 131.1111111, 
109.0909091, 129.6296296, 130.3030303, 110.3448276, 158.6206897, 
120.3125, 130.3030303, 120, 190.625, 170.2702703, 170, 138.8888889
), POLNOT = c(0.6, 0.6, 0.6, 0.7, 0.7, 0.6, 0.6, 0.6, 0.6, 0.7, 
0.7, 0.7, 0.7, 0.5, 0.5, 0.6, 0.6, 0.5, 0.5, 0.5, 0.6, 0.5, 0.7, 
0.6, 0.6, 0.5), x1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 70L, 70L, 80L, 70L, 80L, 80L, 0L, 0L, 0L, 80L, 0L, 
0L, 0L), x2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 18L, 18L, 19L, 18L, 19L, 19L, 0L, 0L, 0L, 18L, 0L, 0L, 0L
), x3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
20L, 22L, 24L, 22L, 24L, 24L, 0L, 0L, 0L, 22L, 0L, 0L, 0L), x4 = c(0L, 
140L, 0L, 0L, 0L, 0L, 0L, 140L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 110L, 80L, 0L, 0L, 0L, 0L), x5 = c(0L, 22L, 0L, 
0L, 0L, 0L, 0L, 22L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 19L, 20L, 0L, 0L, 0L, 0L), x6 = c(0L, 40L, 0L, 0L, 0L, 
0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
28L, 28L, 0L, 0L, 0L, 0L), x7 = c(100L, 130L, 130L, 110L, 130L, 
130L, 140L, 140L, 140L, 140L, 130L, 130L, 210L, 0L, 0L, 0L, 0L, 
0L, 0L, 130L, 0L, 0L, 0L, 140L, 130L, 140L), cluster = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA, 
-26L))

How can I aggregate these variables

  ZAPVYD.N.16.6
    zap
    POLNOT
    x1
    x2
    x3
    x4
    x5
    x6
    x7

using weighted.mean, with PLSVYD.N.16.6 as the weights? I need to perform this aggregation separately for each group (the cluster variable).

The simple command ag <- aggregate(PLSVYD.N.16.6 ~ ., data = mydata, weighted.mean) doesn't work for me, and I need to do it for each cluster separately.

How can I get a result like this?

cluster ZAPVYD.N.16.6   zap POLNOT  x1  x2  x3  x4  x5  x6  x7
1       616,3608618 184,5601436 0,613734291 0   0   0   8,29443447  1,303411131 2,36983842  114,5960503
2       117,6730769 143,0555556 0,565064103 26,38888889 6,412393162 7,882478632 25,30982906 5,428418803 7,747863248 51,94444444

Ronak Shah · Accepted Answer

You can apply the weighted.mean function to multiple columns for each cluster.

Using dplyr :

library(dplyr)

# Weighted mean of every column from ZAPVYD.N.16.6 through x7, computed
# separately for each cluster, using PLSVYD.N.16.6 as the weights.
# The weights are supplied inside an explicit function rather than through
# across()'s `...`: passing extra arguments via `...` is deprecated since
# dplyr 1.1.0 because it is ambiguous when they are evaluated.
mydata %>%
  group_by(cluster) %>%
  summarise(
    across(
      ZAPVYD.N.16.6:x7,
      function(x) weighted.mean(x, w = PLSVYD.N.16.6)
    )
  )

Or in data.table :

library(data.table)

# setDT() converts mydata to a data.table by reference (no copy is made).
setDT(mydata)

# Per-cluster weighted mean of each column in ZAPVYD.N.16.6:x7, with
# PLSVYD.N.16.6 supplied as the weights (`w`) to weighted.mean().
mydata[
  ,
  lapply(.SD, weighted.mean, w = PLSVYD.N.16.6),
  by = cluster,
  .SDcols = ZAPVYD.N.16.6:x7
]

#   cluster ZAPVYD.N.16.6 zap POLNOT   x1   x2   x3    x4   x5   x6    x7
#1:       1           616 185  0.614  0.0 0.00 0.00  8.29 1.30 2.37 114.6
#2:       2           118 143  0.565 26.4 6.41 7.88 25.31 5.43 7.75  51.9

aggregating data by weighted average for each group in R

Answers (2)

Related Questions