Reputation: 3195
Sample data:
# Sample data: 26 observations of 11 variables.
# `PLSVYD.N.16.6` supplies the weights for the weighted means computed below,
# and `cluster` is the grouping variable (rows 1-11 are cluster 1, rows 12-26
# are cluster 2).
mydata <- data.frame(
  PLSVYD.N.16.6 = c(
    60, 3.5, 7.5, 3.4, 2.4, 14, 5.4, 3.1, 2.6, 4.1, 5.4, 8.1, 2.5,
    4.5, 4.4, 5.4, 3.3, 8.7, 2.9, 6.4, 9.9, 16, 3.2, 3.7, 11, 3.6
  ),
  ZAPVYD.N.16.6 = c(
    1020L, 63L, 143L, 68L, 58L, 280L, 92L, 62L, 52L, 94L, 124L, 178L, 40L,
    59L, 48L, 70L, 43L, 96L, 46L, 77L, 129L, 192L, 61L, 63L, 187L, 50L
  ),
  zap = c(
    170, 180, 190.6666667, 200, 241.6666667, 200, 170.3703704,
    200, 200, 229.2682927, 229.6296296, 219.7530864, 160, 131.1111111,
    109.0909091, 129.6296296, 130.3030303, 110.3448276, 158.6206897,
    120.3125, 130.3030303, 120, 190.625, 170.2702703, 170, 138.8888889
  ),
  POLNOT = c(
    0.6, 0.6, 0.6, 0.7, 0.7, 0.6, 0.6, 0.6, 0.6, 0.7, 0.7, 0.7, 0.7,
    0.5, 0.5, 0.6, 0.6, 0.5, 0.5, 0.5, 0.6, 0.5, 0.7, 0.6, 0.6, 0.5
  ),
  # x1-x3 are non-zero on the same rows (14-19 and 23).
  x1 = c(
    rep(0L, 13L), 70L, 70L, 80L, 70L, 80L, 80L,
    rep(0L, 3L), 80L, rep(0L, 3L)
  ),
  x2 = c(
    rep(0L, 13L), 18L, 18L, 19L, 18L, 19L, 19L,
    rep(0L, 3L), 18L, rep(0L, 3L)
  ),
  x3 = c(
    rep(0L, 13L), 20L, 22L, 24L, 22L, 24L, 24L,
    rep(0L, 3L), 22L, rep(0L, 3L)
  ),
  # x4-x6 are non-zero on the same rows (2, 8, 21 and 22).
  x4 = c(
    0L, 140L, rep(0L, 5L), 140L, rep(0L, 12L), 110L, 80L, rep(0L, 4L)
  ),
  x5 = c(
    0L, 22L, rep(0L, 5L), 22L, rep(0L, 12L), 19L, 20L, rep(0L, 4L)
  ),
  x6 = c(
    0L, 40L, rep(0L, 5L), 40L, rep(0L, 12L), 28L, 28L, rep(0L, 4L)
  ),
  x7 = c(
    100L, 130L, 130L, 110L, 130L, 130L, 140L, 140L, 140L, 140L, 130L, 130L,
    210L, rep(0L, 6L), 130L, rep(0L, 3L), 140L, 130L, 140L
  ),
  cluster = rep(1:2, times = c(11L, 15L))
)
How can I aggregate these variables:
ZAPVYD.N.16.6
zap
POLNOT
x1
x2
x3
x4
x5
x6
x7
using weighted.mean, with PLSVYD.N.16.6 as the weights?
The aggregation must be performed separately for each group (the cluster variable).
The simple command ag <- aggregate(PLSVYD.N.16.6 ~ ., data = mydata, weighted.mean)
doesn't work for me,
and I need to do it for each cluster separately.
How can I get a result like this?
cluster ZAPVYD.N.16.6 zap POLNOT x1 x2 x3 x4 x5 x6 x7
1 616,3608618 184,5601436 0,613734291 0 0 0 8,29443447 1,303411131 2,36983842 114,5960503
2 117,6730769 143,0555556 0,565064103 26,38888889 6,412393162 7,882478632 25,30982906 5,428418803 7,747863248 51,94444444
Upvotes: 1
Views: 46
Reputation: 887881
We can also use summarise_at
library(dplyr)

# Weighted mean of every column from ZAPVYD.N.16.6 through x7, one row per
# cluster, using PLSVYD.N.16.6 as the weights.
# The weight column is referenced inside the lambda (not passed through
# `...`) so that it is looked up in the rows of the current cluster and
# always has the same length as the column being averaged.
mydata %>%
  group_by(cluster) %>%
  summarise_at(
    vars(ZAPVYD.N.16.6:x7),
    ~ weighted.mean(.x, w = PLSVYD.N.16.6)
  )
Upvotes: 0
Reputation: 389265
You can apply the weighted.mean function to multiple columns for each cluster.
Using dplyr:
library(dplyr)

# Weighted mean of every column from ZAPVYD.N.16.6 through x7, one row per
# cluster, using PLSVYD.N.16.6 as the weights.
# Extra arguments are supplied through a lambda rather than across()'s `...`,
# which is deprecated as of dplyr 1.1.0; the lambda also makes it explicit
# that the weights are taken from the rows of the current cluster.
mydata %>%
  group_by(cluster) %>%
  summarise(
    across(
      ZAPVYD.N.16.6:x7,
      ~ weighted.mean(.x, w = PLSVYD.N.16.6)
    )
  )
Or in data.table:
library(data.table)

# setDT() converts `mydata` to a data.table by reference (no copy is made).
# For each cluster, apply weighted.mean() to every column in .SDcols,
# weighting by the PLSVYD.N.16.6 values of that cluster.
setDT(mydata)[
  ,
  lapply(.SD, weighted.mean, w = PLSVYD.N.16.6),
  by = cluster,
  .SDcols = ZAPVYD.N.16.6:x7
]
# cluster ZAPVYD.N.16.6 zap POLNOT x1 x2 x3 x4 x5 x6 x7
#1: 1 616 185 0.614 0.0 0.00 0.00 8.29 1.30 2.37 114.6
#2: 2 118 143 0.565 26.4 6.41 7.88 25.31 5.43 7.75 51.9
Upvotes: 3