Reputation: 3384
data=data.frame(SCORE=c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
TYPE=c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
GRADE=c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
SCHOOL=c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2))
If you have this sample data and wish to get it in to this format:
For EVERY 'GRADE' and 'SCHOOL' I wish to get the mean value of 'SCORE' for 'TYPE' equals to 1 and subtract it away from the mean value of 'SCORE' for 'TYPE' equals to 0.
In the data table format you can put data[, SCORESUBTRACT := sum(SCORE)/nrow(data), by = list(GRADE, SCHOOL)] but is it possible to get the subtraction for 'TYPE' equals to 1 from 'TYPE' equals to 0?
Upvotes: 0
Views: 184
Reputation: 46
Here is a data.table solution for you. Hope it works well.
First lets read the same data as a data.table directly and set the key as grade and school
data=data.table(SCORE=c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
TYPE=c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
GRADE=c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
SCHOOL=c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2))
setkeyv(data , c("GRADE","SCHOOL"))
Now we calculate the mean score like you had and then go forward with calculating the updated score as you wanted it
data[ , MeanScore:= mean(SCORE) , by = key(data)]
data[ TYPE == 0 , updated_score := SCORE - MeanScore ]
data[ TYPE == 1 , updated_score := MeanScore ]
Upvotes: 1
Reputation: 39595
Try this:
library(tidyverse)
data %>% group_by(GRADE,SCHOOL,TYPE) %>% summarise(Val=mean(SCORE,na.rm=T)) %>%
pivot_wider(names_from = TYPE,values_from = Val) %>%
mutate(Diff=`1`-`0`)
# A tibble: 6 x 5
# Groups: GRADE, SCHOOL [6]
GRADE SCHOOL `0` `1` Diff
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 7 6.75 -0.25
2 1 2 7.75 7.8 0.0500
3 2 1 7.57 8 0.429
4 2 2 6.5 7.67 1.17
5 3 1 6.5 6.75 0.25
6 3 2 5.67 7.67 2
Upvotes: 2