Reputation: 125
I would like to use dplyr to split a dataset on several variables, and then automatically do pairwise comparions between different levels of a specific variable.
To illustrate the concept I generate some mock data
df <- data.frame(id = rep(c(1,2,3),each=4),trt = rep(c("a","b"),each=3),method=rep(c("m1","m2","m3")),result=rnorm(12))
> df
id trt method result
1 1 a m1 -1.417685956
2 1 a m2 0.007361868
3 1 a m3 -0.763148987
4 1 b m1 -0.203534562
5 2 b m2 -0.359201092
6 2 b m3 -1.256859805
7 2 a m1 -0.160111516
8 2 a m2 -1.833714579
9 3 a m3 0.423011535
10 3 b m1 -0.382595107
11 3 b m2 -0.880756365
12 3 b m3 -0.447458763
For each id and treatment, I want to do the pairwise comparison between the result for each method. In my case the pairwise comparison is a simple division of the result. That is I want to generate the 9 possible divisions m1/m1, m1/m2, m1/m3, m2/m1, ..., m3/m3. That means that each method acts as a both reference and comparator.
I am thinking to start using spread using
spread(df,method,result)
which would give me
id trt m1 m2 m3
1 1 a 0.541351984 -0.1223797 -1.0296924
2 1 b -1.860059387 -0.1117496 -0.4026570
3 2 a -0.692783955 2.1372636 1.6603239
4 2 b -0.006360277 -0.1880784 1.4650995
5 3 a 1.475971742 1.1721263 -0.4858894
6 3 b 2.149726008 -0.6282801 -0.7653839
But how can I automatically generate all possible comparisons (divisions in my case) when they now are different columns? In the end I want to calculate summary statistics over all subjects and treatments on these pairwise comparisons.
Thankful for any input on this
Upvotes: 1
Views: 1456
Reputation: 41220
You dont need to spread.
dplyr:
inner_join(df,df,by = c("trt"="trt","id"="id")) %>% filter(method.x>=method.y) %>%
mutate(div = result.x/result.y)
id trt method.x result.x method.y result.y div
1 1 a m1 1.8453179 m1 1.8453179 1.0000000
2 1 a m2 0.5514105 m1 1.8453179 0.2988160
3 1 a m2 0.5514105 m2 0.5514105 1.0000000
4 1 a m3 -0.9281152 m1 1.8453179 -0.5029568
5 1 a m3 -0.9281152 m2 0.5514105 -1.6831657
6 1 a m3 -0.9281152 m3 -0.9281152 1.0000000
7 1 b m1 -0.8557873 m1 -0.8557873 1.0000000
data.table:
library(data.table)
setDT(df)
df[df,on=.(trt,id),allow.cartesian=T][method>=i.method,.(trt,id,method,i.method,div = i.result/result)]
Upvotes: 3
Reputation: 11
Use the combn
after grouping
library(dplyr)
library(tidyr)
df %>%
group_by(id, trt) %>%
summarise(result = if(n() >=2) list(combn(result, 2, FUN = function(x) x[1]/x[2])) else list(result)) %>%
unnest(c(result))
For full combinations, use outer
df %>%
group_by(id, trt) %>%
summarise(result = list(c(outer(result, result, FUN = `/`)))) %>%
unnest(c(result))
# A tibble: 28 x 3
# Groups: id [3]
# id trt result
# <dbl> <chr> <dbl>
# 1 1 a 1
# 2 1 a 0.597
# 3 1 a -0.121
# 4 1 a 1.68
# 5 1 a 1
# 6 1 a -0.202
# 7 1 a -8.29
# 8 1 a -4.95
# 9 1 a 1
#10 1 b 1
# … with 18 more rows
Or with merge
transform(merge(df, df, by = c('trt', 'id')), ratio = result.x/result.y)
# trt id method.x result.x method.y result.y ratio
#1 a 1 m1 2.19881035 m1 2.19881035 1.0000000
#2 a 1 m1 2.19881035 m2 1.31241298 1.6753952
#3 a 1 m1 2.19881035 m3 -0.26514506 -8.2928582
#4 a 1 m2 1.31241298 m1 2.19881035 0.5968741
#5 a 1 m2 1.31241298 m2 1.31241298 1.0000000
#6 a 1 m2 1.31241298 m3 -0.26514506 -4.9497924
#7 a 1 m3 -0.26514506 m1 2.19881035 -0.1205857
#8 a 1 m3 -0.26514506 m2 1.31241298 -0.2020287
#9 a 1 m3 -0.26514506 m3 -0.26514506 1.0000000
#10 a 2 m1 -0.78860284 m1 -0.78860284 1.0000000
#11 a 2 m1 -0.78860284 m2 -0.59461727 1.3262360
#12 a 2 m2 -0.59461727 m1 -0.78860284 0.7540136
#13 a 2 m2 -0.59461727 m2 -0.59461727 1.0000000
#14 a 3 m3 1.65090747 m3 1.65090747 1.0000000
#15 b 1 m1 0.54319406 m1 0.54319406 1.0000000
#16 b 2 m2 -0.41433995 m2 -0.41433995 1.0000000
#17 b 2 m2 -0.41433995 m3 -0.47624689 0.8700108
#18 b 2 m3 -0.47624689 m2 -0.41433995 1.1494110
#19 b 2 m3 -0.47624689 m3 -0.47624689 1.0000000
#20 b 3 m1 -0.05402813 m1 -0.05402813 1.0000000
#21 b 3 m1 -0.05402813 m2 0.11924524 -0.4530841
#22 b 3 m1 -0.05402813 m3 0.24368743 -0.2217108
#23 b 3 m2 0.11924524 m1 -0.05402813 -2.2070956
#24 b 3 m2 0.11924524 m2 0.11924524 1.0000000
#25 b 3 m2 0.11924524 m3 0.24368743 0.4893368
#26 b 3 m3 0.24368743 m1 -0.05402813 -4.5103810
#27 b 3 m3 0.24368743 m2 0.11924524 2.0435821
#28 b 3 m3 0.24368743 m3 0.24368743 1.0000000
Upvotes: 0
Reputation: 18551
I am not 100% sure how your desired output would look like, but here is a dplyr
approach:
library(dplyr)
df <- data.frame(id = rep(c(1,2,3),each=4),trt = rep(c("a","b"),each=3),method=rep(c("m1","m2","m3")),result=rnorm(12))
df %>%
inner_join(df, by = c("id", "trt")) %>%
mutate(comparison = result.x / result.y)
#> id trt method.x result.x method.y result.y comparison
#> 1 1 a m1 -0.21178736 m1 -0.21178736 1.0000000
#> 2 1 a m1 -0.21178736 m2 0.85283041 -0.2483347
#> 3 1 a m1 -0.21178736 m3 1.20428478 -0.1758615
#> 4 1 a m2 0.85283041 m1 -0.21178736 -4.0268240
#> 5 1 a m2 0.85283041 m2 0.85283041 1.0000000
#> 6 1 a m2 0.85283041 m3 1.20428478 0.7081634
#> 7 1 a m3 1.20428478 m1 -0.21178736 -5.6862921
#> 8 1 a m3 1.20428478 m2 0.85283041 1.4121035
#> 9 1 a m3 1.20428478 m3 1.20428478 1.0000000
#> 10 1 b m1 0.01242714 m1 0.01242714 1.0000000
#> 11 2 b m2 -0.11301668 m2 -0.11301668 1.0000000
#> 12 2 b m2 -0.11301668 m3 -0.38098395 0.2966442
#> 13 2 b m3 -0.38098395 m2 -0.11301668 3.3710417
#> 14 2 b m3 -0.38098395 m3 -0.38098395 1.0000000
#> 15 2 a m1 1.04499143 m1 1.04499143 1.0000000
#> 16 2 a m1 1.04499143 m2 0.90353396 1.1565602
#> 17 2 a m2 0.90353396 m1 1.04499143 0.8646329
#> 18 2 a m2 0.90353396 m2 0.90353396 1.0000000
#> 19 3 a m3 0.12398337 m3 0.12398337 1.0000000
#> 20 3 b m1 1.22590343 m1 1.22590343 1.0000000
#> 21 3 b m1 1.22590343 m2 -1.37266240 -0.8930844
#> 22 3 b m1 1.22590343 m3 0.41983609 2.9199572
#> 23 3 b m2 -1.37266240 m1 1.22590343 -1.1197150
#> 24 3 b m2 -1.37266240 m2 -1.37266240 1.0000000
#> 25 3 b m2 -1.37266240 m3 0.41983609 -3.2695198
#> 26 3 b m3 0.41983609 m1 1.22590343 0.3424708
#> 27 3 b m3 0.41983609 m2 -1.37266240 -0.3058553
#> 28 3 b m3 0.41983609 m3 0.41983609 1.0000000
Created on 2020-06-14 by the reprex package (v0.3.0)
Upvotes: 1