Arthur Pennt
Arthur Pennt

Reputation: 155

Multiple Wilcox Tests as a loop in R

My dataset consists of

# Toy data set: a five-level grouping column (recycled over 100 rows) plus
# three numeric measurement columns drawn uniformly from [0, 100].
# The labels must be combined with c(); a bare parenthesised list does not
# parse. factor() is explicit because data.frame() no longer converts strings
# to factors by default (R >= 4.0), and levels(cb$group) is needed later on.
cb <- data.frame(group = factor(c("A", "B", "C", "D", "E")),
                 WC = runif(100, 0, 100),
                 Ana = runif(100, 0, 100),
                 Clo = runif(100, 0, 100))

str(cb)
data.frame: 66936 obs of 89 variables: 
$group: Factor w/ 5 levels "A", "B", "C" ...
$WC: int 19 28 35 92 10 23...
$Ana: num 17.2 48 35.4 84.2
$ Clo: num 37.2 12.1 45.4 38.9
....

Now I want to run pairwise Wilcoxon tests between the levels of `group` for each variable, so that it looks like this in the end:

commands: 
# Two-sample Wilcoxon tests on the WC column: the values of group "A" are
# compared against the values of each of the other groups in turn.
wilcox.test(cb$WC[cb$group == "A"], cb$WC[cb$group == "B"])
wilcox.test(cb$WC[cb$group == "A"], cb$WC[cb$group == "C"])
wilcox.test(cb$WC[cb$group == "A"], cb$WC[cb$group == "D"])
wilcox.test(cb$WC[cb$group == "A"], cb$WC[cb$group == "E"])
....

inserting the p-value:
WC   A     B    C     D    E
A    1     0.12 0.03  0.2  0.42
B    0.12  1    0.1   0.07 0.1
C    0.03  0.1  1     0.2  0.3
D    0.2   0.07 0.2   1    0.1
E    0.42  0.1  0.3   0.1  1

Ana   A     B    C     D    E
A    1      0.12  0.2  0.39  0.1
B    0.12   1     0.1  0.07  0.1
C    ...
D
E

...

I have a for loop from a previous question (multiple t-tests), but I am struggling to adapt it to this task because the Wilcoxon test is called so differently. Here is the for loop I used for the t-tests:

# One-sample t-test of every group against the overall mean of the column,
# repeated for every column after the first (the first column is `group`).
# Result: character matrix `res` with one row per (column, group) pair and
# the columns group / col / statistic / estimate / p.value.

# factor() makes this work even when `group` is stored as plain character.
gr <- levels(factor(cb$group))
value_cols <- colnames(cb)[-1L]

# Preallocate one slot per test instead of growing a matrix with rbind(),
# which also avoids the all-NA placeholder row at the top of the result.
rows <- vector("list", length(value_cols) * length(gr))
k <- 0L

for (cl in value_cols) {
  overall_mean <- mean(cb[[cl]])
  for (grp in gr) {
    temp <- cb[cb$group == grp, cl]
    tt <- t.test(temp, mu = overall_mean, alternative = "two.sided")
    k <- k + 1L
    # Extract by name: position 5 of unlist(t.test(...)) is the upper
    # confidence bound, not the estimated mean.
    rows[[k]] <- c(group = grp,
                   col = cl,
                   statistic = unname(tt$statistic),
                   estimate = unname(tt$estimate),
                   p.value = tt$p.value)
  }
}

res <- if (length(rows) > 0L) {
  do.call(rbind, rows)
} else {
  matrix(character(0), ncol = 5,
         dimnames = list(NULL, c("group", "col", "statistic",
                                 "estimate", "p.value")))
}

Do you have an idea how to change this for loop to perform a wilcox test?

Upvotes: 2

Views: 5132

Answers (3)

Sathish
Sathish

Reputation: 12723

Raw Data:

# Fix the RNG seed so the simulated measurements are reproducible.
set.seed(1L)

# Five group labels recycled over 100 rows, plus three uniform(0, 100)
# measurement columns (drawn in the order WC, Ana, Clo).
cb <- data.frame(
  group = factor(LETTERS[1:5]),
  WC    = runif(n = 100, min = 0, max = 100),
  Ana   = runif(n = 100, min = 0, max = 100),
  Clo   = runif(n = 100, min = 0, max = 100)
)

Code:

library(purrr)

# All unordered pairs of group levels, one pair per column.
combins <- combn(levels(cb$group), 2)

# Turn the pair matrix into a list with one length-two character vector per
# pair; the list elements are named "1", "2", ... after the column index.
params_list <- split(combins, col(combins))

# Run a two-sample Wilcoxon test of `fml` for every pair of groups, each time
# restricting the data to the rows belonging to that pair.
pairwise_wilcox <- function(fml) {
  map(params_list, function(pair) {
    wilcox.test(formula = fml, data = subset(cb, group %in% pair))
  })
}

model_wc  <- pairwise_wilcox(WC ~ group)
model_ana <- pairwise_wilcox(Ana ~ group)
model_clo <- pairwise_wilcox(Clo ~ group)

# One row per pair of groups, one column per measured variable.
wilcox_pvals <- cbind(map_dbl(model_wc, "p.value"),
                      map_dbl(model_ana, "p.value"),
                      map_dbl(model_clo, "p.value"))

# Label rows with the concatenated pair ("AB", "AC", ...) and columns with
# the names of the measurement columns.
row.names(wilcox_pvals) <- map_chr(params_list, paste0, collapse = "")
colnames(wilcox_pvals) <- names(cb)[2:4]

Output:

> wilcox_pvals
#           WC        Ana        Clo
# AB 0.7380622 0.52909692 0.75835096
# AC 0.9466955 0.41352631 0.32726184
# AD 0.6395139 0.79940719 0.30125264
# AE 0.8619871 0.34078485 0.04595423
# BC 0.9680024 0.63951388 0.18263084
# BD 0.8410127 0.38341328 0.12741907
# BE 0.7994072 0.10807707 0.01809358
# CD 0.7994072 0.21096433 0.94669547
# CE 0.7179503 0.03751918 0.38341328
# DE 0.7788036 0.63951388 0.30125264

Upvotes: 5

Matt W.
Matt W.

Reputation: 1

If you just want the p-values, this should work: it computes the p-value for every pair of groups and stores it in a matrix. Also be careful about multiple comparisons; you may need to adjust your alpha level.

# Symmetric matrix of two-sided Wilcoxon p-values for the WC column, with one
# row and one column per group level.

# factor() makes this work even when `group` is stored as plain character.
gr <- levels(factor(cb$group))
n_gr <- length(gr)
res <- matrix(NA_real_, nrow = n_gr, ncol = n_gr, dimnames = list(gr, gr))

# A group compared with itself always gives p = 1; set it directly instead of
# running a test that only produces tie warnings.
diag(res) <- 1

# The two-sided test is symmetric in its two samples, so each pair is tested
# once and the p-value is mirrored across the diagonal.
for (i in seq_len(max(n_gr - 1L, 0L))) {
  for (j in seq(i + 1L, n_gr)) {
    x <- wilcox.test(cb$WC[cb$group == gr[i]], cb$WC[cb$group == gr[j]])
    res[i, j] <- x$p.value
    res[j, i] <- x$p.value
  }
}

Upvotes: 0

Gopala
Gopala

Reputation: 10483

One approach is to generate combinations of group values and run the test as follows:

# Wilcoxon test on WC between the two groups named in `x`
# (`x` is one column of the pair matrix produced by combn()).
compare_pair <- function(x) {
  wilcox.test(cb$WC[cb$group == x[1]], cb$WC[cb$group == x[2]])
}

# Apply the test to every column (MARGIN = 2), i.e. to every pair of groups.
apply(combn(unique(cb$group), 2), MARGIN = 2, FUN = compare_pair)

Output is as follows:

[[1]]

    Wilcoxon rank sum test

data:  cb$WC[cb$group == x[1]] and cb$WC[cb$group == x[2]]
W = 205, p-value = 0.9042
alternative hypothesis: true location shift is not equal to 0


[[2]]

    Wilcoxon rank sum test

data:  cb$WC[cb$group == x[1]] and cb$WC[cb$group == x[2]]
W = 153, p-value = 0.211
alternative hypothesis: true location shift is not equal to 0

If you just want the p-values, you can get them like this:

# Same pairwise tests as above, but keep only the p-value of each one;
# the result is a numeric vector with one entry per pair of groups.
apply(
  combn(unique(cb$group), 2),
  MARGIN = 2,
  FUN = function(pair) {
    wilcox.test(cb$WC[cb$group == pair[1]],
                cb$WC[cb$group == pair[2]])$p.value
  }
)
[1] 0.904208038 0.210964327 0.820148096 0.564831637 0.012165581 0.799407187 0.231498716 0.021076794 0.004681199
[10] 0.242269621

These correspond to the ten pair-wise comparisons:

# Every unordered pair of distinct group values, one pair per column.
combn(unique(cb$group), 2)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] "A"  "A"  "A"  "A"  "B"  "B"  "B"  "C"  "C"  "D"  
[2,] "B"  "C"  "D"  "E"  "C"  "D"  "E"  "D"  "E"  "E"  

Upvotes: 0

Related Questions