Reputation: 591
I have several boxplots that I am running some statistics on through a for loop.
I want to be able to output the data into a dataframe. i.e. something like this (I've made up random numbers):
names <- c('B1','B2','B3')
for (name in names){
mereLoc <- scaledDaily %>%
filter(scaledDaily$loc == name)
mereLoc$year <- as.numeric(mereLoc$year)
median <- tapply(mereLoc$mAODscale, mereLoc$year, median, na.rm=T)
print(name)
print(median)
krusk <- kruskal.test(mAODscale~year, data=mereLoc)
print(krusk)
pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
print(pairs)
}
here is some test data. its not my actual data, I've just fabricated some.
structure(list(loc = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), levels = c("B1", "B2", "B3"), class = "factor"),
mAODscale = c(0.561948848, 0.851883432, 0.293151829, 0.683807808,
0.864472706, 0.380303934, 0.324501709, 0.765022304, 0.900772901,
0.204731715, 0.877104175, 0.56367162, 0.206528162, 0.353350116,
0.219628257, 0.840723901, 0.716389918, 0.569798858, 0.707583441,
0.120064246, 0.275325307, 0.438391155, 0.308969668, 0.350156436,
0.886955315, 0.693416677, 0.710065022), year = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), levels = c("1",
"2", "3"), class = "factor")), row.names = c(NA, -27L), class = "data.frame")
Upvotes: 2
Views: 58
Reputation: 591
Here is my solution:
names <- c('B1','B2','B3','B4','B5','B6','B7','B8',
'SSSI1','SSSI2','SSSI3',
"CM1", "CM2", "CM3", "CM4", "CM7", "CM8", "CM9", "CM11",
"Crose 1", "Crose 2", "Crose 3")
stats <- list()
for (name in names){
mereLoc <- scaledDaily %>%
filter(scaledDaily$loc == name)
mereLoc$year <- as.numeric(mereLoc$year)
median <- aggregate(mereLoc$mAODscale, list(mereLoc$year), median, na.rm=T)
krusk <- kruskal.test(mAODscale~year, data=mereLoc)
krusk2 <- tidy(krusk)
pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
pairs2 <- tidy(pairs)
stats[[name]] <- c("loc"=name,
"Y1_median"=median$x[1],
"Y2_median"=median$x[2],
"Y3_median"=median$x[3],
"KWCHiSq"=krusk$statistic,
"df"=krusk$parameter,
"pvalue"=krusk$p.value,
"Y1Y2"=pairs2[1,3],
"Y1Y3"=pairs2[2,3],
"Y2Y3"=pairs2[3,3])
}
stats <- Reduce(function(x,y) merge(x,y,all=TRUE), stats)
names(stats) <- c('loc','Y1_median','Y2_median','Y3_median',
'KW-ChiSq','df','pvalue','Y1-Y2','Y1-Y3','Y2-Y3')
Upvotes: 0
Reputation: 76402
Here is a way.
If the data is in the long format then it is generally easier to process by tidyverse
pipes. In the code below start by splitting by groups of location, run the tests, tidy as a data.frame and assemble the parts.
Reshape the results to wide format when needed.
suppressPackageStartupMessages({
library(dplyr)
library(broom)
library(tidyr)
})
bind_cols(
scaledDaily %>%
group_split(loc) %>%
lapply(\(X) {
loc <- first(X$loc)
kruskal.test(mAODscale ~ year, data = X) %>%
tidy() %>%
mutate(Location = loc) %>%
relocate(Location)
}) %>%
do.call(rbind, args = .) %>%
select(Location, statistic, p.value),
scaledDaily %>%
group_split(loc) %>%
lapply(\(X) {
X %>%
group_by(year) %>%
summarise(median = median(mAODscale)) %>%
pivot_wider(
names_from = year,
names_glue = "Y{year}_median",
values_from = median
)
}) %>%
do.call(rbind, .)
) %>% relocate(ends_with("median"), .after = Location)
#> # A tibble: 3 × 6
#> Location Y1_median Y2_median Y3_median statistic p.value
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 B1 0.562 0.684 0.765 0.622 0.733
#> 2 B2 0.564 0.220 0.716 3.20 0.202
#> 3 B3 0.275 0.350 0.710 4.36 0.113
scaledDaily %>%
group_split(loc) %>%
lapply(\(X) {
loc <- first(X$loc)
pairwise.wilcox.test(X$mAODscale, X$year, p.adjust.method='BH') %>%
tidy() %>%
mutate(Location = loc) %>%
relocate(Location)
}) %>%
do.call(rbind, args = .) %>%
pivot_wider(
id_cols = Location,
names_from = starts_with("group"),
names_glue = "Y{group1}_Y{group2}",
values_from = p.value
)
#> # A tibble: 3 × 4
#> Location Y2_Y1 Y3_Y1 Y3_Y2
#> <fct> <dbl> <dbl> <dbl>
#> 1 B1 1 1 1
#> 2 B2 0.7 0.7 0.3
#> 3 B3 0.7 0.3 0.3
Created on 2023-08-23 with reprex v2.0.2
Upvotes: 1