Reputation: 159
My script is designed to run an ANOVA comparing groups (rows) for each column. It should produce one data frame that collects the ANOVA and post-hoc results together with the name of each column. However, it keeps repeating the same ANOVA and post-hoc results for every column. How do I fix this? A sample sheet is available here.
# Question script, kept exactly as posted: for each selected column it runs a
# one-way ANOVA and a Tukey HSD post-hoc test against `Group`, appends the
# results to two accumulator data frames, and writes both to CSV after the loop.
#Save your Datasheet into variable X
x <- read.csv("T0_B_Class_Anova_test.csv")
# Every missing value is replaced with 0 before any test is run.
x[is.na(x)] <- 0
# Accumulators; both are grown with rbind() on every iteration.
DF.Anova <- data.frame()
DF.Tukey <- data.frame()
#Counts through the columns
# Iterates from column 2 up to the third-from-last column (ncol(x) - 2).
for(i in 2:(ncol(x)-2)){
# Name of the column selected by the loop index; only used by print() below.
columns <- names(x[i])
##Runs an ANOVA - Group being a grouping factor
# NOTE(review): the response is hard-coded as x[,2] rather than x[,i], so the
# same model is fitted on every iteration -- this is why the results repeat.
anovaresult <- anova(aov(x[,2]~Group,data=x))
DF.Anova <- rbind(DF.Anova, anovaresult)
##fix anova into data frame
# Builds a column of labels from the column names of x.
# NOTE(review): the range 2:52 is hard-coded and independent of the loop range
# above -- confirm it matches the columns actually tested.
Famall = colnames(x)
Famall = as.data.frame(Famall)
Famall = Famall[2:52,]
Famall = as.data.frame(Famall)
# presumably `each = 2` is meant to match the two rows per ANOVA table -- TODO confirm
DFanovanames = rep(Famall, each = 2)
DFanovanames = as.data.frame(DFanovanames)
#install.packages("tidyr")
# tidyr is attached here, but the only package function called is dplyr::arrange.
library(tidyr)
anovanames = data.frame(Names=unlist(DFanovanames, use.names = FALSE))
# NOTE(review): arrange() sorts the labels by name, while the result rows are
# appended in loop order -- verify that the two orders line up.
o.anovanames = dplyr::arrange(anovanames, Names)
# Reassigned on every iteration; only the last value reaches write.csv() below.
finalanova_BFT0 = cbind(rn = rownames(DF.Anova), DF.Anova, o.anovanames)
##Runs Tukeys Post-hoc test on Anova
# NOTE(review): same hard-coded x[,2] as in the ANOVA call above.
posthocresult <- TukeyHSD(aov(x[,2]~Group,data=x))
DF.Tukey <- rbind(DF.Tukey, posthocresult$Group)
##fix tukey into data frame
# Same label construction as for the ANOVA table, with the same hard-coded
# 2:52 range.
Famname = colnames(x)
Famname = as.data.frame(Famname)
Famname = Famname[2:52,]
Famname = as.data.frame(Famname)
# presumably `each = 3` is meant to match the number of pairwise comparisons per column -- TODO confirm
DFposthocnames = rep(Famname, each = 3)
DFposthocnames = data.frame(DFposthocnames)
# NOTE(review): this call is inside the loop and not commented out, so the
# package installation is requested on every iteration.
install.packages("tidyr")
library(tidyr)
posthocnames = data.frame(Names=unlist(DFposthocnames, use.names = FALSE))
o.posthocnames = dplyr::arrange(posthocnames, Names)
# Reassigned on every iteration; only the last value reaches write.csv() below.
finalposthoc_BFT0 = cbind(rn = rownames(DF.Tukey), DF.Tukey, o.posthocnames)
##Prints the column name, the ANOVA table and the post-hoc result
print(columns)
print(anovaresult)
print(posthocresult)
}
# Write the tables produced by the final loop iteration to CSV.
write.csv(finalanova_BFT0, file="testfinalanova_BCT0.csv")
write.csv(finalposthoc_BFT0, file="finalposthoc_BCT0.csv")
Upvotes: 0
Views: 667
Reputation: 107577
First, as advised in Circle 2: Growing Objects of the R Inferno, avoid expanding complex (higher dimensional) objects like dataframes in a loop. This is memory inefficient and results in excessive copying in RAM.
As for your repeating results: currently the formulas in your `anova` and `TukeyHSD` calls never change between iterations. Specifically, `x[,2]` stays fixed in every iteration instead of using the loop index `i`:
# Both calls hard-code x[,2] as the response, so every iteration tests the same column.
anovaresult <- anova(aov(x[,2]~Group,data=x))
posthocresult <- TukeyHSD(aov(x[,2]~Group,data=x))
Therefore, consider the following adjustment, written entirely in base R, which builds a list of data frames with `lapply` calls and binds the test results directly into each data frame. Outside of the loop, all data frames are then appended into one final, single data frame.
The demonstration below uses a reproducible, randomized example, because the GDrive link is not accessible on my end (security) and will likely not be available to future readers. The only adjustment you need to make is the sequence of column numbers passed into `lapply`.
Data (seeded for reproducibility)
# Seed the RNG so the demo data below is reproducible.
set.seed(061818)

# Demo data: one grouping column and three numeric columns on different scales.
# The random draws are made in the same order as the columns appear
# (Group, NUM1, NUM2, NUM3), so the seeded values do not depend on this layout.
x <- local({
  n_obs <- 500
  group_labels <- c("julia", "r", "pandas", "stata", "sas", "spss")

  group_draws <- replicate(n_obs, sample(group_labels, 1, replace = TRUE))
  num1 <- rnorm(n_obs) * 100
  num2 <- rnorm(n_obs)
  num3 <- rnorm(n_obs) / 100

  data.frame(Group = group_draws, NUM1 = num1, NUM2 = num2, NUM3 = num3)
})
Dataframe Build
# CREATE LIST OF ANOVA RESULTS DATAFRAME
# One data frame per response column of `x` (every column after the first; in
# this example the first column is `Group`). Each data frame holds the ANOVA
# table for that column, labelled with the column name (`var`) and the table
# row name (`type`).
# seq_len(ncol(x))[-1] replaces 2:ncol(x) so that a data frame with no response
# columns yields an empty list instead of iterating over c(2, 1).
anova_df_list <- lapply(seq_len(ncol(x))[-1], function(i) {
  ## Runs an ANOVA on column i - Group being a grouping factor
  anovaresult <- anova(aov(x[[i]] ~ Group, data = x))

  # row.names = NULL resets the row names so the per-column tables can be
  # appended later with plain sequential row numbers.
  data.frame(var = names(x)[i],
             type = row.names(anovaresult),
             anovaresult,
             row.names = NULL)
})
# CREATE LIST OF TUKEY HSD RESULTS DATAFRAME
# One data frame per response column of `x` (every column after the first; in
# this example the first column is `Group`). Each data frame holds the pairwise
# Tukey HSD comparisons for that column, labelled with the column name (`var`)
# and the compared pair (`type`).
# seq_len(ncol(x))[-1] replaces 2:ncol(x) so that a data frame with no response
# columns yields an empty list instead of iterating over c(2, 1).
tukey_df_list <- lapply(seq_len(ncol(x))[-1], function(i) {
  ## Runs Tukey's post-hoc test on the ANOVA fit for column i
  posthocresult <- TukeyHSD(aov(x[[i]] ~ Group, data = x))

  # posthocresult$Group is the comparison matrix for the Group term; its row
  # names are the compared pairs, moved into the `type` column here.
  data.frame(var = names(x)[i],
             type = row.names(posthocresult$Group),
             posthocresult$Group,
             row.names = NULL)
})
# APPEND ALL DFs INTO A SINGLE DATAFRAME OUTPUT
# Each list is row-bound in one call, giving one ANOVA table and one Tukey
# HSD table.
finalanova_BFT0 <- do.call(what = "rbind", args = anova_df_list)
finalposthoc_BFT0 <- do.call(what = "rbind", args = tukey_df_list)
Output
finalanova_BFT0
# var type Df Sum.Sq Mean.Sq F.value Pr..F.
# 1 NUM1 Group 5 3.294895e+04 6.589791e+03 0.5689185 0.7238625
# 2 NUM1 Residuals 494 5.722009e+06 1.158301e+04 NA NA
# 3 NUM2 Group 5 4.555384e+00 9.110768e-01 1.0519364 0.3864008
# 4 NUM2 Residuals 494 4.278509e+02 8.660949e-01 NA NA
# 5 NUM3 Group 5 7.930182e-04 1.586036e-04 1.4649269 0.1997531
# 6 NUM3 Residuals 494 5.348403e-02 1.082673e-04 NA NA
finalposthoc_BFT0
# var type diff lwr upr p.adj
# 1 NUM1 pandas-julia 1.588690e+01 -30.303690269 6.207749e+01 0.9229562
# 2 NUM1 r-julia 6.224191e+00 -39.105049169 5.155343e+01 0.9987890
# 3 NUM1 sas-julia -2.558098e+00 -48.597653093 4.348146e+01 0.9999859
# 4 NUM1 spss-julia -5.533965e-01 -46.743985299 4.563719e+01 1.0000000
# 5 NUM1 stata-julia 1.920475e+01 -29.494409788 6.790390e+01 0.8695383
# 6 NUM1 r-pandas -9.662708e+00 -56.922953435 3.759754e+01 0.9920073
# 7 NUM1 sas-pandas -1.844500e+01 -66.386955838 2.949696e+01 0.8810193
# 8 NUM1 spss-pandas -1.644030e+01 -64.527313252 3.164672e+01 0.9247795
# 9 NUM1 stata-pandas 3.317847e+00 -47.183623854 5.381932e+01 0.9999676
# 10 NUM1 sas-r -8.782289e+00 -55.894929992 3.833035e+01 0.9948026
# 11 NUM1 spss-r -6.777587e+00 -54.037832960 4.048266e+01 0.9985067
# 12 NUM1 stata-r 1.298055e+01 -36.734312824 6.269542e+01 0.9758552
# 13 NUM1 spss-sas 2.004702e+00 -45.937257220 4.994666e+01 0.9999966
# 14 NUM1 stata-sas 2.176284e+01 -28.600522533 7.212621e+01 0.8188459
# 15 NUM1 stata-spss 1.975814e+01 -30.743328824 7.025961e+01 0.8733230
# 16 NUM2 pandas-julia 4.328917e-02 -0.356126828 4.427052e-01 0.9996168
# 17 NUM2 r-julia 7.351585e-02 -0.318451955 4.654837e-01 0.9946513
# 18 NUM2 sas-julia 1.260665e-01 -0.272043455 5.241765e-01 0.9449289
# 19 NUM2 spss-julia 2.112904e-01 -0.188125601 6.107064e-01 0.6557799
# 20 NUM2 stata-julia 2.834413e-01 -0.137666545 7.045492e-01 0.3876287
# 21 NUM2 r-pandas 3.022668e-02 -0.378438781 4.388921e-01 0.9999416
# 22 NUM2 sas-pandas 8.277736e-02 -0.331782960 4.973377e-01 0.9928376
# 23 NUM2 spss-pandas 1.680012e-01 -0.247813441 5.838159e-01 0.8573913
# 24 NUM2 stata-pandas 2.401522e-01 -0.196540570 6.768449e-01 0.6165912
# 25 NUM2 sas-r 5.255068e-02 -0.354838417 4.599398e-01 0.9991049
# 26 NUM2 spss-r 1.377745e-01 -0.270890910 5.464400e-01 0.9288895
# 27 NUM2 stata-r 2.099255e-01 -0.219965388 6.398164e-01 0.7288549
# 28 NUM2 spss-sas 8.522386e-02 -0.329336457 4.997842e-01 0.9918030
# 29 NUM2 stata-sas 1.573748e-01 -0.278123725 5.928734e-01 0.9063874
# 30 NUM2 stata-spss 7.215095e-02 -0.364541797 5.088437e-01 0.9970600
# 31 NUM3 pandas-julia 2.540117e-03 -0.001925601 7.005835e-03 0.5807914
# 32 NUM3 r-julia -1.418724e-03 -0.005801167 2.963718e-03 0.9396929
# 33 NUM3 sas-julia -9.892923e-04 -0.005440408 3.461824e-03 0.9882722
# 34 NUM3 spss-julia 2.222945e-04 -0.004243423 4.688012e-03 0.9999918
# 35 NUM3 stata-julia 2.480217e-04 -0.004460225 4.956269e-03 0.9999892
# 36 NUM3 r-pandas -3.958842e-03 -0.008527974 6.102912e-04 0.1323856
# 37 NUM3 sas-pandas -3.529410e-03 -0.008164451 1.105631e-03 0.2496801
# 38 NUM3 spss-pandas -2.317823e-03 -0.006966888 2.331242e-03 0.7109641
# 39 NUM3 stata-pandas -2.292096e-03 -0.007174591 2.590399e-03 0.7607515
# 40 NUM3 sas-r 4.294318e-04 -0.004125430 4.984294e-03 0.9998066
# 41 NUM3 spss-r 1.641019e-03 -0.002928114 6.210151e-03 0.9086036
# 42 NUM3 stata-r 1.666746e-03 -0.003139700 6.473192e-03 0.9204005
# 43 NUM3 spss-sas 1.211587e-03 -0.003423454 5.846628e-03 0.9757341
# 44 NUM3 stata-sas 1.237314e-03 -0.003631829 6.106457e-03 0.9785797
# 45 NUM3 stata-spss 2.572720e-05 -0.004856768 4.908222e-03 1.0000000
Upvotes: 1