Reputation: 33
I have a list of protein intensities across 3 timepoints for 5 replicates.
I want to evaluate whether the change in protein intensities across timepoints is statistically different between replicates.
Data frame code
dataframe <- structure(list(genes = c("ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1"), replicate = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), levels = c("1",
"2", "3", "4", "5"), class = "factor"), intensity = c(0, 5.3,
0.1, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 3.1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 6.4, 5.2, 234.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10.9,
6, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.9, 11.4, 125, 0, 0, 0, 0,
0, 0, 0, 0), timepoint = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), levels = c("2", "24", "48"), class = "factor"),
mean = c(2534.11613603473, 2534.11613603473, 2534.11613603473,
2534.11613603473, 2545.86975397974, 2545.86975397974, 2545.86975397974,
2545.86975397974, 1041.9492040521, 1041.9492040521, 1041.9492040521,
1041.9492040521, 2352.51975397974, 2352.51975397974, 2352.51975397974,
2352.51975397974, 1549.3483357453, 1549.3483357453, 1549.3483357453,
1549.3483357453, 732.627062228654, 732.627062228654, 732.627062228654,
732.627062228654, 3142.20426917511, 3142.20426917511, 3142.20426917511,
3142.20426917511, 1043.8981910275, 1043.8981910275, 1043.8981910275,
1043.8981910275, 425.870405209841, 425.870405209841, 425.870405209841,
425.870405209841, 3623.80745296671, 3623.80745296671, 3623.80745296671,
3623.80745296671, 540.550434153401, 540.550434153401, 540.550434153401,
540.550434153401, 270.068668596237, 270.068668596237, 270.068668596237,
270.068668596237, 3062.0981910275, 3062.0981910275, 3062.0981910275,
3062.0981910275, 1628.59247467438, 1628.59247467438, 1628.59247467438,
1628.59247467438, 528.705282199711, 528.705282199711, 528.705282199711,
528.705282199711), sd = c(25094.6719445552, 25094.6719445552,
25094.6719445552, 25094.6719445552, 21058.9289049973, 21058.9289049973,
21058.9289049973, 21058.9289049973, 9975.3033998113, 9975.3033998113,
9975.3033998113, 9975.3033998113, 26179.750502137, 26179.750502137,
26179.750502137, 26179.750502137, 10207.7081411541, 10207.7081411541,
10207.7081411541, 10207.7081411541, 6420.40357806305, 6420.40357806305,
6420.40357806305, 6420.40357806305, 39498.0097492931, 39498.0097492931,
39498.0097492931, 39498.0097492931, 7936.15443848429, 7936.15443848429,
7936.15443848429, 7936.15443848429, 3413.66443657427, 3413.66443657427,
3413.66443657427, 3413.66443657427, 45997.397176612, 45997.397176612,
45997.397176612, 45997.397176612, 4168.32150996512, 4168.32150996512,
4168.32150996512, 4168.32150996512, 2961.16684541202, 2961.16684541202,
2961.16684541202, 2961.16684541202, 33839.982382166, 33839.982382166,
33839.982382166, 33839.982382166, 11360.6060492469, 11360.6060492469,
11360.6060492469, 11360.6060492469, 4131.58489947706, 4131.58489947706,
4131.58489947706, 4131.58489947706), id = c(1L, 2L, 3L, 4L,
1383L, 1384L, 1385L, 1386L, 2765L, 2766L, 2767L, 2768L, 4147L,
4148L, 4149L, 4150L, 5529L, 5530L, 5531L, 5532L, 6911L, 6912L,
6913L, 6914L, 8293L, 8294L, 8295L, 8296L, 9675L, 9676L, 9677L,
9678L, 11057L, 11058L, 11059L, 11060L, 12439L, 12440L, 12441L,
12442L, 13821L, 13822L, 13823L, 13824L, 15203L, 15204L, 15205L,
15206L, 16585L, 16586L, 16587L, 16588L, 17967L, 17968L, 17969L,
17970L, 19349L, 19350L, 19351L, 19352L)), row.names = c("1.2.1",
"1.2.2", "1.2.3", "1.2.4", "1.24.6911", "1.24.6912", "1.24.6913",
"1.24.6914", "1.48.13821", "1.48.13822", "1.48.13823", "1.48.13824",
"2.2.1383", "2.2.1384", "2.2.1385", "2.2.1386", "2.24.8293",
"2.24.8294", "2.24.8295", "2.24.8296", "2.48.15203", "2.48.15204",
"2.48.15205", "2.48.15206", "3.2.2765", "3.2.2766", "3.2.2767",
"3.2.2768", "3.24.9675", "3.24.9676", "3.24.9677", "3.24.9678",
"3.48.16585", "3.48.16586", "3.48.16587", "3.48.16588", "4.2.4147",
"4.2.4148", "4.2.4149", "4.2.4150", "4.24.11057", "4.24.11058",
"4.24.11059", "4.24.11060", "4.48.17967", "4.48.17968", "4.48.17969",
"4.48.17970", "5.2.5529", "5.2.5530", "5.2.5531", "5.2.5532",
"5.24.12439", "5.24.12440", "5.24.12441", "5.24.12442", "5.48.19349",
"5.48.19350", "5.48.19351", "5.48.19352"), class = "data.frame")
I tried this:
library(rstatix)
res.aov <- anova_test(data = dataframe, dv = intensity, wid = replicate, within = timepoint, between = replicate)
get_anova_table(res.aov)
Resulting in this:
Error in
mutate()
: ℹ In argument:data = map(.data$data, dplyr::distinct, replicate, .keep_all = TRUE)
. ℹ In group 1:replicate = 1
. Caused by error inmap()
: ℹ In index: 1. Caused by error in.f()
: ! Must use existing variables. ✖replicate
not found in.data
.
I think maybe I am approaching this entirely wrong. Please help
TIA
Upvotes: 1
Views: 46
Reputation: 3973
Based on your description of the data, it sounds like you have repeated measures within subjects. The subjects are the unique combinations of genes
and replicate
. We need to create an additional column identifying those unique combinations and then pass that column name to the wid
argument of anova_test()
.
Before providing the code, though, I will strongly caution you that this is very unlikely to yield useful predictions because your intensity variable is not even close to normally distributed. It is mostly zeros with a handful of positive values. I would recommend looking into alternatives such as generalized linear mixed models (zero-inflated), nonparametric tests, or to turn the intensity variable into a binary variable (zero vs. nonzero) and fit a binomial model. But the following code will do the ANOVA as you requested.
dataframe$gene_rep <- with(dataframe, interaction(genes, replicate))
res.aov <- anova_test(data = dataframe, dv = intensity, wid = gene_rep, within = timepoint, between = replicate)
get_anova_table(res.aov)
Upvotes: 0