Reputation: 1584
Let's say I have these categorical variables in my data set. All variables relate to people's concerns about COVID-19 and were assessed at two time points (with different participants each time).
My main goal is to check whether `time`
(which stays constant across tests) is associated with the response distribution of each item
(economy, social cohesion, and so on; the item varies from test to test). Therefore, I'll need to perform several chi-square tests.
I've followed some instructions using `nest_by`
or `xtabs`,
but I'm not getting the right results.
I would like to keep the tidyverse environment in this analysis.
The main goal is to have several chi-squared tests, such as this one:
# Pearson chi-squared test of independence between survey wave (`time`) and
# the response distribution of ONE item.
#
# chisq.test() requires a contingency table of counts. Tabulating proportions
# instead shrinks the statistic towards 0 and pushes the p-value towards 1.
#
# NOTE(review): `ds_plot_likert` is not defined in the visible code; the
# reproducible sample further down is named `ds` -- confirm which one is meant.
ds_plot_likert %>%
  pivot_longer(cols = -c(time),
               names_to = "item", values_to = "response") %>%
  filter(item == "Children's academic achievement") %>% #need to change all the time...
  # Missing answers carry no information about the association; drop them
  # explicitly rather than relying on xtabs() to discard them silently.
  filter(!is.na(response)) %>%
  count(time, response, name = "N") %>%
  # Response levels nobody chose would add all-zero columns (expected count 0,
  # NaN statistic), so they are dropped from the table.
  xtabs(formula = N ~ time + response, data = ., drop.unused.levels = TRUE) %>%
  chisq.test()
But for all variables in my dataset (and preferably using tidyverse).
Thank you!
The following code recreates a sample of the data so you can reproduce the problem.
# Reproducible sample data in dput() form: a data.frame with 100 rows.
#   time  - character vector taking the values "First" and ".Second".
#   Nine item columns (Economy ... Children's academic achievement) - factors
#   sharing the levels "Not at all", "A little", "Moderately", "Very much";
#   some responses are NA.
# `.Label` is the deparse-time spelling of the factor `levels` attribute;
# structure() renames it to `levels` when the object is rebuilt.
ds <- structure(list(time = c("First", "First", "First", "First", "First",
"First", "First", "First", "First", "First", "First", "First",
".Second", "First", "First", "First", "First", ".Second", "First",
"First", "First", "First", "First", "First", "First", "First",
".Second", "First", "First", ".Second", "First", "First", "First",
"First", "First", "First", "First", "First", "First", "First",
".Second", "First", "First", "First", ".Second", ".Second", "First",
"First", "First", ".Second", ".Second", "First", "First", "First",
"First", ".Second", ".Second", "First", "First", "First", "First",
"First", ".Second", "First", "First", "First", "First", ".Second",
"First", "First", "First", "First", "First", "First", "First",
".Second", "First", ".Second", "First", "First", "First", "First",
"First", "First", "First", "First", "First", "First", "First",
".Second", "First", "First", "First", "First", "First", "First",
"First", ".Second", ".Second", "First"), Economy = structure(c(4L,
3L, 3L, 4L, 3L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 4L,
3L, 3L, NA, 2L, 3L, 4L, 3L, 3L, 4L, 4L, 2L, 3L, 4L, 4L, 3L, 2L,
4L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 3L, 3L, 3L, 4L, 3L, 3L,
4L, 2L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 3L, 2L,
3L, 3L, 3L, 4L, NA, 2L, 4L, 3L, 4L, 2L, 3L, 3L, 2L, NA, 3L, 2L,
3L, 2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 3L,
3L, 3L, 4L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `My personal finance` = structure(c(3L,
2L, 4L, 2L, 4L, 4L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 4L, 3L, 4L, 4L,
2L, 3L, NA, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 4L,
3L, 4L, 2L, 2L, 3L, 2L, 2L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 2L, 2L,
3L, 2L, 4L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 4L, 4L, 2L,
3L, 3L, 4L, 3L, NA, 2L, 3L, 3L, 4L, 2L, 3L, 2L, 3L, NA, 3L, 2L,
2L, 2L, 3L, 4L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 2L, 2L, 3L,
3L, 3L, 3L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `My own health` = structure(c(3L,
2L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L,
3L, 2L, NA, 3L, 4L, 3L, 4L, 3L, 2L, 1L, 2L, 3L, 3L, 4L, 2L, 4L,
2L, 4L, 4L, 3L, 2L, 2L, 4L, 4L, 2L, 4L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 1L, 3L, 4L, 4L, 3L, 3L, 2L, 2L, 4L, 3L, 3L, 4L, 2L,
3L, 3L, 4L, 4L, NA, 2L, 2L, 3L, 4L, 1L, 3L, 4L, 3L, NA, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 2L, 4L, 2L, 2L, 3L, 4L, 3L, 2L, 3L, 4L,
2L, 3L, 3L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `My friends and family health` = structure(c(4L,
3L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 3L, NA, 3L, 4L, 3L, 3L, 4L,
4L, 3L, NA, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 2L, 3L, 4L, 4L, 2L, 4L,
1L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
4L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 2L, 4L, 3L, 4L, 4L, 2L,
3L, 3L, 4L, 4L, NA, 2L, 4L, 3L, 4L, 2L, 3L, 4L, 3L, NA, 3L, 4L,
3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L,
4L, 3L, 4L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `Social cohesion` = structure(c(3L,
3L, 2L, 4L, 4L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L,
3L, 3L, NA, 3L, 3L, 4L, 3L, 3L, 2L, 1L, 2L, 2L, 4L, 4L, 3L, 3L,
1L, 3L, NA, 2L, 3L, 2L, 4L, 4L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L,
2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 2L,
3L, 3L, 3L, 4L, NA, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 2L, NA, 3L, NA,
3L, 3L, 4L, 2L, 4L, 3L, 1L, 4L, 2L, 4L, 3L, 2L, 4L, 2L, 3L, 4L,
4L, 2L, 2L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `Food and pharmaceutical drugs` = structure(c(2L,
3L, 4L, 4L, 3L, 2L, 4L, 1L, 2L, 4L, 3L, NA, 2L, 3L, 3L, 2L, 4L,
3L, 3L, NA, 3L, 4L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 4L, 2L, 2L, 4L,
4L, 4L, 1L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 2L, 2L, 3L, 3L, 2L,
3L, 2L, 3L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L,
3L, 3L, 3L, 4L, NA, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 1L, 1L,
2L, 2L, 2L, 1L, 3L, 3L, 2L, 3L, 1L, 2L, 2L, 2L, 4L, 2L, 3L, 3L,
2L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `Price of grocery products` = structure(c(2L,
2L, 3L, 4L, 3L, 4L, 3L, 2L, 2L, 4L, 3L, 3L, 2L, 4L, 3L, 3L, 4L,
4L, 3L, NA, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 4L, 2L, 4L, 3L,
4L, 4L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 2L,
4L, 2L, 3L, 4L, 2L, 2L, 3L, 4L, 3L, 2L, 3L, 4L, 3L, 4L, 4L, 2L,
3L, 3L, 4L, 4L, NA, 2L, 3L, 2L, 2L, 2L, 1L, 3L, 3L, NA, 1L, 1L,
2L, 2L, 3L, 1L, 3L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 3L,
4L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `Stock prices` = structure(c(2L,
2L, 2L, 4L, 3L, 2L, 1L, 1L, 3L, 4L, 2L, 2L, 2L, 4L, 3L, 3L, 4L,
3L, 2L, NA, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L,
4L, 4L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, NA, 2L, 4L, 2L,
3L, 3L, 4L, 4L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, NA, 3L, 1L,
3L, 3L, 4L, 1L, 3L, 4L, 1L, 3L, 1L, 4L, 2L, 2L, 2L, NA, 3L, 2L,
4L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor"), `Children's academic achievement` = structure(c(4L,
3L, 4L, 1L, NA, NA, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 4L, 3L, 4L, NA,
4L, 3L, NA, 1L, 1L, 3L, 3L, 3L, 1L, 4L, 1L, 3L, 4L, 3L, 2L, 4L,
1L, 4L, 1L, 1L, 3L, 2L, 3L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 3L, 1L,
1L, 1L, 4L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L,
2L, 1L, 1L, 4L, NA, 2L, 2L, 2L, 4L, 1L, 2L, 1L, 2L, NA, 2L, 1L,
NA, 3L, 2L, 2L, 1L, 4L, 2L, 3L, 1L, 4L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately",
"Very much"), class = "factor")), class = "data.frame", row.names = c(NA,
-100L))
Upvotes: 1
Views: 364
Reputation: 389215
You can run the test once per item and store each result in a list column:
library(dplyr)
library(tidyr)
# Run one Pearson chi-squared test of independence (time x response) per item
# and keep the resulting test objects in a list column.
#
# chisq.test() must be given counts. Tabulating within-group proportions
# instead collapses every statistic towards 0 (p-values near 1), so the raw
# long-format responses are passed to the test directly.
result <- ds %>%
  pivot_longer(cols = -c(time),
               names_to = "item", values_to = "response") %>%
  group_by(item) %>%
  # Given two vectors, chisq.test() removes incomplete pairs, cross-tabulates
  # the counts itself and ignores factor levels that never occur.
  summarise(test = list(chisq.test(time, response)))

# A list with one "htest" object per item, in the same order as result$item.
# chisq.test() may warn that the approximation is unreliable when expected
# cell counts are small.
result$test
# Sample output is intentionally not reproduced here: the figures shown for
# the proportion-based table (e.g. X-squared = 0.0099329, df = 3,
# p-value = 0.9997) do not apply to a test computed on counts.
Upvotes: 3