Luis
Luis

Reputation: 1584

Using nest_by, nest, group_by or a loop to run a chi-square test on each of several variables in R (Likert scales)

Let's say I have these categorical variables in my data set. All variables relate to people's concerns about COVID-19 and were assessed at two time points (with different participants at each time point).

dataset

My main goal is to check whether time (the variable that stays the same in every test) is associated with the distribution of responses to each item (economy, social cohesion, and so on; the variable that changes from test to test). Therefore, I'll need to perform several chi-square tests.

chi-square

I've followed some instructions using nest_by or xtabs, but I'm not getting the right results. I would like to stay within the tidyverse for this analysis.

The main goal is to have several chi-squared tests, such as this one:

# Attempt at a chi-square test of time x response for one hard-coded item.
# NOTE(review): the reproducible data further down is named `ds`;
# `ds_plot_likert` is presumably the same data frame -- confirm.
ds_plot_likert %>%
  # Reshape to long format: every column except `time` becomes an
  # item/response pair.
  pivot_longer(cols = -c(time),
               names_to = "item", values_to = "response") %>% 
  # Count rows for each item/time/response combination.
  group_by(item, time, response) %>% 
  summarise(N = n()) %>%
  # summarise() peels off the last grouping level, so the data is still grouped
  # by item and time here: pct is each response's share within an item/time cell.
  mutate(pct = N / sum(N)) %>% 
  filter(item == "Children's academic achievement") %>% #need to change all the time...
  # Cross-tabulate the pct values (not the counts N) by time and response.
  # NOTE(review): chisq.test() therefore receives proportions; its documentation
  # expects the entries of a contingency table to be non-negative integer counts.
  xtabs(formula =  pct ~ time  + response, data = .) %>% 
  chisq.test()

chi-square

But for all variables in my dataset (and preferably using tidyverse).

Thank you!

The following code lets you reproduce the data set.

# Reproducible example data (dput() output): a 100-row data.frame.
#   time  - character vector with the values "First" and ".Second".
#   Nine item columns (Economy, `My personal finance`, `My own health`,
#   `My friends and family health`, `Social cohesion`,
#   `Food and pharmaceutical drugs`, `Price of grocery products`,
#   `Stock prices`, `Children's academic achievement`) - factors sharing the
#   levels "Not at all", "A little", "Moderately", "Very much"; some entries
#   are NA.
ds <- structure(list(time = c("First", "First", "First", "First", "First", 
"First", "First", "First", "First", "First", "First", "First", 
".Second", "First", "First", "First", "First", ".Second", "First", 
"First", "First", "First", "First", "First", "First", "First", 
".Second", "First", "First", ".Second", "First", "First", "First", 
"First", "First", "First", "First", "First", "First", "First", 
".Second", "First", "First", "First", ".Second", ".Second", "First", 
"First", "First", ".Second", ".Second", "First", "First", "First", 
"First", ".Second", ".Second", "First", "First", "First", "First", 
"First", ".Second", "First", "First", "First", "First", ".Second", 
"First", "First", "First", "First", "First", "First", "First", 
".Second", "First", ".Second", "First", "First", "First", "First", 
"First", "First", "First", "First", "First", "First", "First", 
".Second", "First", "First", "First", "First", "First", "First", 
"First", ".Second", ".Second", "First"), Economy = structure(c(4L, 
3L, 3L, 4L, 3L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 4L, 
3L, 3L, NA, 2L, 3L, 4L, 3L, 3L, 4L, 4L, 2L, 3L, 4L, 4L, 3L, 2L, 
4L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 4L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 
4L, 2L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 3L, 2L, 
3L, 3L, 3L, 4L, NA, 2L, 4L, 3L, 4L, 2L, 3L, 3L, 2L, NA, 3L, 2L, 
3L, 2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 3L, 
3L, 3L, 4L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `My personal finance` = structure(c(3L, 
2L, 4L, 2L, 4L, 4L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 4L, 3L, 4L, 4L, 
2L, 3L, NA, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 
3L, 4L, 2L, 2L, 3L, 2L, 2L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 
3L, 2L, 4L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 
3L, 3L, 4L, 3L, NA, 2L, 3L, 3L, 4L, 2L, 3L, 2L, 3L, NA, 3L, 2L, 
2L, 2L, 3L, 4L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 2L, 2L, 3L, 
3L, 3L, 3L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `My own health` = structure(c(3L, 
2L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 
3L, 2L, NA, 3L, 4L, 3L, 4L, 3L, 2L, 1L, 2L, 3L, 3L, 4L, 2L, 4L, 
2L, 4L, 4L, 3L, 2L, 2L, 4L, 4L, 2L, 4L, 3L, 3L, 2L, 3L, 3L, 2L, 
2L, 3L, 3L, 1L, 3L, 4L, 4L, 3L, 3L, 2L, 2L, 4L, 3L, 3L, 4L, 2L, 
3L, 3L, 4L, 4L, NA, 2L, 2L, 3L, 4L, 1L, 3L, 4L, 3L, NA, 3L, 3L, 
3L, 3L, 3L, 3L, 4L, 4L, 2L, 4L, 2L, 2L, 3L, 4L, 3L, 2L, 3L, 4L, 
2L, 3L, 3L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `My friends and family health` = structure(c(4L, 
3L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 3L, NA, 3L, 4L, 3L, 3L, 4L, 
4L, 3L, NA, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 2L, 3L, 4L, 4L, 2L, 4L, 
1L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 
4L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 2L, 4L, 3L, 4L, 4L, 2L, 
3L, 3L, 4L, 4L, NA, 2L, 4L, 3L, 4L, 2L, 3L, 4L, 3L, NA, 3L, 4L, 
3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 
4L, 3L, 4L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `Social cohesion` = structure(c(3L, 
3L, 2L, 4L, 4L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 
3L, 3L, NA, 3L, 3L, 4L, 3L, 3L, 2L, 1L, 2L, 2L, 4L, 4L, 3L, 3L, 
1L, 3L, NA, 2L, 3L, 2L, 4L, 4L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 3L, 
2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 2L, 
3L, 3L, 3L, 4L, NA, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 2L, NA, 3L, NA, 
3L, 3L, 4L, 2L, 4L, 3L, 1L, 4L, 2L, 4L, 3L, 2L, 4L, 2L, 3L, 4L, 
4L, 2L, 2L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `Food and pharmaceutical drugs` = structure(c(2L, 
3L, 4L, 4L, 3L, 2L, 4L, 1L, 2L, 4L, 3L, NA, 2L, 3L, 3L, 2L, 4L, 
3L, 3L, NA, 3L, 4L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 4L, 2L, 2L, 4L, 
4L, 4L, 1L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 2L, 2L, 3L, 3L, 2L, 
3L, 2L, 3L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 
3L, 3L, 3L, 4L, NA, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 1L, 1L, 
2L, 2L, 2L, 1L, 3L, 3L, 2L, 3L, 1L, 2L, 2L, 2L, 4L, 2L, 3L, 3L, 
2L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `Price of grocery products` = structure(c(2L, 
2L, 3L, 4L, 3L, 4L, 3L, 2L, 2L, 4L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 
4L, 3L, NA, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 4L, 2L, 4L, 3L, 
4L, 4L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 3L, 4L, 2L, 
4L, 2L, 3L, 4L, 2L, 2L, 3L, 4L, 3L, 2L, 3L, 4L, 3L, 4L, 4L, 2L, 
3L, 3L, 4L, 4L, NA, 2L, 3L, 2L, 2L, 2L, 1L, 3L, 3L, NA, 1L, 1L, 
2L, 2L, 3L, 1L, 3L, 4L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 3L, 
4L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `Stock prices` = structure(c(2L, 
2L, 2L, 4L, 3L, 2L, 1L, 1L, 3L, 4L, 2L, 2L, 2L, 4L, 3L, 3L, 4L, 
3L, 2L, NA, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 
4L, 4L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 4L, 3L, 4L, 3L, 3L, 2L, 2L, 
2L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, NA, 2L, 4L, 2L, 
3L, 3L, 4L, 4L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, NA, 3L, 1L, 
3L, 3L, 4L, 1L, 3L, 4L, 1L, 3L, 1L, 4L, 2L, 2L, 2L, NA, 3L, 2L, 
4L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor"), `Children's academic achievement` = structure(c(4L, 
3L, 4L, 1L, NA, NA, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 4L, 3L, 4L, NA, 
4L, 3L, NA, 1L, 1L, 3L, 3L, 3L, 1L, 4L, 1L, 3L, 4L, 3L, 2L, 4L, 
1L, 4L, 1L, 1L, 3L, 2L, 3L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 3L, 1L, 
1L, 1L, 4L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 
2L, 1L, 1L, 4L, NA, 2L, 2L, 2L, 4L, 1L, 2L, 1L, 2L, NA, 2L, 1L, 
NA, 3L, 2L, 2L, 1L, 4L, 2L, 3L, 1L, 4L, 1L, 1L, 1L, 3L, 1L, 1L, 
1L, 1L, 2L), .Label = c("Not at all", "A little", "Moderately", 
"Very much"), class = "factor")), class = "data.frame", row.names = c(NA, 
-100L))

Upvotes: 1

Views: 364

Answers (1)

Ronak Shah
Ronak Shah

Reputation: 389215

You can store the result in a list for each item -

library(dplyr)
library(tidyr)

# Run one chi-square test of independence (time x response) per item.
#
# The contingency table is built from raw counts. chisq.test() expects
# non-negative integer counts; feeding it within-group proportions makes every
# row of the table sum to 1, so the statistic no longer reflects the sample
# size and comes out near 0 with a p-value near 1.
#
# `result` has one row per item with:
#   test      - list-column holding the full htest object
#   statistic - Pearson X-squared
#   df        - degrees of freedom
#   p_value   - p-value of the test
#
# With sparse cells chisq.test() warns that the approximation may be
# incorrect; in that case consider chisq.test(..., simulate.p.value = TRUE)
# or fisher.test().
result <- ds %>%
  pivot_longer(cols = -time, names_to = "item", values_to = "response") %>%
  # Missing answers say nothing about the association; drop them so they
  # neither form a table cell nor inflate the totals.
  drop_na(response) %>%
  group_by(item) %>%
  summarise(
    # factor() discards response levels that never occur for this item, which
    # would otherwise create all-zero columns and a NaN statistic.
    test = list(chisq.test(table(time = time, response = factor(response)))),
    statistic = unname(test[[1]]$statistic),
    df = unname(test[[1]]$parameter),
    p_value = test[[1]]$p.value,
    .groups = "drop"
  )

result$test

# For reference, the earlier proportion-based version
# (xtabs(pct ~ time + response)) printed the output below. These near-zero
# statistics are an artefact of testing proportions instead of counts and
# should not be interpreted; the count-based code above gives different values.

#[[1]]

#   Pearson's Chi-squared test

#data:  .
#X-squared = 0.0099329, df = 3, p-value = 0.9997


#[[2]]
#
#   Pearson's Chi-squared test

#data:  .
#X-squared = 0.026631, df = 3, p-value = 0.9989
#...
#...

Upvotes: 3

Related Questions