Reputation: 347
# Example respondents. The three vectors have lengths 3, 6 and 30, so
# data.frame() recycles the shorter ones to 30 rows.
df1 <- data.frame(
  gender = c("male", "female", "male"),
  age = c("1", "2", "3", "4", "5", "6"),
  height = seq(141, 170)
)
# Turn the age codes "1".."6" into labelled age bands.
df1$age <- factor(
  df1$age,
  levels = 1:6,
  labels = c("16-24", "25-34", "35-44", "45-54", "55-64", "65+")
)
# Answer options of multiple-response question 1 (0 = not selected).
# 1,2 and 3 used to be compatible with existing datasets.
# Could change all to 1 if necessary.
df2 <- data.frame(
  q1a = c(1, 0, 1, 0, 0, 1),
  q1b = c(0, 0, 2, 2, 2, 0),
  q1c = c(0, 0, 3, 3, 0, 3)
)
# The 6-row answer pattern is recycled to the 30 respondents by cbind().
df1 <- cbind(df1, df2)
rm(df2)
I am looking to replicate the analysis of multiple response questions from SPSS in R.
At the moment I am using this code:
#' Frequency table for a three-option multiple-response question
#'
#' An option counts as selected when `a == 1`, `b == 2` or `c == 3`
#' respectively. Missing answers (`NA`) are treated as "not selected" instead
#' of turning every count into `NA`.
#'
#' Side effect: the number of respondents who selected at least one option is
#' written to the global variable `total_respondents` (a one-column data frame
#' named `totrep`), overwriting any previous value.
#'
#' @param a,b,c Answer vectors for the three options, one element per
#'   respondent.
#' @return A data frame with rows "a", "b", "c" and "Total" and the columns
#'   `question`, `freq`, `percent` (share of all selections) and
#'   `percentofcases` (share of respondents with at least one selection).
multfreqtable <- function(a, b, c) {
  # Logical "option selected" vectors; NA where the answer is missing.
  picked_a <- a == 1
  picked_b <- b == 2
  picked_c <- c == 3

  # Number of respondents with at least one selection (base for percent of
  # cases). NA | TRUE is TRUE, so a respondent with one missing answer still
  # counts if another option was selected.
  totrep <- sum(picked_a | picked_b | picked_c, na.rm = TRUE)

  # Per-option counts followed by their grand total.
  counts <- c(
    sum(picked_a, na.rm = TRUE),
    sum(picked_b, na.rm = TRUE),
    sum(picked_c, na.rm = TRUE)
  )
  totalsum <- sum(counts)
  freq <- c(counts, totalsum)

  table_question <- data.frame(
    question = c("a", "b", "c", "Total"),
    freq = freq,
    percent = (as.numeric(freq) / totalsum) * 100,
    percentofcases = (as.numeric(freq) / totrep) * 100
  )

  # Kept for existing callers, which read this global after the call.
  total_respondents <<- data.frame(totrep)

  table_question
}
# Build the question-1 table; with() looks the three answer columns up in df1.
q1_frequency <- with(df1, multfreqtable(q1a, q1b, q1c))
# multfreqtable() leaves its respondent count in the global `total_respondents`.
# Keep it under a question-specific name so the next call cannot overwrite it.
total_respondents_q1 <- total_respondents
rm(list = "total_respondents")
Producing this table as a result:
I am looking for a more efficient method of doing this, ideally one that would not require the function to be edited if there were more or fewer multiple-choice questions.
Upvotes: 9
Views: 13461
Reputation: 119
It's an old question. However, you can use the userfriendlyscience
package to analyze multiple-response survey data very easily.
# Tabulate the multiple-response items stored in columns v1, v2 and v3 of `data`.
library(userfriendlyscience)
multiResponse(data, c("v1", "v2", "v3"))
Upvotes: 1
Reputation: 1
I've noticed that this post is quite old; however, I couldn't find a more up-to-date solution. Here's my version, based on a dplyr/tidyverse approach.
#' Frequency table for one multiple-response question (dplyr/tidyr version)
#'
#' Only the columns named in `mv_q` are tabulated, so the result does not
#' depend on which other columns (demographics, other questions) `df1` holds.
#' Any non-zero value counts as "selected"; missing values count as
#' "not selected".
#'
#' @param df1 Data frame with one row per respondent.
#' @param mv_q Character vector naming the answer-option columns.
#' @return A table with one row per option (sorted by name) plus a "Total"
#'   row, and the columns `question`, `freq`, `percent` (share of all
#'   selections) and `percent_of_cases` (share of respondents who selected at
#'   least one option).
mult_resp <- function(df1, mv_q = c("q1a", "q1b", "q1c")) {
  # Restrict to the requested option columns before reshaping; row names are
  # kept by the subset and serve as respondent ids below.
  answers <- df1[, mv_q, drop = FALSE]

  long <- answers %>%
    mutate(id = rownames(.)) %>% # row id for counting cases
    mutate_at(mv_q, ~ ifelse(. != 0, 1, 0)) %>% # recode to 0/1
    gather(question, resp, -id)

  # Number of respondents with at least one selected option
  # (rows that are all zero or missing are excluded).
  case_totals <- long %>%
    group_by(id) %>%
    summarise(n = sum(resp, na.rm = TRUE))
  n_cases <- sum(case_totals$n > 0)

  # One row per option.
  per_question <- long %>%
    group_by(question) %>%
    summarise(freq = sum(resp, na.rm = TRUE)) %>%
    mutate(
      percent = freq / sum(freq) * 100,
      percent_of_cases = freq / n_cases * 100
    )

  # Column totals appended as the last row.
  total_row <- data.frame(
    question = "Total",
    freq = sum(per_question$freq, na.rm = TRUE),
    percent = sum(per_question$percent, na.rm = TRUE),
    percent_of_cases = sum(per_question$percent_of_cases, na.rm = TRUE),
    stringsAsFactors = FALSE
  )

  rbind(per_question, total_row)
}
Example:
> mult_resp(df1, mv_q = c("q1a", "q1b", "q1c"))
# A tibble: 4 x 4
question freq percent percent_of_cases
<chr> <dbl> <dbl> <dbl>
1 q1a 15 33.3 60
2 q1b 15 33.3 60
3 q1c 15 33.3 60
4 Total 45 100. 180
Upvotes: 0
Reputation: 193517
Your function is actually far too complicated for what you need to do. I think a function like this should work and be more flexible.
#' Frequency table for a multiple-response question
#'
#' Every column of `data` whose name matches `question.prefix` is treated as
#' one answer option; any non-zero value counts as "selected" and missing
#' values count as "not selected".
#'
#' @param data Data frame with one row per respondent.
#' @param question.prefix Pattern passed to `grep()` to find the option
#'   columns, typically the shared name prefix such as "q1". It is matched
#'   anywhere in the column name.
#' @return A data frame with one row per matched column plus a "Total" row,
#'   and the columns `question`, `freq`, `percent` (share of all selections)
#'   and `percentofcases` (share of respondents with at least one selection).
multfreqtable <- function(data, question.prefix) {
  # Find the columns with the questions
  cols <- grep(question.prefix, names(data))
  # Logical respondent-by-option matrix. drop = FALSE keeps the two
  # dimensions even when only a single column matches.
  selected <- data[, cols, drop = FALSE] != 0
  # Totals for each option and over all options
  per_item <- colSums(selected, na.rm = TRUE)
  n_responses <- sum(per_item)
  # Number of respondents who selected at least one option
  n_respondents <- sum(rowSums(selected, na.rm = TRUE) > 0)
  # Per-option counts followed by the overall frequency
  freq <- as.numeric(c(per_item, n_responses))
  data.frame(
    question = c(names(per_item), "Total"),
    freq = freq,
    percent = (freq / n_responses) * 100,
    percentofcases = (freq / n_respondents) * 100
  )
}
Add another question to your example dataset:
# Add a second multiple-response question (q2a-q2c) with random answers.
# Each option is seeded separately so the example is reproducible.
set.seed(1)
df1$q2a <- sample(c(0, 1), nrow(df1), replace = TRUE)
set.seed(2)
df1$q2b <- sample(c(0, 2), nrow(df1), replace = TRUE)
set.seed(3)
df1$q2c <- sample(c(0, 3), nrow(df1), replace = TRUE)
Make a table for "q1" responses:
> multfreqtable(df1, "q1")
question freq percent percentofcases
1 q1a 15 33.33333 60
2 q1b 15 33.33333 60
3 q1c 15 33.33333 60
4 Total 45 100.00000 180
Make a table for "q2" responses:
> multfreqtable(df1, "q2")
question freq percent percentofcases
1 q2a 14 31.11111 53.84615
2 q2b 13 28.88889 50.00000
3 q2c 18 40.00000 69.23077
4 Total 45 100.00000 173.07692
Here's a modified version of the function that allows you to create a list of tables for multiple questions at once:
#' Frequency tables for several multiple-response questions at once
#'
#' For each element of `question.prefix`, every column of `data` whose name
#' matches it is treated as one answer option; any non-zero value counts as
#' "selected" and missing values count as "not selected".
#'
#' @param data Data frame with one row per respondent.
#' @param question.prefix Character vector of patterns passed to `grep()`,
#'   one per question (typically the shared name prefix such as "q1").
#' @return A list with one data frame per prefix, named by the prefixes. Each
#'   has one row per matched column (labelled by the column name with the
#'   prefix removed) plus a "Total" row, and the columns `question`, `freq`,
#'   `percent` and `percentofcases`. An empty `question.prefix` gives an
#'   empty list.
multfreqtable <- function(data, question.prefix) {
  # Builds the table for a single question prefix.
  tabulate_question <- function(prefix) {
    cols <- grep(prefix, names(data))
    # drop = FALSE keeps a matrix even when only one column matches.
    selected <- data[, cols, drop = FALSE] != 0
    per_item <- colSums(selected, na.rm = TRUE)
    n_responses <- sum(per_item)
    # Respondents who selected at least one option.
    n_respondents <- sum(rowSums(selected, na.rm = TRUE) > 0)
    freq <- as.numeric(c(per_item, n_responses))
    data.frame(
      question = c(sub(prefix, "", names(per_item)), "Total"),
      freq = freq,
      percent = (freq / n_responses) * 100,
      percentofcases = (freq / n_respondents) * 100
    )
  }

  # lapply() copes with a zero-length prefix vector, unlike a 1:z loop.
  tables <- lapply(question.prefix, tabulate_question)
  names(tables) <- question.prefix
  tables
}
Examples:
> multfreqtable(df1, "q1")
$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
> test1 = multfreqtable(df1, c("q1", "q2"))
> test1
$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
$q2
question freq percent percentofcases
1 a 14 31.11111 53.84615
2 b 13 28.88889 50.00000
3 c 18 40.00000 69.23077
4 Total 45 100.00000 173.07692
> test1$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
Upvotes: 12