Reputation: 31
I am using survey data and would like to create multiple cross tables with row percentages at once.
Below is some dummy data and the way I would create those tables individually. Where I get confused is how to run this for var1, var2, and var3 effectively in a loop.
library(tidyverse)
df_example <- tribble(
~id, ~country, ~var1, ~var2, ~var3, ~weight,
1, "US", 1, 1, 1, .5,
2, "US", 2, 2, 2, .5,
3, "US", 2, 2, 2, .5,
4, "US", 3, 3, 3, .5,
5, "US", 3, 3, 3, .5,
6, "US", 1, 1, 1, 2,
7, "US", 2, 2, 2, 2,
8, "US", 3, 3, 3, 2,
9, "US", 3, 3, 3, 2,
10, "MX", 2, 2, 2, 2,
11, "MX", 1, 3, 1, 1,
12, "MX", 2, 3, 2, 1,
13, "MX", 3, 3, 2, 1,
14, "MX", 1, 3, 3, 1,
15, "MX", 2, 3, 3, 1,
16, "MX", 3, 2, 1, 1,
17, "MX", 1, 2, 2, 1,
18, "MX", 2, 2, 3, 1,
19, "MX", 3, 3, 3, 1,
20, "MX", 2, 2, 2, 1
)
df_example %>%
group_by(country) %>%
count(var2, wt = weight) %>%
arrange(var2) %>%
mutate(pctg = round(n/sum(n), 4)) %>%
select(!n) %>%
pivot_wider(names_from = var2,
values_from = pctg)
Upvotes: 1
Views: 59
Reputation: 6770
Here is a simple loop in base R. But I think I might do it in one of the nice table making packages.
f_example <- tribble(
~id, ~country, ~var1, ~var2, ~var3, ~weight,
1, "US", 1, 1, 1, .5,
2, "US", 2, 2, 2, .5,
3, "US", 2, 2, 2, .5,
4, "US", 3, 3, 3, .5,
5, "US", 3, 3, 3, .5,
6, "US", 1, 1, 1, 2,
7, "US", 2, 2, 2, 2,
8, "US", 3, 3, 3, 2,
9, "US", 3, 3, 3, 2,
10, "MX", 2, 2, 2, 2,
11, "MX", 1, 3, 1, 1,
12, "MX", 2, 3, 2, 1,
13, "MX", 3, 3, 2, 1,
14, "MX", 1, 3, 3, 1,
15, "MX", 2, 3, 3, 1,
16, "MX", 3, 2, 1, 1,
17, "MX", 1, 2, 2, 1,
18, "MX", 2, 2, 3, 1,
19, "MX", 3, 3, 3, 1,
20, "MX", 2, 2, 2, 1
)
iv = "country"
dvs = c("var1", "var2", "var3")
tabletemp <- list()
for (i in 1:3){
tabletemp[[dvs[i]]] <- table(df_example[["country"]],df_example[[ dvs[i]]]) |>
prop.table(1) |> round(2)
}
tabletemp
$var1
1 2 3
MX 0.27 0.45 0.27
US 0.22 0.33 0.44
$var2
1 2 3
MX 0.00 0.45 0.55
US 0.22 0.33 0.44
$var3
1 2 3
MX 0.18 0.45 0.36
US 0.22 0.33 0.44
Upvotes: 0
Reputation: 1038
You can create a loop based on a vector of the variable names, and inside the loop use rename()
to rename each of your variables with the same name, and then run the remaining steps for your table on that renamed variable:
# loop using rename
variables <- c("var1", "var2", "var3")
for (var in variables) {
table <- df_example %>%
rename(variable = all_of(var)) %>%
group_by(country) %>%
count(variable, wt = weight) %>%
arrange(variable) %>%
mutate(pctg = round(n/sum(n), 4)) %>%
select(!n) %>%
pivot_wider(names_from = variable,
values_from = pctg)
print(table)
}
The same could be accomplished without the rename()
step by calling the variables (var2
in yout example) as !!sym(var)
inside the loop, which is the way to convert a string into a symbol; i.e., tell R that you are talking about an object/column instead of just a string of text:
# loop using symbol
variables <- c("var1", "var2", "var3")
for (var in variables) {
table <- df_example %>%
group_by(country) %>%
count(!!sym(var), wt = weight) %>%
arrange(!!sym(var)) %>%
mutate(pctg = round(n/sum(n), 4)) %>%
select(!n) %>%
pivot_wider(names_from = !!sym(var),
values_from = pctg)
print(table)
}
You can save the results of the loop into a list if you first initialize the list with tables <- list()
(outside of the loop, of course) and, below print(table)
at the end of the loop, you do tables[[var]] <- table
to assign each created table in a new element of the list, named after the values you looped through:
tables <- list()
variables <- c("var1", "var2", "var3")
for (var in variables) {
table <- df_example %>%
rename(variable = all_of(var)) %>%
group_by(country) %>%
count(variable, wt = weight) %>%
arrange(variable) %>%
mutate(pctg = round(n/sum(n), 4)) %>%
select(!n) %>%
pivot_wider(names_from = variable,
values_from = pctg)
print(table)
tables[[var]] <- table
}
tables
Upvotes: 0
Reputation: 17204
First pivot to long, then do your counts within each variable.
library(dplyr)
library(tidyr)
out <- df_example %>%
pivot_longer(var1:var3, names_to = "var") %>%
count(var, country, value, wt = weight) %>%
mutate(n = round(n/sum(n), 4), .by = c(var, country)) %>%
pivot_wider(names_from = value, values_from = n)
You can then keep as one big table:
#> out
# A tibble: 6 × 5
var country `1` `2` `3`
<chr> <chr> <dbl> <dbl> <dbl>
1 var1 MX 0.25 0.5 0.25
2 var1 US 0.238 0.286 0.476
3 var2 MX NA 0.5 0.5
4 var2 US 0.238 0.286 0.476
5 var3 MX 0.167 0.5 0.333
6 var3 US 0.238 0.286 0.476
Or split()
into separate tables:
#> out %>%
#+ select(!var) %>%
#+ split(out$var)
$var1
# A tibble: 2 × 4
country `1` `2` `3`
<chr> <dbl> <dbl> <dbl>
1 MX 0.25 0.5 0.25
2 US 0.238 0.286 0.476
$var2
# A tibble: 2 × 4
country `1` `2` `3`
<chr> <dbl> <dbl> <dbl>
1 MX NA 0.5 0.5
2 US 0.238 0.286 0.476
$var3
# A tibble: 2 × 4
country `1` `2` `3`
<chr> <dbl> <dbl> <dbl>
1 MX 0.167 0.5 0.333
2 US 0.238 0.286 0.476
Upvotes: 0