Reputation: 723
I have a particular problem with labeling categorical variables using {gtsummary}.
When I label the levels of a factor variable, {gtsummary} does not work properly: the counts are shown, but every percentage is reported as NA.
Assume the following data:
library(survey)
library(gtsummary)
library(cobalt)
# Create data and add a categorical variable named hello with five levels:
data <- lalonde
hello <- sample(1:5, nrow(data), replace = TRUE)
data$hello <- as.factor(hello)
# Labels:
levels(data$hello) <- c("hello_1", "hello_2", "hello_3", "hello_4", "hello_5")
# Now we specify the variables we want to include in our table1:
vars <- c("hello")
# Now we create table 1:
# Survey design stratified by treat; no weights are supplied.
datasvy <- survey::svydesign(~1, data = data, strata = ~treat)
table1 <- tbl_svysummary(
  datasvy,
  by = treat,
  include = all_of(vars),
  statistic = list(
    all_categorical() ~ "{n}({p}%)"
  ),
  # Spell the value out in full: the abbreviation "col" relies on partial
  # argument matching, which strict argument validation rejects.
  percent = "column",
  digits = list(
    all_categorical() ~ c(0, 1)
  )
)
# Convert the summary table to a plain data frame.
table1 <- as.data.frame(table1)
Now this is the result:
Characteristic | Group 0 (N = 429) | Group 1 (N = 185) |
---|---|---|
hello | NA | NA |
hello_1 | 91(NA%) | 40(NA%) |
hello_2 | 71(NA%) | 35(NA%) |
hello_3 | 82(NA%) | 31(NA%) |
hello_4 | 86(NA%) | 37(NA%) |
hello_5 | 99(NA%) | 42(NA%) |
Is there a solution to that?
When I do not use the labels, everything works fine.
Reprex:
library(gtsummary)
# Example data: the lalonde data set from cobalt.
data <- cobalt::lalonde
# Draw a random category (1 to 5) for every row and store it as a factor.
hello <- sample(1:5, nrow(data), replace = TRUE)
data$hello <- as.factor(hello)
# Rename the five levels to hello_1, ..., hello_5.
levels(data$hello) <- paste0("hello_", 1:5)
# Variables to include in table 1:
vars <- "hello"
# Build table 1 from the stratified design and render it with kable:
survey::svydesign(~1, data = data, strata = ~treat) |>
  tbl_svysummary(
    by = treat,
    include = all_of(vars),
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    percent = "column",
    digits = list(all_categorical() ~ c(0, 1))
  ) |>
  as_kable()
#> Warning in svydesign.default(~1, data = data, strata = ~treat): No weights or
#> probabilities supplied, assuming equal probability
Characteristic 0, N = 429 1, N = 185
hello
hello_1 86 (NA%) 43 (NA%)
hello_2 86 (NA%) 33 (NA%)
hello_3 96 (NA%) 35 (NA%)
hello_4 87 (NA%) 39 (NA%)
hello_5 74 (NA%) 35 (NA%)
Created on 2024-11-25 with reprex v2.1.1
Upvotes: 1
Views: 47
Reputation: 96
I'd like to suggest using surveytable instead. It's easy to use and gives you all the estimates that you might be looking for.
# Create data: the lalonde data set plus a random five-level factor `hello`
data <- cobalt::lalonde
hello <- sample(1:5, nrow(data), replace = TRUE)
data$hello <- as.factor(hello)
levels(data$hello) <- c("hello_1", "hello_2", "hello_3", "hello_4", "hello_5")
# For surveytable, treat needs to be a factor
data$treat <- as.factor(data$treat)
# Survey design stratified by treat; no weights are supplied
datasvy <- survey::svydesign(~1, data = data, strata = ~treat)
# surveytable
library(surveytable)
set_survey(datasvy)
# Tabulate hello separately for each level of treat
tab_subset("hello", "treat")
Output:
hello (treat = 0) {datasvy}
┌─────────┬────┬────────┬────┬────┬─────┬─────────┬─────┬──────┬──────┐
│ Level │ n │ Number │ SE │ LL │ UL │ Percent │ SE │ LL │ UL │
├─────────┼────┼────────┼────┼────┼─────┼─────────┼─────┼──────┼──────┤
│ hello_1 │ 85 │ 85 │ 8 │ 70 │ 103 │ 19.8 │ 1.9 │ 16.1 │ 23.9 │
├─────────┼────┼────────┼────┼────┼─────┼─────────┼─────┼──────┼──────┤
│ hello_2 │ 99 │ 99 │ 9 │ 83 │ 118 │ 23.1 │ 2 │ 19.2 │ 27.4 │
├─────────┼────┼────────┼────┼────┼─────┼─────────┼─────┼──────┼──────┤
│ hello_3 │ 90 │ 90 │ 8 │ 75 │ 108 │ 21 │ 2 │ 17.2 │ 25.1 │
├─────────┼────┼────────┼────┼────┼─────┼─────────┼─────┼──────┼──────┤
│ hello_4 │ 73 │ 73 │ 8 │ 59 │ 90 │ 17 │ 1.8 │ 13.6 │ 20.9 │
├─────────┼────┼────────┼────┼────┼─────┼─────────┼─────┼──────┼──────┤
│ hello_5 │ 82 │ 82 │ 8 │ 67 │ 100 │ 19.1 │ 1.9 │ 15.5 │ 23.2 │
└─────────┴────┴────────┴────┴────┴─────┴─────────┴─────┴──────┴──────┘
N = 429.
hello (treat = 1) {datasvy}
┌─────────┬────┬────────┬────┬────┬────┬─────────┬─────┬──────┬──────┐
│ Level │ n │ Number │ SE │ LL │ UL │ Percent │ SE │ LL │ UL │
├─────────┼────┼────────┼────┼────┼────┼─────────┼─────┼──────┼──────┤
│ hello_1 │ 50 │ 50 │ 6 │ 39 │ 64 │ 27 │ 3.3 │ 20.8 │ 34.1 │
├─────────┼────┼────────┼────┼────┼────┼─────────┼─────┼──────┼──────┤
│ hello_2 │ 33 │ 33 │ 5 │ 24 │ 46 │ 17.8 │ 2.8 │ 12.6 │ 24.1 │
├─────────┼────┼────────┼────┼────┼────┼─────────┼─────┼──────┼──────┤
│ hello_3 │ 36 │ 36 │ 5 │ 27 │ 49 │ 19.5 │ 2.9 │ 14 │ 25.9 │
├─────────┼────┼────────┼────┼────┼────┼─────────┼─────┼──────┼──────┤
│ hello_4 │ 33 │ 33 │ 5 │ 24 │ 46 │ 17.8 │ 2.8 │ 12.6 │ 24.1 │
├─────────┼────┼────────┼────┼────┼────┼─────────┼─────┼──────┼──────┤
│ hello_5 │ 33 │ 33 │ 5 │ 24 │ 46 │ 17.8 │ 2.8 │ 12.6 │ 24.1 │
└─────────┴────┴────────┴────┴────┴────┴─────────┴─────┴──────┴──────┘
N = 185.
Upvotes: 0
Reputation: 11774
I can't replicate your issue. Can you use a proper reprex (reprex.tidyverse.com), which will evaluate in a fresh environment?
library(gtsummary)
# Report the installed gtsummary version used for this run.
packageVersion("gtsummary")
#> [1] '2.0.3'
# Example data: the lalonde data set plus a random factor `hello`
# with five levels.
data <- cobalt::lalonde
hello <- sample(1:5, nrow(data), replace = TRUE)
data$hello <- as.factor(hello)
# Relabel the levels as hello_1, ..., hello_5.
levels(data$hello) <- sprintf("hello_%d", 1:5)
# Variables to show in table 1:
vars <- "hello"
# Summarise the stratified design by treatment group:
survey::svydesign(~1, data = data, strata = ~treat) |>
  tbl_svysummary(
    by = treat,
    include = all_of(vars),
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    percent = "column",
    digits = list(all_categorical() ~ c(0, 1))
  ) |>
  as_kable() # convert to kable to display on SO
#> Warning in svydesign.default(~1, data = data, strata = ~treat): No weights or
#> probabilities supplied, assuming equal probability
Characteristic | 0 N = 429 | 1 N = 185 |
---|---|---|
hello | ||
hello_1 | 88 (20.5%) | 37 (20.0%) |
hello_2 | 93 (21.7%) | 35 (18.9%) |
hello_3 | 88 (20.5%) | 37 (20.0%) |
hello_4 | 85 (19.8%) | 36 (19.5%) |
hello_5 | 75 (17.5%) | 40 (21.6%) |
Created on 2024-11-25 with reprex v2.1.1
Upvotes: 0