Reputation: 33
I am trying to generate a summary table in R that includes:
Descriptive statistics for a set of continuous and categorical variables stratified by a grouping variable with three factors ("A", "B", "C").
P-values for comparisons across all three groups using Kruskal-Wallis test for continuous variables & Fisher's exact test for categorical variables.
Pairwise comparisons ("A" vs "B", "B" vs "C", "A" vs "C") with FDR correction applied to the resulting p-values.
Does anyone have suggestions for packages or existing solutions for this? I usually use gtsummary for this and there are no built-in functions, so I guess if there is nothing there, I will be customizing its tbl_summary. If anyone has any other ideas/solutions, I would appreciate it!
Example Data:
set.seed(123)
df <- data.frame(
group = sample(c("A", "B", "C"), 100, replace = TRUE),
continuous_var_1 = rnorm(100, mean = rep(c(5, 6, 7), length.out = 100)),
continuous_var2 = rnorm(100, mean = rep(c(5, 6, 7), length.out = 100)),
categorical_var_1 = sample(c("Yes", "No"), 100, replace = TRUE),
categorical_var2 = sample(c("Yes", "No"), 100, replace = TRUE)
)
Upvotes: 0
Views: 87
Reputation: 11774
You're after a rather bespoke table that requires some bespoke coding. Hopefully this helps!
The general outline here is that we can use the cards
and cardx
packages to perform the pairwise tests, then you need to use p.adjust()
to perform the correction to the p-values.
library(gtsummary)
set.seed(123)
df <- data.frame(
group = sample(c("A", "B", "C"), 100, replace = TRUE),
continuous_var_1 = rnorm(100, mean = rep(c(5, 6, 7), length.out = 100)),
continuous_var2 = rnorm(100, mean = rep(c(5, 6, 7), length.out = 100)),
categorical_var_1 = sample(c("Yes", "No"), 100, replace = TRUE),
categorical_var2 = sample(c("Yes", "No"), 100, replace = TRUE)
)
# function to perform pairwise fisher test
fisher_pairwise <- function(data, variable, by, ...) {
cards::ard_pairwise(
data = data,
variable = all_of(by),
.f = \(df) cardx::ard_stats_fisher_test(data = df, by = all_of(by), variables = all_of(variable)) |> dplyr::filter(stat_name == "p.value")
) |>
purrr::imap(~dplyr::tibble(column = cardx::bt_strip(.y) %>% paste0("**", ., "**"), p.value = .x$stat[[1]])) |>
dplyr::bind_rows() |>
tidyr::pivot_wider(names_from = column, values_from = p.value)
}
fisher_pairwise(data = df, variable = "categorical_var_1", by = "group")
#> # A tibble: 1 × 3
#> `**'A' vs. 'B'**` `**'A' vs. 'C'**` `**'B' vs. 'C'**`
#> <dbl> <dbl> <dbl>
#> 1 1 1 1
# function to perform pairwise kruskal-wallis test
kruskal_pairwise <- function(data, variable, by, ...) {
cards::ard_pairwise(
data = data,
variable = all_of(by),
.f = \(df) cardx::ard_stats_kruskal_test(data = df, by = all_of(by), variables = all_of(variable)) |> dplyr::filter(stat_name == "p.value")
) |>
purrr::imap(~dplyr::tibble(column = cardx::bt_strip(.y) %>% paste0("**", ., "**"), p.value = .x$stat[[1]])) |>
dplyr::bind_rows() |>
tidyr::pivot_wider(names_from = column, values_from = p.value)
}
kruskal_pairwise(data = df, variable = "continuous_var_1", by = "group")
#> # A tibble: 1 × 3
#> `**'A' vs. 'B'**` `**'A' vs. 'C'**` `**'B' vs. 'C'**`
#> <dbl> <dbl> <dbl>
#> 1 0.0722 0.288 0.624
tbl <- df |>
# create the primary summary table
tbl_summary(by = group) |>
# add the standard comparison across the three groups (A, B, C)
add_p(
test = list(all_continuous() ~ "kruskal.test",
all_categorical() ~ "fisher.test")
) |>
# add custom stat, that is, the pairwise comparisons
add_stat(
fns = list(all_continuous() ~ kruskal_pairwise,
all_categorical() ~ fisher_pairwise)
) |>
# style the pvalues with the pvalue syling function
modify_fmt_fun(contains("vs") ~ label_style_pvalue()) |>
# apply p-value correction (you may need to change `p.adjust(n)` depending on how you planned to make your correction)
modify_table_body(
\(table_body) {
# get all the pairwise pvalue colnames
pvalue_colnames <- select(table_body, contains("vs")) |> names()
# perform correction to p-value
for (v in pvalue_colnames) {
table_body[[v]] <- p.adjust(table_body[[v]], n = 12, method = "fdr")
}
table_body
}
) |>
modify_spanning_header(contains("vs") ~ "**Pairwise Comparisons**") |>
modify_footnote(
contains("vs") ~ "Pairwise p-values corrected for multiple comparisons using the false discovery method"
)
Created on 2024-12-21 with reprex v2.1.1
Upvotes: 3