Reputation: 7735
Q1. Is there a more direct (but still `tidyverse`) way to create a summary table like this?
library(tidyverse)
library(knitr)
library(kableExtra)
# Example data: a binary grouping variable, two numeric variables (v1, v2)
# and one 0/1 indicator variable (v3).
df <- data.frame(group=c(1, 1, 1, 1, 0, 0, 0, 0),
v1=c(1, 2, 3, 4, 5, 6, 1, 2),
v2=c(4, 3, 2, 5, 3, 5, 3, 8),
v3=c(0, 1, 0, 1, 1, 0, 1, 1))
# Build a per-group summary table and render it as HTML.
df %>%
group_by(group) %>%
# v1 and v2 are reported as a "mean (SD)" string, each part rounded to
# 2 decimals; v3 is reported as a percentage rounded to 1 decimal.
summarise(v1=paste0(round(mean(v1), 2),
" (",
round(sd(v1), 2),
")"),
v2=paste0(round(mean(v2), 2),
" (",
round(sd(v2), 2),
")"),
v3=round(mean(v3)*100, 1)
) %>%
# Drop the grouping column so that only the summary columns get transposed.
dplyr::select(-group) %>%
# Transpose: variables become rows, groups become columns.
t() %>%
# Replace the row names (v1, v2, v3) with display labels; the label order
# must match the column order produced by summarise() above.
`rownames<-` (c("v1 mean (SD)",
"v2 mean (SD)",
"Percent v3")) %>%
# NOTE(review): the column labels assume the summarised rows come back as
# group 0 first, then group 1 -- confirm.
kable("html",
col.names=c("Group 0", "Group 1")) %>%
kable_styling()
Q2. Related to this, is there a way to combine two levels of `summarise` (e.g., no grouping + grouping) without repeating the `summarise` code?
# Overall (ungrouped) summary: one column of statistics for the whole data set.
# The summarise() call is the same as the one used for `groups` below.
all <-
df %>%
summarise(v1=paste0(round(mean(v1), 2),
" (",
round(sd(v1), 2),
")"),
v2=paste0(round(mean(v2), 2),
" (",
round(sd(v2), 2),
")"),
v3=round(mean(v3)*100, 1)
) %>%
# Transpose so that each variable is a row, then attach display labels.
t() %>%
`rownames<-` (c("v1 mean (SD)",
"v2 mean (SD)",
"Percent v3"))
# Per-group summary: the same statistics computed within each level of `group`.
groups <-
df %>%
group_by(group) %>%
summarise(v1=paste0(round(mean(v1), 2),
" (",
round(sd(v1), 2),
")"),
v2=paste0(round(mean(v2), 2),
" (",
round(sd(v2), 2),
")"),
v3=round(mean(v3)*100, 1)
) %>%
# Drop the grouping column before transposing so only statistics remain.
dplyr::select(-group) %>%
t() %>%
`rownames<-` (c("v1 mean (SD)",
"v2 mean (SD)",
"Percent v3"))
# Put the overall column next to the per-group columns and render as HTML.
# NOTE(review): the column labels assume `groups` holds group 0 first, then
# group 1 -- confirm.
all %>%
cbind(groups) %>%
kable("html",
col.names=c("All", "Group 0", "Group 1")) %>%
kable_styling()
Upvotes: 1
Views: 2828
Reputation: 7856
This is the minimum I can think of.
# Names of the variables that are reported as a percentage rather than as
# "mean (SD)".
cat_var <- "v3"

#' Format the summary statistic of one variable as a display string.
#'
#' @param x Numeric vector of observed values for a single variable.
#' @param var Character vector holding the variable name; only the first
#'   element is inspected, so the repeated name column of a long-format
#'   group can be passed directly.
#' @return A length-one character string: the percentage (mean * 100,
#'   rounded to 1 decimal) when the variable is listed in `cat_var`,
#'   otherwise "mean (SD)" with both parts rounded to 2 decimals.
df_cal <- function(x, var) {
  if (var[1] %in% cat_var) {
    # Indicator variables are coded 0/1, so mean * 100 is the percentage.
    return(as.character(round(mean(x) * 100, 1)))
  }
  # Round the mean as well as the SD so the string stays compact.
  paste0(round(mean(x), 2), " (", round(sd(x), 2), ")")
}
# Reshape to long format: the columns v1..v3 are stacked into a name column
# (`var`) and a value column (`x`), then grouped by variable.
df_tall <- df %>%
  gather(var, x, v1:v3) %>%
  group_by(var)

# Overall statistic per variable. group = -1 is a placeholder key for the
# "All" column; being smaller than the real group codes (0 and 1), it is
# expected to end up as the first statistics column after spread().
all <- df_tall %>%
  summarise(stat = df_cal(x, var)) %>%
  mutate(group = -1)

# The same statistic computed within each group.
groups <- df_tall %>%
  group_by(group, var) %>%
  summarise(stat = df_cal(x, var))

bind_rows(all, groups) %>%
  ungroup() %>%
  # Give the rows display labels; levels are spelled out so the label
  # mapping does not depend on the implicit sort order of `var`.
  mutate(var = factor(
    var,
    levels = c("v1", "v2", "v3"),
    labels = c("v1 mean (SD)", "v2 mean (SD)", "Percent v3")
  )) %>%
  # One column per group key (-1, 0, 1), one row per variable.
  spread(group, stat) %>%
  kable("html", col.names = c(" ", "All", "Group 0", "Group 1")) %>%
  kable_styling()
Upvotes: 1
Reputation: 618
One solution (especially if you want to expand the number of columns v1, v2, ... in the future) to make your code a bit more concise is to put `paste0(round(mean(v1), 2)," (", round(sd(v1), 2), ")")` into a function:
`paste_mean_and_sd = function(df_col){paste0(round(mean(df_col), 2)," (", round(sd(df_col), 2), ")")}`.
That would shorten your "pipeline" and make it more easily readable:
... %>% summarise(v1 = paste_mean_and_sd(v1), v2 = paste_mean_and_sd(v2), v3=round(mean(v3)*100, 1)) %>% ...
Upvotes: 1