Sandro
Sandro

Reputation: 123

r/gtsummary: Creating a function so that tbl_summary reports 95%CI of selected data

Further to my previous post, I am seeking assistance to have the all_continuous() call report the 95%CI of the previously selected data.

library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.5.2'

set.seed(42)
n <- 1000
dat <- data.frame(id=1:n,
                  treat = factor(sample(c('Treat','Control'), n, rep=TRUE, prob=c(.5, .5))),
                  outcome1=runif(n, min=-3.6, max=2.3),
                  outcome2=runif(n, min=-1.9, max=3.3),
                  outcome3=runif(n, min=-2.5, max=2.8),
                  outcome4=runif(n, min=-3.1, max=2.2))

mean_no_extreme <- function(x) {
  x <- na.omit(x)
  sd <- sd(x)
  mean <- mean(x)
  
  # calculate mean excluding extremes
  mean(x[x >= mean - sd * 3 & x <= mean + sd * 3])
}

lci <- function(x){
  x <- na.omit(x)
  sd <- sd(x)
  mean <- mean_no_extreme(x)
  e <- (mean*sd)/(sqrt(n))
  (x[x=mean-e*1.96])
}

uci <- function(x){
  x <- na.omit(x)
  sd <- sd(x)
  mean <- mean_no_extreme(x)
  e <- (mean*sd)/(sqrt(n))
  (x[x=mean+e*1.96])
}


dat %>% 
  select(-c(id)) %>% 
  tbl_summary(
    by=treat, 
    statistic = all_continuous() ~ "{mean_no_extreme} ({lci} to {uci})"
  ) %>%
  as_kable()

Any guidance is tremendously appreciated.

Many thanks,
Sandro

Upvotes: 0

Views: 146

Answers (1)

Daniel D. Sjoberg
Daniel D. Sjoberg

Reputation: 11680

I think there is an error in your CI calculation functions. Here's an example that runs without error.

library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.5.2'

set.seed(42)
n <- 1000
dat <- data.frame(id=1:n,
                  treat = factor(sample(c('Treat','Control'), n, rep=TRUE, prob=c(.5, .5))),
                  outcome1=runif(n, min=-3.6, max=2.3),
                  outcome2=runif(n, min=-1.9, max=3.3),
                  outcome3=runif(n, min=-2.5, max=2.8),
                  outcome4=runif(n, min=-3.1, max=2.2))

.remove_extremes <- function(x) {
  x <- na.omit(x)
  sd <- sd(x)
  mean <- mean(x)
  
  x[x >= mean - sd * 3 & x <= mean + sd * 3]
}

mean_no_extreme <- function(x) {
  mean(.remove_extremes(x))
}

conf.low.no_extreme <- function(x) {
  .remove_extremes(x) |> t.test() |> broom::tidy() |> purrr::pluck("conf.low")
}

conf.high.no_extreme <- function(x) {
  .remove_extremes(x) |> t.test() |> broom::tidy() |> purrr::pluck("conf.high")
}


dat %>% 
  select(-c(id)) %>% 
  tbl_summary(
    by=treat,
    statistic = all_continuous() ~ "{mean_no_extreme} ({conf.low.no_extreme} to {conf.high.no_extreme})"
  ) %>%
  as_kable()
Characteristic Control, N = 527 Treat, N = 473
outcome1 -0.64 (-0.79 to -0.49) -0.70 (-0.86 to -0.54)
outcome2 0.68 (0.55 to 0.81) 0.78 (0.64 to 0.91)
outcome3 0.20 (0.07 to 0.34) 0.23 (0.09 to 0.37)
outcome4 -0.36 (-0.49 to -0.23) -0.41 (-0.54 to -0.27)

Created on 2022-03-24 by the reprex package (v2.0.1)

Upvotes: 1

Related Questions