Reputation: 25
I have three measurements, each with two time points.
library(tidyverse)
library(tableone)
test_1_pre <- c(1,5,8,2)
test_1_post <- c(2,7,3,6)
test_2_pre <- c(6,3,6,5)
test_2_post <- c(9,8,9,1)
test_3_pre <- c(12,2,4,6)
test_3_post <- c(4,7,6,6)
df <- data.frame(test_1_pre, test_1_post, test_2_pre,
test_2_post, test_3_pre, test_3_post)
df_2 <- df %>%
gather(test_1_pre, test_1_post,
key="test_1_old", value="test_1") %>%
gather(test_2_pre, test_2_post,
key="test_2_old", value="test_2") %>%
gather(test_3_pre, test_3_post,
key="test_3_old", value="test_3") %>%
mutate(pre_post = case_when(test_1_old == "test_1_pre" ~ 'pre',
test_1_old == "test_1_post" ~'post',
test_1_old == "test_2_pre" ~ 'pre',
test_1_old == "test_2_post" ~ 'post',
test_1_old == "test_3_pre" ~ 'pre',
test_1_old == "test_3_post" ~'post'))
vars_df <- c("test_1", "test_2", "test_3")
table_df <- CreateTableOne(vars = vars_df,
data = df_2,
strata = "pre_post")
tabelle.table_df<-print(table_df)
My aim is to get a table which compares the two time points "pre" and "post" of the different tests.
It works for test 1, but not the following ones.
Could anyone help, would be much appreciated!
Result:
Stratified by pre_post
post pre p test
n 16 16
test_1 (mean (sd)) 4.50 (2.13) 4.00 (2.83) 0.576
test_2 (mean (sd)) 5.88 (2.75) 5.88 (2.75) 1.000
test_3 (mean (sd)) 5.88 (2.85) 5.88 (2.85) 1.000
Upvotes: 0
Views: 149
Reputation: 1334
The problem arises because the pre_post
column you create is incorrect for tests 2 and 3, which you can verify by inspecting the df_2
dataframe. For example row 5 from your output is:
test_1_post 2 test_2_pre 6 test_3_pre 12 post
Whilst a single data pipeline is appropriate is many circumstances, this problem is more easily solved by splitting up the tables and using a union_all
, as per the following:
library(tidyverse)
library(tableone)
test_1_pre <- c(1,5,8,2)
test_1_post <- c(2,7,3,6)
test_2_pre <- c(6,3,6,5)
test_2_post <- c(9,8,9,1)
test_3_pre <- c(12,2,4,6)
test_3_post <- c(4,7,6,6)
df <- data.frame(test_1_pre, test_1_post, test_2_pre,
test_2_post, test_3_pre, test_3_post)
get_dfs <- function(df, suffix) {
df %>%
select(ends_with(suffix)) %>%
mutate(pre_post = suffix) %>%
# Drop _pre / _post suffixes in test column names ahead of union
rename_with(.fn = function(x) gsub(paste0("_", suffix), "", x),
.cols = starts_with("test"))
}
df_2 <- union_all(get_dfs(df, "pre"), get_dfs(df, "post"))
vars_df <- c("test_1", "test_2", "test_3")
table_df <- CreateTableOne(vars = vars_df,
data = df_2,
strata = "pre_post")
table_df
The output given is:
# Stratified by pre_post
# post pre p test
# n 4 4
# test_1 (mean (SD)) 4.50 (2.38) 4.00 (3.16) 0.809
# test_2 (mean (SD)) 6.75 (3.86) 5.00 (1.41) 0.427
# test_3 (mean (SD)) 5.75 (1.26) 6.00 (4.32) 0.915
This contains more conservative p-values arising from the different estimated standard errors, as compared with @Frank's answer.
Moreover, the structure of df_2
is cleaner:
# test_1 test_2 test_3 pre_post
# 1 6 12 pre
# 5 3 2 pre
# 8 6 4 pre
# 2 5 6 pre
# 2 9 4 post
# 7 8 7 post
# 3 9 6 post
# 6 1 6 post
Upvotes: 1
Reputation: 25
nr <- c("1", "2", "3", "4")
test_1_pre <- c(1,5,8,2)
test_1_post <- c(2,7,3,6)
test_2_pre <- c(6,3,6,5)
test_2_post <- c(9,8,9,1)
test_3_pre <- c(12,2,4,6)
test_3_post <- c(4,7,6,6)
df <- data.frame(nr, test_1_pre, test_1_post, test_2_pre,
test_2_post, test_3_pre, test_3_post)
df_2 <- df %>%
gather(test_1_pre, test_1_post, test_2_pre, test_2_post,
test_3_pre, test_3_post,
key="score", value="value") %>%
mutate(pre_post = case_when(score == "test_1_pre" ~ 'pre',
score == "test_1_post" ~'post',
score == "test_2_pre" ~ 'pre',
score == "test_2_post" ~ 'post',
score == "test_3_pre" ~ 'pre',
score == "test_3_post" ~'post'))%>%
pivot_wider(names_from="score", values_from="value")%>%
gather(test_1_pre, test_1_post,
key="test_1_old", value="test_1") %>%
gather(test_2_pre, test_2_post,
key="test_2_old", value="test_2") %>%
gather(test_3_pre, test_3_post,
key="test_3_old", value="test_3")
vars_df <- c("test_1", "test_2", "test_3")
table_df <- CreateTableOne(vars = vars_df,
data = df_2,
strata = "pre_post")
tabelle.table_df<-print(table_df)
Found the solution: first gather everything into one column, mutate the new column, spread the columns again and gather them separately.
Stratified by pre_post
post pre p test
n 32 32
test_1 (mean (SD)) 4.50 (2.13) 4.00 (2.83) 0.576
test_2 (mean (SD)) 6.75 (3.45) 5.00 (1.26) 0.067
test_3 (mean (SD)) 5.75 (1.13) 6.00 (3.86) 0.805
Upvotes: 1