Reputation: 35
To demostrate values using customized format "xx/xx (xx.x%)"
, a example is like this:
library(formatters)
library(rtables)
library(tern)
advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))
custom_format <- function(x, ...) {
attr(x, "label") <- NULL
checkmate::assert_vector(x)
checkmate::assert_count(x["num"])
checkmate::assert_count(x["denom"])
result <- if (x["num"] == 0) {
paste0(x["num"], "/", x["denom"])
} else if (x["num"] / x["denom"] == 1){
paste0(x["num"], "/", x["denom"]," (100)")
}else {
paste0(
x["num"], "/", x["denom"],
" (", sprintf("%.1f", round2(x["num"] / x["denom"] * 100, 1)), ")"
)
}
return(result)
}
lyt <- basic_table() %>%
rtables::split_cols_by(var = "ARM") %>%
rtables::add_colcounts() %>%
rtables::split_rows_by("PARAM", split_fun = drop_split_levels) %>%
tern::count_occurrences("SITEID", .stats = "fraction",
.formats = list(fraction = custom_format),
denom = "n")
tbl <- rtables::build_table(lyt, advs)
tbl
Then you have:
> tbl
A: Drug X B: Placebo C: Combination
(N=588) (N=658) (N=567)
———————————————————————————————————————————————————————————————————————
Diastolic Blood Pressure
CHN-1 21/84 (25.0) 20/94 (21.3) 16/81 (19.8)
CHN-10 0/84 1/94 (1.1) 0/81
CHN-11 12/84 (14.3) 20/94 (21.3) 16/81 (19.8)
CHN-12 4/84 (4.8) 3/94 (3.2) 1/81 (1.2)
CHN-13 2/84 (2.4) 6/94 (6.4) 0/81
CHN-14 4/84 (4.8) 2/94 (2.1) 3/81 (3.7)
CHN-15 2/84 (2.4) 0/94 4/81 (4.9)
CHN-16 0/84 3/94 (3.2) 3/81 (3.7)
CHN-17 4/84 (4.8) 4/94 (4.3) 3/81 (3.7)
CHN-18 1/84 (1.2) 0/94 2/81 (2.5)
CHN-2 9/84 (10.7) 4/94 (4.3) 3/81 (3.7)
CHN-3 5/84 (6.0) 1/94 (1.1) 5/81 (6.2)
CHN-4 3/84 (3.6) 3/94 (3.2) 3/81 (3.7)
CHN-5 4/84 (4.8) 3/94 (3.2) 4/81 (4.9)
CHN-6 1/84 (1.2) 3/94 (3.2) 0/81
CHN-7 0/84 5/94 (5.3) 1/81 (1.2)
CHN-8 1/84 (1.2) 1/94 (1.1) 0/81
CHN-9 1/84 (1.2) 2/94 (2.1) 0/81
USA-1 1/84 (1.2) 4/94 (4.3) 5/81 (6.2)
USA-11 4/84 (4.8) 2/94 (2.1) 3/81 (3.7)
USA-12 1/84 (1.2) 2/94 (2.1) 3/81 (3.7)
USA-14 1/84 (1.2) 0/94 0/81
USA-15 0/84 1/94 (1.1) 1/81 (1.2)
USA-17 1/84 (1.2) 1/94 (1.1) 0/81
USA-19 0/84 0/94 1/81 (1.2)
USA-2 0/84 0/94 1/81 (1.2)
USA-3 1/84 (1.2) 0/94 1/81 (1.2)
USA-4 0/84 1/94 (1.1) 1/81 (1.2)
USA-5 0/84 1/94 (1.1) 0/81
USA-6 0/84 1/94 (1.1) 0/81
USA-8 0/84 0/94 1/81 (1.2)
USA-9 1/84 (1.2) 0/94 0/81
It is using the value of number of uniques within this PARAM as denominator, which is 84.
If I use count_occurrences
on COUNTRY
:
lyt1 <- basic_table() %>%
rtables::split_cols_by(var = "ARM") %>%
rtables::add_colcounts() %>%
rtables::split_rows_by("PARAM", split_fun = drop_split_levels) %>%
tern::count_occurrences("COUNTRY")
tbl1 <- rtables::build_table(lyt1, advs)
tbl1
Then you have this:
> tbl1
A: Drug X B: Placebo C: Combination
(N=588) (N=658) (N=567)
———————————————————————————————————————————————————————————————————
Diastolic Blood Pressure
CHN 74 (12.6%) 81 (12.3%) 64 (11.3%)
USA 10 (1.7%) 13 (2.0%) 17 (3.0%)
What I want is to use the value of a COUNTRY
as the demoninator for all associated SITEID
in tbl
, for example, I want 74
(see from tbl1
) to be the denominator for all SITEID
started with CHN-
, because all those sites are in COUNTRY == "CHN"
, rather than 84
, likewise, 10
for those SITEID
in COUNTRY == "USA"
, rather than 84
.
Is that what current ratbles
and tern
can do? Thanks for your thoughts in advance!
Upvotes: 1
Views: 49
Reputation: 1204
In general, I would suggest to write your own analysis function (afun
in analyze()
) and avoid using count_occurences
. Usually you can start by copying the statistical function such as s_count_occurences
and add an output using in_rows
. Another way, that is a bit more hacky, but that allows you to understand how {tern} statistical function works is to overload s_count_occurences
like in the following:
library(formatters)
#>
#> Attaching package: 'formatters'
#> The following object is masked from 'package:base':
#>
#> %||%
library(rtables)
#> Loading required package: magrittr
#>
#> Attaching package: 'rtables'
#> The following object is masked from 'package:utils':
#>
#> str
library(tern)
#> Registered S3 method overwritten by 'tern':
#> method from
#> tidy.glm broom
advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))
custom_format <- function(x, ...) {
attr(x, "label") <- NULL
checkmate::assert_vector(x)
checkmate::assert_count(x["num"])
checkmate::assert_count(x["denom"])
result <- if (x["num"] == 0) {
paste0(x["num"], "/", x["denom"])
} else if (x["num"] / x["denom"] == 1) {
paste0(x["num"], "/", x["denom"], " (100)")
} else {
paste0(
x["num"], "/", x["denom"],
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), ")"
)
}
return(result)
}
lyt <- basic_table() %>%
rtables::split_cols_by(var = "ARM") %>%
rtables::add_colcounts() %>%
tern::count_occurrences("SITEID",
.stats = "fraction",
.formats = list(fraction = custom_format),
denom = "N_row"
)
custom_s_count_occurrences <- function(df, denom = c("N_col", "n", "N_row"), .N_col, .N_row,
.df_row, drop = TRUE, .var = "MHDECOD", id = "USUBJID", ...) {
occurrences <- if (drop) {
occurrence_levels <- sort(unique(.df_row[[.var]]))
if (length(occurrence_levels) == 0) {
stop(
"no empty `.df_row` input allowed when `drop = TRUE`,",
" please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
)
}
factor(df[[.var]], levels = occurrence_levels)
} else {
df[[.var]]
}
ids <- factor(df[[id]])
has_occurrence_per_id <- table(occurrences, ids) > 0
n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
denom <- lapply(names(n_ids_per_occurrence), function(uu) ifelse(grepl("CHN", uu), 74, 10))
cur_count_fraction <- lapply(
seq_along(n_ids_per_occurrence),
function(i2) {
i <- n_ids_per_occurrence[[i2]]
den <- denom[[i2]]
if (i == 0 && den == 0) {
c(0, 0)
} else {
c(i, i / den)
}
}
)
cur_fraction <- setNames(
lapply(seq_along(n_ids_per_occurrence), function(i2) {
c(num = n_ids_per_occurrence[[i2]], denom = denom[[i2]])
}),
names(n_ids_per_occurrence)
)
list(
count = n_ids_per_occurrence, count_fraction = cur_count_fraction,
count_fraction_fixed_dp = cur_count_fraction, fraction = cur_fraction
)
}
tern_s_count_occurrences <- get("s_count_occurrences", envir = asNamespace("tern"))
# Override the function in the package's namespace
unlockBinding("s_count_occurrences", asNamespace("tern"))
assign("s_count_occurrences", custom_s_count_occurrences, envir = asNamespace("tern"))
lockBinding("s_count_occurrences", asNamespace("tern"))
tbl <- rtables::build_table(lyt, advs)
tbl
#> A: Drug X B: Placebo C: Combination
#> (N=3528) (N=3948) (N=3402)
#> —————————————————————————————————————————————————————
#> CHN-1 21/74 (28.4) 20/74 (27.0) 16/74 (21.6)
#> CHN-10 0/74 1/74 (1.4) 0/74
#> CHN-11 12/74 (16.2) 20/74 (27.0) 16/74 (21.6)
#> CHN-12 4/74 (5.4) 3/74 (4.1) 1/74 (1.4)
#> CHN-13 2/74 (2.7) 6/74 (8.1) 0/74
#> CHN-14 4/74 (5.4) 2/74 (2.7) 3/74 (4.1)
#> CHN-15 2/74 (2.7) 0/74 4/74 (5.4)
#> CHN-16 0/74 3/74 (4.1) 3/74 (4.1)
#> CHN-17 4/74 (5.4) 4/74 (5.4) 3/74 (4.1)
#> CHN-18 1/74 (1.4) 0/74 2/74 (2.7)
#> CHN-2 9/74 (12.2) 4/74 (5.4) 3/74 (4.1)
#> CHN-3 5/74 (6.8) 1/74 (1.4) 5/74 (6.8)
#> CHN-4 3/74 (4.1) 3/74 (4.1) 3/74 (4.1)
#> CHN-5 4/74 (5.4) 3/74 (4.1) 4/74 (5.4)
#> CHN-6 1/74 (1.4) 3/74 (4.1) 0/74
#> CHN-7 0/74 5/74 (6.8) 1/74 (1.4)
#> CHN-8 1/74 (1.4) 1/74 (1.4) 0/74
#> CHN-9 1/74 (1.4) 2/74 (2.7) 0/74
#> USA-1 1/10 (10.0) 4/10 (40.0) 5/10 (50.0)
#> USA-11 4/10 (40.0) 2/10 (20.0) 3/10 (30.0)
#> USA-12 1/10 (10.0) 2/10 (20.0) 3/10 (30.0)
#> USA-14 1/10 (10.0) 0/10 0/10
#> USA-15 0/10 1/10 (10.0) 1/10 (10.0)
#> USA-17 1/10 (10.0) 1/10 (10.0) 0/10
#> USA-19 0/10 0/10 1/10 (10.0)
#> USA-2 0/10 0/10 1/10 (10.0)
#> USA-3 1/10 (10.0) 0/10 1/10 (10.0)
#> USA-4 0/10 1/10 (10.0) 1/10 (10.0)
#> USA-5 0/10 1/10 (10.0) 0/10
#> USA-6 0/10 1/10 (10.0) 0/10
#> USA-8 0/10 0/10 1/10 (10.0)
#> USA-9 1/10 (10.0) 0/10 0/10
# Reassign original value
unlockBinding("s_count_occurrences", asNamespace("tern"))
assign("s_count_occurrences", tern_s_count_occurrences, envir = asNamespace("tern"))
lockBinding("s_count_occurrences", asNamespace("tern"))
Created on 2025-02-07 with reprex v2.1.1
From the above getting it into a less hacky form, i.e. without overloading:
library(formatters)
#>
#> Attaching package: 'formatters'
#> The following object is masked from 'package:base':
#>
#> %||%
library(rtables)
#> Loading required package: magrittr
#>
#> Attaching package: 'rtables'
#> The following object is masked from 'package:utils':
#>
#> str
library(tern)
#> Registered S3 method overwritten by 'tern':
#> method from
#> tidy.glm broom
advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))
custom_format <- function(x, ...) {
attr(x, "label") <- NULL
# browser()
checkmate::assert_vector(x)
checkmate::assert_count(x["num"])
checkmate::assert_count(x["denom"])
result <- if (x["num"] == 0) {
paste0(x["num"], "/", x["denom"])
} else if (x["num"] / x["denom"] == 1) {
paste0(x["num"], "/", x["denom"], " (100)")
} else {
paste0(
x["num"], "/", x["denom"],
" (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), ")"
)
}
return(result)
}
custom_afun_count_occurrences <- function(df, denom = c("N_col", "n", "N_row"), .N_col, .N_row,
.df_row, drop = TRUE, .var = "MHDECOD", id = "USUBJID", ...) {
occurrences <- if (drop) {
occurrence_levels <- sort(unique(.df_row[[.var]]))
if (length(occurrence_levels) == 0) {
stop(
"no empty `.df_row` input allowed when `drop = TRUE`,",
" please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
)
}
factor(df[[.var]], levels = occurrence_levels)
}
ids <- factor(df[[id]])
has_occurrence_per_id <- table(occurrences, ids) > 0
n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
denom <- lapply(names(n_ids_per_occurrence), function(uu) ifelse(grepl("CHN", uu), 74, 10))
cur_count_fraction <- lapply(
seq_along(n_ids_per_occurrence),
function(i2) {
i <- n_ids_per_occurrence[[i2]]
den <- denom[[i2]]
if (i == 0 && den == 0) {
c(0, 0)
} else {
c(i, i / den)
}
}
)
cur_fraction <- setNames(
lapply(seq_along(n_ids_per_occurrence), function(i2) {
c(num = n_ids_per_occurrence[[i2]], denom = denom[[i2]])
}),
names(n_ids_per_occurrence)
)
in_rows(.list = cur_fraction)
}
lyt <- basic_table() %>%
rtables::split_cols_by(var = "ARM") %>%
rtables::add_colcounts() %>%
rtables::analyze("SITEID",
afun = custom_afun_count_occurrences,
format = list(fraction = custom_format),
extra_args = list(id = "USUBJID")
)
tbl <- rtables::build_table(lyt, advs)
tbl
#> A: Drug X B: Placebo C: Combination
#> (N=3528) (N=3948) (N=3402)
#> —————————————————————————————————————————————————————
#> CHN-1 21/74 (28.4) 20/74 (27.0) 16/74 (21.6)
#> CHN-10 0/74 1/74 (1.4) 0/74
#> CHN-11 12/74 (16.2) 20/74 (27.0) 16/74 (21.6)
#> CHN-12 4/74 (5.4) 3/74 (4.1) 1/74 (1.4)
#> CHN-13 2/74 (2.7) 6/74 (8.1) 0/74
#> CHN-14 4/74 (5.4) 2/74 (2.7) 3/74 (4.1)
#> CHN-15 2/74 (2.7) 0/74 4/74 (5.4)
#> CHN-16 0/74 3/74 (4.1) 3/74 (4.1)
#> CHN-17 4/74 (5.4) 4/74 (5.4) 3/74 (4.1)
#> CHN-18 1/74 (1.4) 0/74 2/74 (2.7)
#> CHN-2 9/74 (12.2) 4/74 (5.4) 3/74 (4.1)
#> CHN-3 5/74 (6.8) 1/74 (1.4) 5/74 (6.8)
#> CHN-4 3/74 (4.1) 3/74 (4.1) 3/74 (4.1)
#> CHN-5 4/74 (5.4) 3/74 (4.1) 4/74 (5.4)
#> CHN-6 1/74 (1.4) 3/74 (4.1) 0/74
#> CHN-7 0/74 5/74 (6.8) 1/74 (1.4)
#> CHN-8 1/74 (1.4) 1/74 (1.4) 0/74
#> CHN-9 1/74 (1.4) 2/74 (2.7) 0/74
#> USA-1 1/10 (10.0) 4/10 (40.0) 5/10 (50.0)
#> USA-11 4/10 (40.0) 2/10 (20.0) 3/10 (30.0)
#> USA-12 1/10 (10.0) 2/10 (20.0) 3/10 (30.0)
#> USA-14 1/10 (10.0) 0/10 0/10
#> USA-15 0/10 1/10 (10.0) 1/10 (10.0)
#> USA-17 1/10 (10.0) 1/10 (10.0) 0/10
#> USA-19 0/10 0/10 1/10 (10.0)
#> USA-2 0/10 0/10 1/10 (10.0)
#> USA-3 1/10 (10.0) 0/10 1/10 (10.0)
#> USA-4 0/10 1/10 (10.0) 1/10 (10.0)
#> USA-5 0/10 1/10 (10.0) 0/10
#> USA-6 0/10 1/10 (10.0) 0/10
#> USA-8 0/10 0/10 1/10 (10.0)
#> USA-9 1/10 (10.0) 0/10 0/10
Created on 2025-02-07 with reprex v2.1.1
Upvotes: 0