Huan Lu
Huan Lu

Reputation: 35

Using different denominators in tern::count_occurrences within same split_by_row

To demostrate values using customized format "xx/xx (xx.x%)", a example is like this:

library(formatters)
library(rtables)
library(tern)

advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))

custom_format <- function(x, ...) {
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])
  result <- if (x["num"] == 0) {
    paste0(x["num"], "/", x["denom"])
  } else if (x["num"] / x["denom"] == 1){
    paste0(x["num"], "/", x["denom"]," (100)")
  }else {
    paste0(
      x["num"], "/", x["denom"],
      " (", sprintf("%.1f", round2(x["num"] / x["denom"] * 100, 1)), ")"
    )
  }
  return(result)
}

lyt <- basic_table() %>%
  rtables::split_cols_by(var = "ARM") %>%
  rtables::add_colcounts() %>%
  rtables::split_rows_by("PARAM", split_fun = drop_split_levels) %>%
  tern::count_occurrences("SITEID", .stats = "fraction",
                          .formats = list(fraction = custom_format),
                          denom = "n")

tbl <- rtables::build_table(lyt, advs)
tbl

Then you have:

> tbl
                            A: Drug X      B: Placebo    C: Combination
                             (N=588)        (N=658)         (N=567)    
———————————————————————————————————————————————————————————————————————
Diastolic Blood Pressure                                               
  CHN-1                    21/84 (25.0)   20/94 (21.3)    16/81 (19.8) 
  CHN-10                       0/84        1/94 (1.1)         0/81     
  CHN-11                   12/84 (14.3)   20/94 (21.3)    16/81 (19.8) 
  CHN-12                    4/84 (4.8)     3/94 (3.2)      1/81 (1.2)  
  CHN-13                    2/84 (2.4)     6/94 (6.4)         0/81     
  CHN-14                    4/84 (4.8)     2/94 (2.1)      3/81 (3.7)  
  CHN-15                    2/84 (2.4)        0/94         4/81 (4.9)  
  CHN-16                       0/84        3/94 (3.2)      3/81 (3.7)  
  CHN-17                    4/84 (4.8)     4/94 (4.3)      3/81 (3.7)  
  CHN-18                    1/84 (1.2)        0/94         2/81 (2.5)  
  CHN-2                    9/84 (10.7)     4/94 (4.3)      3/81 (3.7)  
  CHN-3                     5/84 (6.0)     1/94 (1.1)      5/81 (6.2)  
  CHN-4                     3/84 (3.6)     3/94 (3.2)      3/81 (3.7)  
  CHN-5                     4/84 (4.8)     3/94 (3.2)      4/81 (4.9)  
  CHN-6                     1/84 (1.2)     3/94 (3.2)         0/81     
  CHN-7                        0/84        5/94 (5.3)      1/81 (1.2)  
  CHN-8                     1/84 (1.2)     1/94 (1.1)         0/81     
  CHN-9                     1/84 (1.2)     2/94 (2.1)         0/81     
  USA-1                     1/84 (1.2)     4/94 (4.3)      5/81 (6.2)  
  USA-11                    4/84 (4.8)     2/94 (2.1)      3/81 (3.7)  
  USA-12                    1/84 (1.2)     2/94 (2.1)      3/81 (3.7)  
  USA-14                    1/84 (1.2)        0/94            0/81     
  USA-15                       0/84        1/94 (1.1)      1/81 (1.2)  
  USA-17                    1/84 (1.2)     1/94 (1.1)         0/81     
  USA-19                       0/84           0/94         1/81 (1.2)  
  USA-2                        0/84           0/94         1/81 (1.2)  
  USA-3                     1/84 (1.2)        0/94         1/81 (1.2)  
  USA-4                        0/84        1/94 (1.1)      1/81 (1.2)  
  USA-5                        0/84        1/94 (1.1)         0/81     
  USA-6                        0/84        1/94 (1.1)         0/81     
  USA-8                        0/84           0/94         1/81 (1.2)  
  USA-9                     1/84 (1.2)        0/94            0/81     

It is using the value of number of uniques within this PARAM as denominator, which is 84.

If I use count_occurrences on COUNTRY:

lyt1 <- basic_table() %>%
  rtables::split_cols_by(var = "ARM") %>%
  rtables::add_colcounts() %>%
  rtables::split_rows_by("PARAM", split_fun = drop_split_levels) %>%
  tern::count_occurrences("COUNTRY")

tbl1 <- rtables::build_table(lyt1, advs)
tbl1

Then you have this:

> tbl1
                           A: Drug X    B: Placebo   C: Combination
                            (N=588)      (N=658)        (N=567)    
———————————————————————————————————————————————————————————————————
Diastolic Blood Pressure                                           
  CHN                      74 (12.6%)   81 (12.3%)     64 (11.3%)  
  USA                      10 (1.7%)    13 (2.0%)      17 (3.0%)   

What I want is to use the value of a COUNTRY as the demoninator for all associated SITEID in tbl, for example, I want 74 (see from tbl1) to be the denominator for all SITEID started with CHN-, because all those sites are in COUNTRY == "CHN", rather than 84, likewise, 10 for those SITEID in COUNTRY == "USA", rather than 84.

Is that what current ratbles and tern can do? Thanks for your thoughts in advance!

Upvotes: 1

Views: 49

Answers (1)

Garini
Garini

Reputation: 1204

In general, I would suggest to write your own analysis function (afun in analyze()) and avoid using count_occurences. Usually you can start by copying the statistical function such as s_count_occurences and add an output using in_rows. Another way, that is a bit more hacky, but that allows you to understand how {tern} statistical function works is to overload s_count_occurences like in the following:

library(formatters)
#> 
#> Attaching package: 'formatters'
#> The following object is masked from 'package:base':
#> 
#>     %||%
library(rtables)
#> Loading required package: magrittr
#> 
#> Attaching package: 'rtables'
#> The following object is masked from 'package:utils':
#> 
#>     str
library(tern)
#> Registered S3 method overwritten by 'tern':
#>   method   from 
#>   tidy.glm broom

advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))

custom_format <- function(x, ...) {
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])
  result <- if (x["num"] == 0) {
    paste0(x["num"], "/", x["denom"])
  } else if (x["num"] / x["denom"] == 1) {
    paste0(x["num"], "/", x["denom"], " (100)")
  } else {
    paste0(
      x["num"], "/", x["denom"],
      " (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), ")"
    )
  }
  return(result)
}

lyt <- basic_table() %>%
  rtables::split_cols_by(var = "ARM") %>%
  rtables::add_colcounts() %>%
  tern::count_occurrences("SITEID",
    .stats = "fraction",
    .formats = list(fraction = custom_format),
    denom = "N_row"
  )

custom_s_count_occurrences <- function(df, denom = c("N_col", "n", "N_row"), .N_col, .N_row,
                                       .df_row, drop = TRUE, .var = "MHDECOD", id = "USUBJID", ...) {
  occurrences <- if (drop) {
    occurrence_levels <- sort(unique(.df_row[[.var]]))
    if (length(occurrence_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    factor(df[[.var]], levels = occurrence_levels)
  } else {
    df[[.var]]
  }
  ids <- factor(df[[id]])
  has_occurrence_per_id <- table(occurrences, ids) > 0
  n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
  denom <- lapply(names(n_ids_per_occurrence), function(uu) ifelse(grepl("CHN", uu), 74, 10))
  cur_count_fraction <- lapply(
    seq_along(n_ids_per_occurrence),
    function(i2) {
      i <- n_ids_per_occurrence[[i2]]
      den <- denom[[i2]]

      if (i == 0 && den == 0) {
        c(0, 0)
      } else {
        c(i, i / den)
      }
    }
  )
  cur_fraction <- setNames(
    lapply(seq_along(n_ids_per_occurrence), function(i2) {
      c(num = n_ids_per_occurrence[[i2]], denom = denom[[i2]])
    }),
    names(n_ids_per_occurrence)
  )
  list(
    count = n_ids_per_occurrence, count_fraction = cur_count_fraction,
    count_fraction_fixed_dp = cur_count_fraction, fraction = cur_fraction
      
  )
}

tern_s_count_occurrences <- get("s_count_occurrences", envir = asNamespace("tern"))
# Override the function in the package's namespace
unlockBinding("s_count_occurrences", asNamespace("tern"))
assign("s_count_occurrences", custom_s_count_occurrences, envir = asNamespace("tern"))
lockBinding("s_count_occurrences", asNamespace("tern"))

tbl <- rtables::build_table(lyt, advs)
tbl
#>           A: Drug X      B: Placebo    C: Combination
#>            (N=3528)       (N=3948)        (N=3402)   
#> —————————————————————————————————————————————————————
#> CHN-1    21/74 (28.4)   20/74 (27.0)    16/74 (21.6) 
#> CHN-10       0/74        1/74 (1.4)         0/74     
#> CHN-11   12/74 (16.2)   20/74 (27.0)    16/74 (21.6) 
#> CHN-12    4/74 (5.4)     3/74 (4.1)      1/74 (1.4)  
#> CHN-13    2/74 (2.7)     6/74 (8.1)         0/74     
#> CHN-14    4/74 (5.4)     2/74 (2.7)      3/74 (4.1)  
#> CHN-15    2/74 (2.7)        0/74         4/74 (5.4)  
#> CHN-16       0/74        3/74 (4.1)      3/74 (4.1)  
#> CHN-17    4/74 (5.4)     4/74 (5.4)      3/74 (4.1)  
#> CHN-18    1/74 (1.4)        0/74         2/74 (2.7)  
#> CHN-2    9/74 (12.2)     4/74 (5.4)      3/74 (4.1)  
#> CHN-3     5/74 (6.8)     1/74 (1.4)      5/74 (6.8)  
#> CHN-4     3/74 (4.1)     3/74 (4.1)      3/74 (4.1)  
#> CHN-5     4/74 (5.4)     3/74 (4.1)      4/74 (5.4)  
#> CHN-6     1/74 (1.4)     3/74 (4.1)         0/74     
#> CHN-7        0/74        5/74 (6.8)      1/74 (1.4)  
#> CHN-8     1/74 (1.4)     1/74 (1.4)         0/74     
#> CHN-9     1/74 (1.4)     2/74 (2.7)         0/74     
#> USA-1    1/10 (10.0)    4/10 (40.0)     5/10 (50.0)  
#> USA-11   4/10 (40.0)    2/10 (20.0)     3/10 (30.0)  
#> USA-12   1/10 (10.0)    2/10 (20.0)     3/10 (30.0)  
#> USA-14   1/10 (10.0)        0/10            0/10     
#> USA-15       0/10       1/10 (10.0)     1/10 (10.0)  
#> USA-17   1/10 (10.0)    1/10 (10.0)         0/10     
#> USA-19       0/10           0/10        1/10 (10.0)  
#> USA-2        0/10           0/10        1/10 (10.0)  
#> USA-3    1/10 (10.0)        0/10        1/10 (10.0)  
#> USA-4        0/10       1/10 (10.0)     1/10 (10.0)  
#> USA-5        0/10       1/10 (10.0)         0/10     
#> USA-6        0/10       1/10 (10.0)         0/10     
#> USA-8        0/10           0/10        1/10 (10.0)  
#> USA-9    1/10 (10.0)        0/10            0/10

# Reassign original value
unlockBinding("s_count_occurrences", asNamespace("tern"))
assign("s_count_occurrences", tern_s_count_occurrences, envir = asNamespace("tern"))
lockBinding("s_count_occurrences", asNamespace("tern"))

Created on 2025-02-07 with reprex v2.1.1

From the above getting it into a less hacky form, i.e. without overloading:

library(formatters)
#> 
#> Attaching package: 'formatters'
#> The following object is masked from 'package:base':
#> 
#>     %||%
library(rtables)
#> Loading required package: magrittr
#> 
#> Attaching package: 'rtables'
#> The following object is masked from 'package:utils':
#> 
#>     str
library(tern)
#> Registered S3 method overwritten by 'tern':
#>   method   from 
#>   tidy.glm broom

advs <- subset(ex_advs, COUNTRY %in% c("CHN", "USA"))

custom_format <- function(x, ...) {
  attr(x, "label") <- NULL
  # browser()
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])
  result <- if (x["num"] == 0) {
    paste0(x["num"], "/", x["denom"])
  } else if (x["num"] / x["denom"] == 1) {
    paste0(x["num"], "/", x["denom"], " (100)")
  } else {
    paste0(
      x["num"], "/", x["denom"],
      " (", sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1)), ")"
    )
  }
  return(result)
}

custom_afun_count_occurrences <- function(df, denom = c("N_col", "n", "N_row"), .N_col, .N_row,
                                          .df_row, drop = TRUE, .var = "MHDECOD", id = "USUBJID", ...) {
  occurrences <- if (drop) {
    occurrence_levels <- sort(unique(.df_row[[.var]]))
    if (length(occurrence_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    factor(df[[.var]], levels = occurrence_levels)
  }
  ids <- factor(df[[id]])
  has_occurrence_per_id <- table(occurrences, ids) > 0
  n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
  denom <- lapply(names(n_ids_per_occurrence), function(uu) ifelse(grepl("CHN", uu), 74, 10))
  cur_count_fraction <- lapply(
    seq_along(n_ids_per_occurrence),
    function(i2) {
      i <- n_ids_per_occurrence[[i2]]
      den <- denom[[i2]]

      if (i == 0 && den == 0) {
        c(0, 0)
      } else {
        c(i, i / den)
      }
    }
  )
  cur_fraction <- setNames(
    lapply(seq_along(n_ids_per_occurrence), function(i2) {
      c(num = n_ids_per_occurrence[[i2]], denom = denom[[i2]])
    }),
    names(n_ids_per_occurrence)
  )
  in_rows(.list = cur_fraction)
}

lyt <- basic_table() %>%
  rtables::split_cols_by(var = "ARM") %>%
  rtables::add_colcounts() %>%
  rtables::analyze("SITEID", 
                   afun = custom_afun_count_occurrences,
                   format = list(fraction = custom_format), 
                   extra_args = list(id = "USUBJID")
                   )
tbl <- rtables::build_table(lyt, advs)
tbl
#>           A: Drug X      B: Placebo    C: Combination
#>            (N=3528)       (N=3948)        (N=3402)   
#> —————————————————————————————————————————————————————
#> CHN-1    21/74 (28.4)   20/74 (27.0)    16/74 (21.6) 
#> CHN-10       0/74        1/74 (1.4)         0/74     
#> CHN-11   12/74 (16.2)   20/74 (27.0)    16/74 (21.6) 
#> CHN-12    4/74 (5.4)     3/74 (4.1)      1/74 (1.4)  
#> CHN-13    2/74 (2.7)     6/74 (8.1)         0/74     
#> CHN-14    4/74 (5.4)     2/74 (2.7)      3/74 (4.1)  
#> CHN-15    2/74 (2.7)        0/74         4/74 (5.4)  
#> CHN-16       0/74        3/74 (4.1)      3/74 (4.1)  
#> CHN-17    4/74 (5.4)     4/74 (5.4)      3/74 (4.1)  
#> CHN-18    1/74 (1.4)        0/74         2/74 (2.7)  
#> CHN-2    9/74 (12.2)     4/74 (5.4)      3/74 (4.1)  
#> CHN-3     5/74 (6.8)     1/74 (1.4)      5/74 (6.8)  
#> CHN-4     3/74 (4.1)     3/74 (4.1)      3/74 (4.1)  
#> CHN-5     4/74 (5.4)     3/74 (4.1)      4/74 (5.4)  
#> CHN-6     1/74 (1.4)     3/74 (4.1)         0/74     
#> CHN-7        0/74        5/74 (6.8)      1/74 (1.4)  
#> CHN-8     1/74 (1.4)     1/74 (1.4)         0/74     
#> CHN-9     1/74 (1.4)     2/74 (2.7)         0/74     
#> USA-1    1/10 (10.0)    4/10 (40.0)     5/10 (50.0)  
#> USA-11   4/10 (40.0)    2/10 (20.0)     3/10 (30.0)  
#> USA-12   1/10 (10.0)    2/10 (20.0)     3/10 (30.0)  
#> USA-14   1/10 (10.0)        0/10            0/10     
#> USA-15       0/10       1/10 (10.0)     1/10 (10.0)  
#> USA-17   1/10 (10.0)    1/10 (10.0)         0/10     
#> USA-19       0/10           0/10        1/10 (10.0)  
#> USA-2        0/10           0/10        1/10 (10.0)  
#> USA-3    1/10 (10.0)        0/10        1/10 (10.0)  
#> USA-4        0/10       1/10 (10.0)     1/10 (10.0)  
#> USA-5        0/10       1/10 (10.0)         0/10     
#> USA-6        0/10       1/10 (10.0)         0/10     
#> USA-8        0/10           0/10        1/10 (10.0)  
#> USA-9    1/10 (10.0)        0/10            0/10

Created on 2025-02-07 with reprex v2.1.1

Upvotes: 0

Related Questions