Lym Lin
Lym Lin

Reputation: 3

GLMM for count data with random effect of Subject in small sample size with risk of inflated type I error, how to infer factor effects and post-hoc

I am a newbie in GLMM. My data designed with repeated measures involved 9 subjects, each subject performed in duplicate, the count as a response. Since dispersion, the model of negative binomial was constructed instead of Poisson regression model. GLMM: Count~Task+(1|subject) My question is that does the Task factor (A,B,C) associates the count and if so, what is the post-hoc analysis?

According to some papers below, the ML based approaches imbedded prones to inflate type I error especially in small sample size, which contributes to a wrong statistical inference.

  1. Warton, D. I., Lyons, M., Stoklosa, J. & Ives, A. R. Three points to consider when choosing a LM or GLM test for count data. Methods in Ecology and Evolution 7, 882–890 (2016).
  2. O’Hara, R. & Kotze, J. Do not log-transform count data. Nature Precedings 1–1 (2010).

After asking the openai, a permutation based on LR with shuffling within each subject is recommended.

library(glmmTMB)   
library(doParallel) 
library(foreach)

df$Task <- factor(df$Task, levels = c("A", "B", "C"))

# LR
computeLR <- function(data) {
  mod_full <- glmmTMB(Count~ Task + (1 | subject), 
                      family = nbinom2, data = data)
  mod_null <- glmmTMB(Count~ 1 + (1 | subject), 
                      family = nbinom2, data = data)
  lr_stat <- as.numeric(2 * (logLik(mod_full) - logLik(mod_null)))
  return(lr_stat)
}


obs_stat <- computeLR(df)
cat("LR  =", obs_stat, "\n")

# shuffle  Task  within subject
permuteTask <- function(data) {
  # shuffle Task for each subject
  data$Task <- unlist(lapply(split(data$Task, data$subject), sample))
  data$Task <- factor(data$Task, levels = levels(df$Task))
  return(data)
}

nperm <- 5000

ncores <- parallel::detectCores() - 1
cl <- makeCluster(ncores)
registerDoParallel(cl)

perm_stats <- foreach(i = 1:nperm, .combine = c,
                      .packages = "glmmTMB") %dopar% {
                        data_perm <- permuteTask(df)
                        computeLR(data_perm)
                      }

stopCluster(cl)

# perm p (single side)
p_value <- mean(perm_stats >= obs_stat)
cat("P =", p_value, "\n")

hist(perm_stats, breaks = 30, main = "Permutation_LR",
     xlab = "LR stat", col = "lightblue", border = "white")
abline(v = obs_stat, col = "red", lwd = 2)

And the recommended post-hoc analysis is also based on permutation for pairs of data

permuteTask <- function(data) {
  data$Task <- unlist(lapply(split(data$Task, data$subject), sample))
  data$Task <- factor(data$Task, levels = levels(data$Task))
  return(data)
}

pairwise_perm_test <- function(data, level1, level2, nperm = 1000) {
 
  data_sub <- subset(data, Task %in% c(level1, level2))
  data_sub$Task <- factor(data_sub$Task, levels = c(level1, level2))
  
  # b extraction
  mod_orig <- glmmTMB(Count~ Task + (1|subject), family = nbinom2, data = data_sub)
  coef_name <- paste0("Task", level2)
  obs_coef <- summary(mod_orig)$coefficients$cond[coef_name, "Estimate"]
  
  perm_coefs <- numeric(nperm)
  for (i in 1:nperm) {
    data_perm <- permuteTask(data_sub)
    mod_perm <- try(glmmTMB(Count~ Task + (1|subject), family = nbinom2, data = data_perm),
                    silent = TRUE)
    if (inherits(mod_perm, "try-error")) {
      perm_coefs[i] <- NA
    } else {
      perm_coefs[i] <- summary(mod_perm)$coefficients$cond[coef_name, "Estimate"]
    }
  }
  perm_coefs <- perm_coefs[!is.na(perm_coefs)]
  
  # bilateral p 
  p_value <- mean(abs(perm_coefs) >= abs(obs_coef))
  
  return(list(pair = paste(level1, "vs", level2),
              obs_coef = obs_coef,
              raw_p = p_value))
}

pairs_list <- list(c("A", "B"),
                   c("A", "C"),
                   c("B", "C"))
nperm <- 5000

ncores <- parallel::detectCores() - 1
cl <- makeCluster(ncores)
registerDoParallel(cl)

results <- foreach(pair = pairs_list, .combine = rbind,
                   .packages = c("glmmTMB")) %dopar% {
                     res <- pairwise_perm_test(df, pair[1], pair[2], nperm)
                     c(pair = res$pair, obs_coef = res$obs_coef, raw_p = res$raw_p)
                   }
stopCluster(cl)

results <- as.data.frame(results, stringsAsFactors = FALSE)
results$obs_coef <- as.numeric(results$obs_coef)
results$raw_p <- as.numeric(results$raw_p)

# 4. post-hoc correction: Holm-bonferroni
# ----------------------------
results$holm_p <- p.adjust(results$raw_p, method = "holm")
cat("Pair-wise comparison:\n")
print(results)

Does it proper? If so, how can I report this? Or should I simplify this by using log(count+1) with LMM?

Upvotes: 0

Views: 17

Answers (0)

Related Questions