quepur
quepur

Reputation: 90

How to use MuMIn::dredge on models in nested data frame (edit: with parallel processing)

I have many glmmTMB models in a nested data frame that I would like to dredge iteratively, but I get this result even when trying to dredge a single model:

Error in eval(call("is.data.frame", gmCall$data), gmEnv) : object '"is.data.frame"' not found

Fitting the same models outside of the nested data frame works without issue as long as the data are saved as an object in the environment. Is there a fix to use nested data frames? Or maybe a list if necessary? I'd rather not have dozens of standalone data frames and accompanying models floating around.

Example using mtcars dataset:

# Example model to fit
mod_fx <- function(x) {
  glmmTMB(mpg ~ disp + drat + wt, 
          data = x,
          family = gaussian(),
          na.action = "na.fail")
}

# Grouping and nesting data
dat <- mtcars %>% 
  group_by(cyl) %>%
  nest() %>% 
  mutate(mod = map(data, mod_fx))

# Model summary works fine, but note: Data = x
# Related to problem?
summary(dat$mod[[1]])

# Dredge fails
dred_dat <- dredge(dat$mod[[1]],
                   rank = "AICc",
                   evaluate = T)

# Dredge does work when same model is built with separate data frame object stored in environment, not a nested data frame.
dat2 <- filter(mtcars, cyl == 6)

m1 <- glmmTMB(mpg ~ disp + drat + wt, 
        data = dat2,
        family = gaussian(),
        na.action = "na.fail")

summary(m1) # Same model results, (data = dat2), but dredge works:

dred_dat <- dredge(m1,
                   rank = "AICc",
                   evaluate = T)

# This also fails without data stored as separate object
m3 <- glmmTMB(mpg ~ disp + drat + wt, 
              data = filter(mtcars, cyl == 6),
              family = gaussian(),
              na.action = "na.fail")

summary(m3) # Data: filter(mtcars, cyl == 6)

dred_dat <- dredge(m3,
                   rank = "AICc",
                   evaluate = T)
# Error in dredge(m3, rank = "AICc", evaluate = T) : 'global.model' uses "data" that is a function value: use a variable instead

Edit:

Failed attempt to add parallel processing to Ben's solution:

clust <- try(makeCluster(detectCores()-1))
invisible(clusterCall(clust, "library", "glmmTMB",
                      character.only = TRUE))

dfun <- function(x) {
  g1 <- glmmTMB(mpg ~ disp + drat + wt, 
                data = x,
                family = gaussian(),
                na.action = "na.fail")
  clusterExport(clust, "data")
  dredge(g1, cluster = clust, rank = "AICc",  evaluate = TRUE)
}

dat <- mtcars %>% 
  group_by(cyl) %>%
  nest() %>% 
  mutate(dredged = map(data, dfun))

Edit: Successful parallelization of dredge on nested data with Ben's solution using doParallel

library(tidyverse)
library(MuMIn)
library(doParallel)

dfun <- function(x) {
  g1 <- glmmTMB(mpg ~ disp + drat + wt, 
                data = x,
                family = gaussian(),
                na.action = "na.fail")
  dredge(g1, rank = "AICc",  evaluate = TRUE)
}

dat <- mtcars %>% 
  group_by(cyl) %>%
  nest()

ncores <- detectCores() - 1L
registerDoParallel(ncores)

dat_dredge <- foreach(i = 1:nrow(dat),
                      .packages = c("MuMIn", "glmmTMB"),
                      .final = function(x) setNames(x, dat$cyl)) %dopar% {
                        dfun(dat$data[[i]])
                      }

Upvotes: 1

Views: 196

Answers (1)

Ben Bolker
Ben Bolker

Reputation: 226027

Here's one possibility: put the model fitting and dredging within a single function, then nest/map.

library(tidyverse)
library(MuMIn)

dfun <- function(x) {
   g1 <- glmmTMB(mpg ~ disp + drat + wt, 
          data = x,
          family = gaussian(),
          na.action = "na.fail")
   dredge(g1, rank = "AICc",  evaluate = TRUE)
}
dat <- mtcars %>% 
  group_by(cyl) %>%
  nest() %>% 
  mutate(dredged = map(data, dfun))
 dat$dredged[[1]]
Global model call: glmmTMB(formula = mpg ~ disp + drat + wt, data = x, family = gaussian(), 
    na.action = "na.fail", ziformula = ~0, dispformula = ~1)
---
Model selection table 
  cnd((Int)) dsp((Int)) cnd(dsp) cnd(drt) cnd(wt) df  logLik AICc delta weight
1     19.740          +                            2 -12.011 31.0  0.00  0.750
5     28.410          +                    -2.780  3  -9.825 33.7  2.63  0.201
3     18.490          +            0.3503          3 -11.965 37.9  6.91  0.024
2     19.080          + 0.003605                   3 -11.974 37.9  6.93  0.024
6     28.190          + 0.019140           -3.835  4  -7.830 43.7 12.64  0.001
7     30.650          +           -0.4436  -2.990  4  -9.702 47.4 16.38  0.000
4      8.536          + 0.022440   1.9780          4 -11.481 51.0 19.94  0.000
8     16.050          + 0.042090   2.3460  -3.989  5  -4.630 79.3 48.24  0.000
Models ranked by AICc(x) 

Upvotes: 3

Related Questions