hnguyen
hnguyen

Reputation: 814

`emmeans` for a gls model doesn't run inside `map`

This question is inspired by can't use emmeans inside map, and related to Map `joint_tests` to a list after fitting a `gls` model and `group_by` and keep grouping levels as nested data frame's name

I want to wrap several tests into one workflow.

This works, for a glm model.

library(dplyr)
library(purrr)
library(emmeans)
library(nlme)

diamond_result <- diamonds %>%
  group_by(cut) %>%
  nest() %>% 
  ungroup %>%
  dplyr::mutate(models=map(data,~glm(price ~ x + y + z + clarity + color,data=.x)),
         jt = map(models, ~emmeans::joint_tests(.x, data = .x$data)),
         means=map(models,~emmeans::emmeans(.x,"color",data=.x$data)),
         p_cont = map(means, ~emmeans::contrast(.x, "pairwise",infer = c(T,T))),
         across(models:p_cont, stats::setNames,  .$cut)) 
> diamond_result$jt
$Ideal
 model term df1 df2 F.ratio p.value
 x            1 Inf 611.626 <.0001 
 y            1 Inf   2.914 0.0878 
 z            1 Inf 100.457 <.0001 
 clarity      7 Inf 800.852 <.0001 
 color        6 Inf 256.796 <.0001 


$Premium
 model term df1 df2  F.ratio p.value
 x            1 Inf 2074.371 <.0001 

But the same syntax doesn't work for a gls model so I stopped at emmeans() step. Eventually, I want joint_tests, emmeans, and contrast in the mutate step.

diamonds_emm2 <-  diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  ungroup() %>%
  dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x)),
                means=map(models,~emmeans::emmeans(.x,"clarity",data=.x$data)),
                across(models:p_cont, setNames,  .$cut))
    Error: Problem with `mutate()` input `means`.
    x undefined columns selected
    ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
    Run `rlang::last_error()` to see where the error occurred.
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
  1. `%>%`(...)
 18. base::.handleSimpleError(...)
 19. dplyr:::h(simpleError(msg, call))

<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
     █
  1. ├─`%>%`(...)
  2. ├─dplyr::mutate(...)
  3. ├─dplyr:::mutate.data.frame(...)
  4. │ └─dplyr:::mutate_cols(.data, ...)
  5. │   ├─base::withCallingHandlers(...)
  6. │   └─mask$eval_all_mutate(dots[[i]])
  7. ├─purrr::map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))
  8. │ └─.f(.x[[i]], ...)
  9. │   └─emmeans::emmeans(.x, "clarity", data = .x$data)
 10. │     ├─base::do.call(ref_grid, args)
 11. │     └─(function (object, at, cov.reduce = mean, cov.keep = get_emm_option("cov.keep"), ...
 12. │       ├─emmeans::recover_data(object, data = as.data.frame(data), ...)
 13. │       └─emmeans:::recover_data.gls(...)
 14. │         └─emmeans:::recover_data.call(...)
 15. │           ├─tbl[, vars, drop = FALSE]
 16. │           └─base::`[.data.frame`(tbl, , vars, drop = FALSE)
 17. │             └─base::stop("undefined columns selected")
 18. └─base::.handleSimpleError(...)
 19.   └─dplyr:::h(simpleError(msg, call))

The code runs OK at this step.

diamonds_emm <-  diamonds %>%
  group_by(cut) %>% nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x))) 

How do I work around this problem? Thank you.

Update: the map2 function from Ronak's answer solved the problem at the means steps but it won't do pairwise contrasts. What did I miss?

diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x)), 
         means = map2(data, models,~emmeans::emmeans(.y,"clarity",data=.x)),
         p_cont = map2(means, ~emmeans::contrast(.y, "pairwise",infer = c(T,T)))) %>%
  ungroup %>%
  mutate(across(models:p_cont, setNames,  .$cut)) -> result
Error: Problem with `mutate()` input `p_cont`.
x object '.z' not found
ℹ Input `p_cont` is `map(means, ~emmeans::contrast(.y, "pairwise", infer = c(T, T)))`.
ℹ The error occurred in group 1: cut = "Fair".

Giving a new name to the input at the p_cont step, such as ~emmeans::contrast(.z, "pairwise", infer = c(T, T))) didn't solve the problem.

Upvotes: 2

Views: 735

Answers (2)

TimTeaFan
TimTeaFan

Reputation: 18581

If you do not insist on using the purrr::map family, I would suggest to use the new (dplyr 1.0.0) rowwise style. It is less confusing, since you can just use the variable/column names as is and there is no need to choose the correct map function and figure out the lambda notation. You just need to wrap the function call in list(...). Only the last call to across needs to be called on the ungrouped data.frame.

library(tidyverse)
library(emmeans)
library(nlme)

diamonds_emm_row <- diamonds %>%
  nest_by(cut) %>% 
  dplyr::mutate(models = list(gls(price ~ x + y + z + clarity,  
                                  weights = varIdent(form = ~ 1|color),
                                  data = data)),
                jt = list(joint_tests(models, data = data)),
                means = list(emmeans::emmeans(models, "clarity", data = data)),
                p_cont = list(emmeans::contrast(means, "pairwise", infer = c(T,T)))) %>% 
  ungroup %>% 
  mutate(across(models:p_cont, setNames, .$cut))

diamonds_emm_row
#> # A tibble: 5 x 6
#>   cut                   data models      jt                 means     p_cont    
#>   <ord>    <list<tbl_df[,9]> <named lis> <named list>       <named l> <named li>
#> 1 Fair           [1,610 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 2 Good           [4,906 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 3 Very Go…      [12,082 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 4 Premium       [13,791 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 5 Ideal         [21,551 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid>

Created on 2021-01-01 by the reprex package (v0.3.0)

This yields more or less the same result as using purrr::map. "More or less", because identical() won't show it, since the function call is saved in the attributes (and other places), and differs depending on whether you use {dplyr}'s rowwise style or the map lambda notation.

diamonds_emm_map <- diamonds %>%
  nest_by(cut) %>% 
  ungroup() %>%
  dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                                     weights = varIdent(form = ~ 1|color),
                                     data =.x)),
                jt = map2(models, data, ~ joint_tests(.x, data = .y)),
                means = map2(data, models, ~ emmeans::emmeans(.y, "clarity", data = .x)),
                p_cont = map(means, ~emmeans::contrast(.x, "pairwise", infer = c(T,T))),
                across(models:p_cont, setNames, .$cut))

map2(diamonds_emm_row, diamonds_emm_map, all.equal, check.attributes = FALSE)
#> $cut
#> [1] TRUE
#> 
#> $data
#> [1] TRUE
#> 
#> $models
#> [1] "Component \"Fair\": Component \"call\": target, current do not match when deparsed"     
#> [2] "Component \"Good\": Component \"call\": target, current do not match when deparsed"     
#> [3] "Component \"Very Good\": Component \"call\": target, current do not match when deparsed"
#> [4] "Component \"Premium\": Component \"call\": target, current do not match when deparsed"  
#> [5] "Component \"Ideal\": Component \"call\": target, current do not match when deparsed"    
#> 
#> $jt
#> [1] TRUE
#> 
#> $means
#> [1] TRUE
#> 
#> $p_cont
#> [1] TRUE

Created on 2021-01-01 by the reprex package (v0.3.0)

Upvotes: 2

Ronak Shah
Ronak Shah

Reputation: 389255

Pass data as well as model in the emmeans step using map2. For contrasts and joint_tests you can use map.

library(tidyverse)
library(emmeans)
library(nlme)

diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x))) %>%
  ungroup %>%
  mutate(means = map2(data, models,~emmeans(.y,"clarity",data=.x)),
         p_cont = map(means, contrast, "pairwise"), 
         joint_tests = map(means, joint_tests),
         across(models:joint_tests, setNames,  .$cut)) -> result

result

#   cut       data                models      means       p_cont     joint_tests           
#  <ord>     <list>              <named lis> <named lis> <named li> <named list>          
#1 Ideal     <tibble [21,551 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#2 Premium   <tibble [13,791 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#3 Good      <tibble [4,906 × 9… <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#4 Very Good <tibble [12,082 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#5 Fair      <tibble [1,610 × 9… <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…

Upvotes: 4

Related Questions