Reputation: 814
This question is inspired by can't use emmeans inside map, and related to Map `joint_tests` to a list after fitting a `gls` model and `group_by` and keep grouping levels as nested data frame's name
I want to wrap several tests into one workflow.
This works, for a glm
model.
library(dplyr)
library(purrr)
library(emmeans)
library(nlme)
diamond_result <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup %>%
dplyr::mutate(models=map(data,~glm(price ~ x + y + z + clarity + color,data=.x)),
jt = map(models, ~emmeans::joint_tests(.x, data = .x$data)),
means=map(models,~emmeans::emmeans(.x,"color",data=.x$data)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise",infer = c(T,T))),
across(models:p_cont, stats::setNames, .$cut))
> diamond_result$jt
$Ideal
model term df1 df2 F.ratio p.value
x 1 Inf 611.626 <.0001
y 1 Inf 2.914 0.0878
z 1 Inf 100.457 <.0001
clarity 7 Inf 800.852 <.0001
color 6 Inf 256.796 <.0001
$Premium
model term df1 df2 F.ratio p.value
x 1 Inf 2074.371 <.0001
But the same syntax doesn't work for a gls
model so I stopped at emmeans()
step. Eventually, I want joint_tests
, emmeans
, and contrast
in the mutate
step.
diamonds_emm2 <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means=map(models,~emmeans::emmeans(.x,"clarity",data=.x$data)),
across(models:p_cont, setNames, .$cut))
Error: Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Run `rlang::last_error()` to see where the error occurred.
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
1. `%>%`(...)
18. base::.handleSimpleError(...)
19. dplyr:::h(simpleError(msg, call))
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
█
1. ├─`%>%`(...)
2. ├─dplyr::mutate(...)
3. ├─dplyr:::mutate.data.frame(...)
4. │ └─dplyr:::mutate_cols(.data, ...)
5. │ ├─base::withCallingHandlers(...)
6. │ └─mask$eval_all_mutate(dots[[i]])
7. ├─purrr::map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))
8. │ └─.f(.x[[i]], ...)
9. │ └─emmeans::emmeans(.x, "clarity", data = .x$data)
10. │ ├─base::do.call(ref_grid, args)
11. │ └─(function (object, at, cov.reduce = mean, cov.keep = get_emm_option("cov.keep"), ...
12. │ ├─emmeans::recover_data(object, data = as.data.frame(data), ...)
13. │ └─emmeans:::recover_data.gls(...)
14. │ └─emmeans:::recover_data.call(...)
15. │ ├─tbl[, vars, drop = FALSE]
16. │ └─base::`[.data.frame`(tbl, , vars, drop = FALSE)
17. │ └─base::stop("undefined columns selected")
18. └─base::.handleSimpleError(...)
19. └─dplyr:::h(simpleError(msg, call))
The code runs OK at this step.
diamonds_emm <- diamonds %>%
group_by(cut) %>% nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)))
How do I work around this problem? Thank you.
Update: the map2
function from Ronak's answer solved the problem at the means
steps but it won't do pairwise contrasts. What did I miss?
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means = map2(data, models,~emmeans::emmeans(.y,"clarity",data=.x)),
p_cont = map2(means, ~emmeans::contrast(.y, "pairwise",infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut)) -> result
Error: Problem with `mutate()` input `p_cont`.
x object '.z' not found
ℹ Input `p_cont` is `map(means, ~emmeans::contrast(.y, "pairwise", infer = c(T, T)))`.
ℹ The error occurred in group 1: cut = "Fair".
Giving a new name to the input at the p_cont
step, such as ~emmeans::contrast(.z, "pairwise", infer = c(T, T)))
didn't solve the problem.
Upvotes: 2
Views: 735
Reputation: 18581
If you do not insist on using the purrr::map
family, I would suggest to use the new (dplyr 1.0.0) rowwise
style. It is less confusing, since you can just use the variable/column names as is and there is no need to choose the correct map
function and figure out the lambda notation. You just need to wrap the function call in list(...)
. Only the last call to across
needs to be called on the ungrouped data.frame
.
library(tidyverse)
library(emmeans)
library(nlme)
diamonds_emm_row <- diamonds %>%
nest_by(cut) %>%
dplyr::mutate(models = list(gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data = data)),
jt = list(joint_tests(models, data = data)),
means = list(emmeans::emmeans(models, "clarity", data = data)),
p_cont = list(emmeans::contrast(means, "pairwise", infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut))
diamonds_emm_row
#> # A tibble: 5 x 6
#> cut data models jt means p_cont
#> <ord> <list<tbl_df[,9]> <named lis> <named list> <named l> <named li>
#> 1 Fair [1,610 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 2 Good [4,906 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 3 Very Go… [12,082 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 4 Premium [13,791 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 5 Ideal [21,551 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
Created on 2021-01-01 by the reprex package (v0.3.0)
This yields more or less the same result as using purrr::map
. "More or less", because identical()
won't show it, since the function call is saved in the attributes (and other places), and differs depending on whether you use {dplyr}'s rowwise
style or the map
lambda notation.
diamonds_emm_map <- diamonds %>%
nest_by(cut) %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
jt = map2(models, data, ~ joint_tests(.x, data = .y)),
means = map2(data, models, ~ emmeans::emmeans(.y, "clarity", data = .x)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise", infer = c(T,T))),
across(models:p_cont, setNames, .$cut))
map2(diamonds_emm_row, diamonds_emm_map, all.equal, check.attributes = FALSE)
#> $cut
#> [1] TRUE
#>
#> $data
#> [1] TRUE
#>
#> $models
#> [1] "Component \"Fair\": Component \"call\": target, current do not match when deparsed"
#> [2] "Component \"Good\": Component \"call\": target, current do not match when deparsed"
#> [3] "Component \"Very Good\": Component \"call\": target, current do not match when deparsed"
#> [4] "Component \"Premium\": Component \"call\": target, current do not match when deparsed"
#> [5] "Component \"Ideal\": Component \"call\": target, current do not match when deparsed"
#>
#> $jt
#> [1] TRUE
#>
#> $means
#> [1] TRUE
#>
#> $p_cont
#> [1] TRUE
Created on 2021-01-01 by the reprex package (v0.3.0)
Upvotes: 2
Reputation: 389255
Pass data as well as model in the emmeans
step using map2
. For contrasts
and joint_tests
you can use map
.
library(tidyverse)
library(emmeans)
library(nlme)
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x))) %>%
ungroup %>%
mutate(means = map2(data, models,~emmeans(.y,"clarity",data=.x)),
p_cont = map(means, contrast, "pairwise"),
joint_tests = map(means, joint_tests),
across(models:joint_tests, setNames, .$cut)) -> result
result
# cut data models means p_cont joint_tests
# <ord> <list> <named lis> <named lis> <named li> <named list>
#1 Ideal <tibble [21,551 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#2 Premium <tibble [13,791 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#3 Good <tibble [4,906 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#4 Very Good <tibble [12,082 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#5 Fair <tibble [1,610 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
Upvotes: 4