Reputation: 680
I estimated a glmnet logistic regression using tidymodels. But I couldn't figure out 2 things, which are closely related, in tidymodels:
Below are the codes for a pseudo model. I tried tidy()
, coef()
, and predict()
but they all failed. Any help will be much appreciated. Thanks.
library(tidymodels)
#> -- Attaching packages --------------------------------------------------------------------------------------------------------------------------- tidymodels 0.1.0 --
#> v broom 0.7.0 v recipes 0.1.13
#> v dials 0.0.8 v rsample 0.0.7
#> v dplyr 1.0.0 v tibble 3.0.3
#> v ggplot2 3.3.2 v tune 0.1.1
#> v infer 0.5.2 v workflows 0.1.2
#> v parsnip 0.1.2 v yardstick 0.0.7
#> v purrr 0.3.4
#> -- Conflicts ------------------------------------------------------------------------------------------------------------------------------ tidymodels_conflicts() --
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x recipes::step() masks stats::step()
set.seed(1234)
train <- tibble(y = factor(sample(c(0,1), 1000, replace = TRUE)),
x1 = rnorm(1000),
x2 = rnorm(1000)
)
test <- tibble(y = factor(sample(c(0,1), 300, replace = TRUE)),
x1 = rnorm(300),
x2 = rnorm(300)
)
lr_cv <- vfold_cv(train)
lr_mod <- logistic_reg(penalty = tune(), mixture = 1) %>%
set_mode("classification") %>%
set_engine("glmnet")
lr_wf <- workflow() %>%
add_model(lr_mod) %>%
add_formula(y ~ .)
lr_grid <- tibble(penalty = 10^seq(-4,1, length.out = 5))
lr_tune <- tune_grid(lr_wf, resamples = lr_cv, grid = lr_grid, metrics = metric_set(roc_auc))
lr_best <- select_best(lr_tune)
lr_best
#> # A tibble: 1 x 2
#> penalty .config
#> <dbl> <chr>
#> 1 0.0001 Model1
lr_wf_final <- lr_wf %>% finalize_workflow(lr_best)
final_mod <- lr_wf_final %>% fit(train)
# the followings were tried but didn't work
lambda <- lr_best[1]
lambda
#> # A tibble: 1 x 1
#> penalty
#> <dbl>
#> 1 0.0001
predict(final.mod , s = lambda, type ="coefficients")[1:3 ,]
#> Error in predict(final.mod, s = lambda, type = "coefficients"): object 'final.mod' not found
tidy(final_mod)
#> # A tibble: 35 x 5
#> term step estimate lambda dev.ratio
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 1 -0.0560 0.0183 1.71e-14
#> 2 (Intercept) 2 -0.0561 0.0167 1.65e- 4
#> 3 (Intercept) 3 -0.0562 0.0152 3.02e- 4
#> 4 (Intercept) 4 -0.0562 0.0139 4.16e- 4
#> 5 (Intercept) 5 -0.0563 0.0126 5.10e- 4
#> 6 (Intercept) 6 -0.0564 0.0115 5.89e- 4
#> 7 (Intercept) 7 -0.0564 0.0105 6.54e- 4
#> 8 (Intercept) 8 -0.0565 0.00956 7.08e- 4
#> 9 (Intercept) 9 -0.0565 0.00871 7.53e- 4
#> 10 (Intercept) 10 -0.0566 0.00794 7.90e- 4
#> # ... with 25 more rows
coef(final_mod)
#> NULL
Created on 2020-07-25 by the reprex package (v0.3.0)
Upvotes: 5
Views: 3851
Reputation: 11663
We recently added some updated support for getting out the estimated coefficients.
library(tidymodels)
#> ── Attaching packages ─────────────────────────────────────────────── tidymodels 0.1.1 ──
#> ✓ broom 0.7.0 ✓ recipes 0.1.13
#> ✓ dials 0.0.8 ✓ rsample 0.0.7
#> ✓ dplyr 1.0.0 ✓ tibble 3.0.3
#> ✓ ggplot2 3.3.2 ✓ tidyr 1.1.0
#> ✓ infer 0.5.3 ✓ tune 0.1.1.9000
#> ✓ modeldata 0.0.2 ✓ workflows 0.1.2
#> ✓ parsnip 0.1.2.9000 ✓ yardstick 0.0.7
#> ✓ purrr 0.3.4
#> ── Conflicts ────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x recipes::step() masks stats::step()
set.seed(1234)
train <- tibble(y = factor(sample(c(0,1), 1000, replace = TRUE)),
x1 = as.numeric(y) + rnorm(1000),
x2 = rnorm(1000),
x3 = rnorm(1000)
)
lr_cv <- vfold_cv(train)
lr_grid <- tibble(penalty = 10^seq(-4,1, length.out = 5))
lr_mod <- logistic_reg(penalty = tune(), mixture = 1) %>%
set_mode("classification") %>%
set_engine("glmnet")
lr_wf <- workflow() %>%
add_model(lr_mod) %>%
add_formula(y ~ .)
lr_best <- lr_wf %>%
tune_grid(
resamples = lr_cv,
grid = lr_grid
) %>%
select_best("roc_auc")
lr_wf %>%
finalize_workflow(lr_best) %>%
fit(train) %>%
pull_workflow_fit() %>%
tidy()
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
#> Loaded glmnet 4.0-2
#> # A tibble: 4 x 3
#> term estimate penalty
#> <chr> <dbl> <dbl>
#> 1 (Intercept) -1.30 0.0316
#> 2 x1 0.834 0.0316
#> 3 x2 0 0.0316
#> 4 x3 0 0.0316
Created on 2020-07-30 by the reprex package (v0.3.0.9001)
The steps to extract the coefficients are to pull out the fitted object and then to tidy it.
If you want to save this object for future use, you can either just save these coefficients (it is a linear model, after all), or you can save the finalized and fitted workflow as a binary object, which can be predicted from.
Upvotes: 7