Reputation: 350
Is there a way to specify the reference level when creating dummy variables with step_dummy()?
I can do so by setting one_hot = TRUE and then removing the reference column,
but I'm wondering whether it's possible to specify it within step_dummy() itself.
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
data(okc)
# Default behaviour: "anything" is the reference level of diet, so no
# diet_anything column appears among the 17 dummy columns listed below.
recipe(Class ~ ., data = okc) %>%
  step_dummy(diet) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#> [1] "diet_halal" "diet_kosher"
#> [3] "diet_mostly.anything" "diet_mostly.halal"
#> [5] "diet_mostly.kosher" "diet_mostly.other"
#> [7] "diet_mostly.vegan" "diet_mostly.vegetarian"
#> [9] "diet_other" "diet_strictly.anything"
#> [11] "diet_strictly.halal" "diet_strictly.kosher"
#> [13] "diet_strictly.other" "diet_strictly.vegan"
#> [15] "diet_strictly.vegetarian" "diet_vegan"
#> [17] "diet_vegetarian"
# With one_hot = TRUE no reference level is dropped: all 18 diet levels
# get their own dummy column (note diet_anything is now present).
recipe(Class ~ ., data = okc) %>%
  step_dummy(diet, one_hot = TRUE) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#> [1] "diet_anything" "diet_halal"
#> [3] "diet_kosher" "diet_mostly.anything"
#> [5] "diet_mostly.halal" "diet_mostly.kosher"
#> [7] "diet_mostly.other" "diet_mostly.vegan"
#> [9] "diet_mostly.vegetarian" "diet_other"
#> [11] "diet_strictly.anything" "diet_strictly.halal"
#> [13] "diet_strictly.kosher" "diet_strictly.other"
#> [15] "diet_strictly.vegan" "diet_strictly.vegetarian"
#> [17] "diet_vegan" "diet_vegetarian"
# Workaround: one-hot encode every level, then drop the diet_vegan column
# so that "vegan" effectively becomes the reference level.
recipe(Class ~ ., data = okc) %>%
  step_dummy(diet, one_hot = TRUE) %>%
  step_select(-diet_vegan) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#> [1] "diet_anything" "diet_halal"
#> [3] "diet_kosher" "diet_mostly.anything"
#> [5] "diet_mostly.halal" "diet_mostly.kosher"
#> [7] "diet_mostly.other" "diet_mostly.vegan"
#> [9] "diet_mostly.vegetarian" "diet_other"
#> [11] "diet_strictly.anything" "diet_strictly.halal"
#> [13] "diet_strictly.kosher" "diet_strictly.other"
#> [15] "diet_strictly.vegan" "diet_strictly.vegetarian"
#> [17] "diet_vegetarian"
Created on 2021-11-19 by the reprex package (v2.0.1)
Upvotes: 1
Views: 326
Reputation: 3185
From the step_dummy() documentation:
By default, the excluded dummy variable (i.e. the reference cell) will correspond to the first level of the unordered factor being converted.
We can therefore add step_relevel() before step_dummy() and set its ref_level
argument to choose a different reference level.
library(tidymodels)
data(okc)
# Relevel diet first so that "vegan" is the reference level; step_dummy()
# then excludes it, and no diet_vegan column appears in the output below.
recipe(Class ~ ., data = okc) %>%
  step_relevel(diet, ref_level = "vegan") %>%
  step_dummy(diet) %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#> [1] "diet_anything" "diet_halal"
#> [3] "diet_kosher" "diet_mostly.anything"
#> [5] "diet_mostly.halal" "diet_mostly.kosher"
#> [7] "diet_mostly.other" "diet_mostly.vegan"
#> [9] "diet_mostly.vegetarian" "diet_other"
#> [11] "diet_strictly.anything" "diet_strictly.halal"
#> [13] "diet_strictly.kosher" "diet_strictly.other"
#> [15] "diet_strictly.vegan" "diet_strictly.vegetarian"
#> [17] "diet_vegetarian"
Upvotes: 1