Giulio Mario Martena
Giulio Mario Martena

Reputation: 39

workflow_sets is generating columns with different lengths

I am trying to define a workflow_set to try some models on a machine learning task.

# Preprocessing shared by every model: predict high_traffic from all other
# columns of `recipes`.
base_recipe <- recipe(high_traffic ~ ., data = recipes) %>%
  # Keep the `recipe` identifier column but exclude it from the predictors.
  update_role(recipe, new_role = "id") %>%
  # Apply the Yeo-Johnson transformation to the numeric predictors first, so
  # that only the original numeric columns are candidates for it ...
  step_YeoJohnson(all_numeric_predictors()) %>%
  # ... and only then expand the factor predictors into dummy variables.
  step_dummy(all_nominal_predictors())

# Baseline classifier: logistic regression with the "glm" engine and no
# tunable arguments.
logreg_spec <- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification")

# Penalized logistic regression; `penalty` and `mixture` are left for tuning.
# The "glmnet" engine is used because the "glm" engine fits an unpenalized
# model and has no counterpart for these two arguments.
regularized_spec <- logistic_reg(penalty = tune(), mixture = tune()) %>%
  set_engine("glmnet") %>%
  set_mode("classification")

# k-nearest-neighbours classifier; the number of neighbours and the distance
# weighting function are both left for tuning.
knn_spec <- nearest_neighbor(
  neighbors = tune(),
  weight_func = tune()
) %>%
  set_engine("kknn") %>%
  set_mode("classification")

# Boosted-tree classifier; the learning rate and the number of trees are left
# for tuning.
xgb_spec <- boost_tree(
  learn_rate = tune(),
  trees = tune()
) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

# Size of the hidden layer: 67% of (number of columns in recipes_train minus
# one), rounded down, plus one.
hidden_units <- floor(0.67 * (ncol(recipes_train) - 1)) + 1

# Single-hidden-layer network with a fixed number of epochs and hidden units;
# only `dropout` is left for tuning.
# NOTE(review): `learn_rate` may not be an argument that the "keras" engine
# actually uses -- confirm against the parsnip engine documentation.
neural_network <- mlp(
  epochs = 1000,
  hidden_units = hidden_units,
  dropout = tune(),
  learn_rate = 0.001
) %>%
  set_engine("keras") %>%
  set_mode("classification")

Then I do

# Combine the single preprocessor with every candidate model specification.
recipe_models <- workflow_set(
  preproc = list(simple = base_recipe),
  models = list(
    logreg = logreg_spec,
    regularized_logreg = regularized_spec,
    knn = knn_spec,
    xgb = xgb_spec,
    nn = neural_network
  ),
  cross = TRUE
)

However, when I investigate the recipe_models object I get the following error:

ERROR while rich displaying an object: Error in names(res) <- prefix: 'names' attribute [1] must be the same length as the vector [0]

Does anybody know what the issue may be? I have already searched the tidymodels GitHub repositories for details, but nobody has opened a similar issue.

Here are the first 50 rows of the dataset:

recipe,calories,carbohydrate,sugar,protein,category,servings,high_traffic
001,NA,NA,NA,NA,Pork,6,High
002,35.48,38.56,0.66,0.92,Potato,4,High
003,914.28,42.68,3.09,2.88,Breakfast,1,NA
004,97.03,30.56,38.63,0.02,Beverages,4,High
005,27.05,1.85,0.8,0.53,Beverages,4,NA
006,691.15,3.46,1.65,53.93,One Dish Meal,2,High
007,183.94,47.95,9.75,46.71,Chicken Breast,4,NA
008,299.14,3.17,0.4,32.4,Lunch/Snacks,4,NA
009,538.52,3.78,3.37,3.79,Pork,6,High
010,248.28,48.54,3.99,113.85,Chicken,2,NA
011,170.12,17.63,4.1,0.91,Beverages,1,NA
012,155.8,8.27,9.78,11.55,Breakfast,6,NA
013,274.63,23.49,1.56,2.57,Potato,4,High
014,25.23,11.51,10.32,9.57,Vegetable,4,High
015,217.14,6.69,10,15.17,Meat,4,High
016,316.45,2.65,4.68,79.71,Meat,6,High
017,454.27,1.87,2.95,61.07,Meat,2,High
018,1695.82,0.1,0.39,33.17,Meat,1,High
019,1090.75,4.65,0.69,3.49,Meat,6,High
020,127.55,27.55,1.51,8.91,Chicken,2,NA
021,9.26,17.44,8.16,10.81,Potato,6,High
022,40.53,87.91,104.91,11.93,Dessert,4,NA
023,82.73,3.17,7.95,26.04,Breakfast,4,NA
024,NA,NA,NA,NA,Meat,2,NA
025,1161.49,1.53,8.88,12.57,Breakfast,1,High
026,56.29,22.35,11.38,34.79,One Dish Meal,4,High
027,411.16,51.7,27.78,70.3,Pork,2,High
028,574.75,13.12,1.84,13.85,Potato,4,High
029,595.39,62.67,2.64,4.96,Potato,2,High
030,164.76,33.58,17.87,220.14,One Dish Meal,2,High
031,215.98,52.66,6.25,32.32,Pork,2,High
032,617.11,23.1,32.83,45.89,Breakfast,6,NA
033,347.06,9.5,5.92,82.58,Chicken Breast,4,NA
034,497.17,1.47,1.51,2.97,Lunch/Snacks,6,High
035,575.63,20.71,0.2,6.24,Breakfast,6,High
036,796.89,29.1,9.63,2.28,Lunch/Snacks,2,NA
037,1321.78,70.07,7.75,19.51,Breakfast,1,NA
038,44.55,99.82,2.62,15.57,Breakfast,4,NA
039,264.62,1.5,18.44,32.62,Chicken Breast,4,NA
040,44.81,4.62,0.4,5.9,Vegetable,4,High
041,621.54,14.16,10.7,39.69,Chicken Breast,6,High
042,290.1,4.43,1.05,40.64,Breakfast,4,NA
043,576.89,4.79,20.92,4.29,One Dish Meal,2,NA
044,262.12,17.46,0.33,87.05,Chicken Breast,4,NA
045,64.29,16.95,0.77,11.2,Breakfast,1,NA
046,83.39,13.06,1.62,3.44,Vegetable,6,High
047,69.01,39.17,39.54,0.17,Beverages,4,NA
048,43.91,48.16,4.58,7.92,Breakfast,4,NA
049,NA,NA,NA,NA,Chicken Breast,4,NA
050,1724.25,45.52,0.07,49.37,Breakfast,1,High

I clean it with:

# Clean the raw data: normalise `servings`, recode the outcome, convert the
# categorical columns to factors and fill in missing numeric values.
recipes <- recipes %>%
  mutate(
    # Strip the " as a snack" suffix from the two affected serving labels,
    # then treat servings as a factor.
    servings = ifelse(servings == "4 as a snack", "4", servings),
    servings = ifelse(servings == "6 as a snack", "6", servings),
    servings = factor(servings),
    # A missing high_traffic value is recoded as 0, anything else as 1.
    high_traffic = ifelse(is.na(high_traffic), 0, 1),
    high_traffic = factor(high_traffic),
    category = factor(category)
  ) %>%
  # Replace each missing numeric value with the median of its category.
  group_by(category) %>%
  mutate(across(
    .cols = where(is.numeric),
    .fns = ~ifelse(is.na(.x), median(.x, na.rm = TRUE), .x),
    .names = "{col}"
  )) %>%
  ungroup()

Upvotes: 0

Views: 30

Answers (0)

Related Questions