Reputation: 33
I am new to gradient boosting (XGBoost). I tried using mlr3 to tune hyperparameters for xgboost. I want to perform nested cross-validation with 10 inner folds and 3 outer folds to evaluate how much accuracy the XGBoost model gains from tuning each hyperparameter, and then fit the final model on the full data set. But when I try to build the auto-tuner I get an error, and I know my code is incorrect.
Would you please help me?
library(mlr3verse)
#> Loading required package: mlr3
library(mlr3proba)
library(xgboost)
library(tidyverse)
library(survival)
set.seed(42)
train_indxs = sample(seq_len(nrow(veteran)), 100) # 100 random rows for training
# right-censored survival task from the veteran lung cancer data
task = as_task_surv(x = veteran, time = 'time', event = 'status')
# one-hot encode the factor features (xgboost requires numeric features)
poe = po('encode')
task = poe$train(list(task))[[1]]
task
#> <TaskSurv:veteran> (137 x 11)
#> * Target: time, status
#> * Properties: -
#> * Features (9):
#> - dbl (9): age, celltype.adeno, celltype.large, celltype.smallcell,
#> celltype.squamous, diagtime, karno, prior, trt
learner_xgboost = lrn("surv.xgboost",
  eta = to_tune(1e-4, 1),
  gamma = to_tune(1e-4, 1),
  max_depth = to_tune(1, 35),
  min_child_weight = to_tune(0, 10))
learner_xgboost$param_set$search_space()
#> <ParamSet>
#> id class lower upper nlevels default value
#> 1: eta ParamDbl 1e-04 1 Inf <NoDefault[3]>
#> 2: gamma ParamDbl 1e-04 1 Inf <NoDefault[3]>
#> 3: max_depth ParamInt 1e+00 35 35 <NoDefault[3]>
#> 4: min_child_weight ParamDbl 0e+00 10 Inf <NoDefault[3]>
at_xgboost = auto_tuner(
  tuner = tnr("random_search", batch_size = 50),
  learner = learner_xgboost,
  task = task,
  resampling = rsmp("holdout"),
  measure = msr("surv.cindex"),
  terminator = trm("evals", n_evals = 50),
)
#> Error in auto_tuner(tuner = tnr("random_search", batch_size = 50), learner = learner_xgboost, : unused argument (task = task)
resampling_outer = rsmp("cv", folds = 3)
rr = resample(task, at, resampling_outer, store_models = TRUE)
#> Error in eval(expr, envir, enclos): object 'at' not found
as.data.table(instance$archive)
#> Error in eval(expr, envir, enclos): object 'instance' not found
at_xgboost$train(task, row_ids = train_indxs)
#> Error in eval(expr, envir, enclos): object 'at_xgboost' not found
at_xgboost$tuning_result
#> Error in eval(expr, envir, enclos): object 'at_xgboost' not found
at_xgboost$archive$data %>%
as_tibble() %>%
arrange(desc(surv.cindex)) %>% print(n = 5)
#> Error in eval(expr, envir, enclos): object 'at_xgboost' not found
# best hyperparameter configuration
instance$result_learner_param_vals
#> Error in eval(expr, envir, enclos): object 'instance' not found
# fit the final model on the complete data set
at_xgboost$param_set$values = instance$result_learner_param_vals
#> Error in eval(expr, envir, enclos): object 'instance' not found
at_xgboost$train(task)
#> Error in eval(expr, envir, enclos): object 'at_xgboost' not found
Created on 2024-02-15 with reprex v2.1.0
Upvotes: 1
Views: 354
Reputation: 109262
As the error message tells you, auto_tuner() doesn't take a task argument; the task is only supplied later, when you train the auto-tuner or pass it to resample(). See the mlr3 book. So remove that argument:
at_xgboost = auto_tuner(
  tuner = tnr("random_search", batch_size = 50),
  learner = learner_xgboost,
  resampling = rsmp("holdout"),
  measure = msr("surv.cindex"),
  terminator = trm("evals", n_evals = 50)
)
Upvotes: 1