GPBoost and Bayesian optimization

Question

I am trying to create a parameter grid for a gpboosted model. Currently, I am using the grid tune search method, but I have been reading on Bayesian Optimization in the package rBayesianOptimization. However, it seems that there is no simple way to connect a gpboost model with the bayesian tuning. I am able to get the parameter search to start, however, the value is returned as zero. I figured I would ask here if anyone knows how to better connect these processes.

Here is the code.

Index <- createDataPartition(y = mean_tl_scale$fb_mean_tl_mm, p = 0.75, list = FALSE)
LM_train_tl_mean <- mean_tl_scale[Index, ]
LM_test_tl_mean <- mean_tl_scale[-Index, ]

# Convert data frames to matrices
LM_matrix_train <- data.matrix(LM_train_tl_mean)
LM_matrix_test <- data.matrix(LM_test_tl_mean)

# Extract feature data
features_train <- LM_matrix_train[, !colnames(LM_matrix_train) %in% c("year")]
features_test <- LM_matrix_test[, !colnames(LM_matrix_test) %in% c("year")]
colnames <- colnames(features_test)[-3]

# Train the gpmodel
gp_model <- GPModel(likelihood = "gaussian", cov_function = "exponential", group_data = LM_matrix_train[, c("year")])
boost_data <- gpb.Dataset(data = features_train[, colnames], label = features_train[, "fb_mean_tl_mm"])
gpb_boost <- gpb.Dataset.construct(boost_data)

# Define parameter bounds for Bayesian optimization
bounds <- list(
  learning_rate = c(0.05, 0.15),
  max_depth = c(5L, 7L),
  min_child_weight = c(5L, 7L),
  subsample = c(0.3, 0.5),
  colsample_bytree = c(0.5, 0.9),
  num_iterations = c(800L, 1000L),
  lambda_l2 = c(0, 5)
)

# Define the optimization function
opt_func <- function(learning_rate, max_depth, min_child_weight, subsample, colsample_bytree, num_iterations, lambda_l2) {
  params <- list(
    objective = "regression",
    learning_rate = learning_rate,
    max_depth = max_depth,
    min_child_weight = min_child_weight,
    subsample = subsample,
    colsample_bytree = colsample_bytree,
    num_iterations = num_iterations,
    lambda_l2 = lambda_l2
  )

  cv_result <- gpb.cv(
    params = params,
    data = gpb_boost,
    gp_model = gp_model,
    nrounds = 500,
    nfold = 5,
    verbose = 0,
    eval = "rmse"
  )
  
  return(list(Score = -min(cv_result$evaluation_log$test_rmse_mean), Pred = NULL))
}

# Run Bayesian optimization
set.seed(68)
opt_result <- BayesianOptimization(
  FUN = opt_func,
  bounds = bounds,
  init_points = 10,
  n_iter = 20,
  acq = "ei",
  kappa = 2.576,
  eps = 0.0,
  verbose = TRUE
)

# Print the optimum set of parameters
opt_result

# Test the model with the output from Bayesian optimization
best_params <- list(
  objective = "regression",
  learning_rate = opt_result$Best_Par["learning_rate"],
  max_depth = opt_result$Best_Par["max_depth"],
  min_child_weight = opt_result$Best_Par["min_child_weight"],
  subsample = opt_result$Best_Par["subsample"],
  colsample_bytree = opt_result$Best_Par["colsample_bytree"],
  num_iterations = opt_result$Best_Par["num_iterations"],
  lambda_l2 = opt_result$Best_Par["lambda_l2"]
)

gp_model <- GPModel(group_data = LM_matrix_train[, c("year")], likelihood = "gaussian", cov_function = "exponential")
gpboost_model <- gpboost(
  data = boost_data,
  gp_model = gp_model,
  params = best_params,
  verbose = 1
)

GPBoost and Bayesian optimization

Answers (1)

Related Questions