Reputation: 415
I want to tune the hyperparameters of my LightGBM model, and I am using Bayesian optimization for the hyperparameter optimization (HPO). Sadly, my algorithm fails to converge.
import warnings
import pandas as pd
import time
import numpy as np
import lightgbm as lgb
from bayes_opt import BayesianOptimization
import sklearn as sklearn
import pyprojroot
from sklearn.metrics import roc_auc_score, mean_squared_error
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
# Load the California housing data and use the 'MedInc' column as the
# regression target; pop() removes it from the feature frame.
housing = fetch_california_housing()
train = pd.DataFrame(housing['data'], columns=housing['feature_names'])
train_y = train.pop('MedInc')

# Baseline LightGBM parameters (the closing brace was missing in the original,
# which made the script a syntax error).
params = {
    "objective": "regression",
    "bagging_fraction": 0.8,
    "bagging_freq": 1,
    "min_child_samples": 20,
    "reg_alpha": 1,
    "reg_lambda": 1,
    "boosting": "gbdt",
    "learning_rate": 0.01,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "verbosity": -1,
    "metric": 'rmse',
}

# free_raw_data=False keeps the raw data attached to the Dataset so the same
# object can be reused by every cross-validation run of the optimizer.
train_data = lgb.Dataset(train, train_y, free_raw_data=False)
def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
    """Objective for the Bayesian optimizer: cross-validated LightGBM score.

    The optimizer proposes continuous values, so integer-valued LightGBM
    parameters are rounded before use.

    Args:
        num_leaves: Proposed number of leaves (rounded to int).
        feature_fraction: Proposed feature fraction (clipped to [0, 1]).
        max_depth: Proposed maximum tree depth (rounded to int).
        min_gain_to_split: Proposed minimum split gain.
        min_data_in_leaf: Proposed minimum samples per leaf (rounded to int).

    Returns:
        The negative of the best (lowest) mean CV RMSE. BayesianOptimization
        maximizes its target, so the error has to be negated; returning the
        raw (maximum) RMSE would steer the search towards the worst models.
    """
    # NOTE(review): per the LightGBM parameter docs, `colsample_bytree`,
    # `min_child_samples` and `subsample` are aliases of `feature_fraction`,
    # `min_data_in_leaf` and `bagging_fraction`; the first two are also set
    # explicitly below -- confirm which value is meant to win.
    params = {
        "objective": "regression",
        "bagging_fraction": 0.8,
        "bagging_freq": 1,
        "min_child_samples": 20,
        "reg_alpha": 1,
        "reg_lambda": 1,
        "boosting": "gbdt",
        "learning_rate": 0.01,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "verbosity": -1,
        "metric": 'rmse',
    }
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['max_depth'] = int(round(max_depth))
    params['num_leaves'] = int(round(num_leaves))
    params['min_gain_to_split'] = float(min_gain_to_split)
    params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))

    # `train_data` is the module-level lgb.Dataset built before this function.
    cv_result = lgb.cv(
        params,
        train_data,
        nfold=5,
        seed=0,
        verbose_eval=200,
        stratified=False,  # stratification is only meaningful for classification
    )
    return -float(np.min(cv_result['rmse-mean']))
# Build the optimizer over the hyperparameter search space. The original
# bound the optimizer to `gbBO` but then called `lgbBO.maximize(...)`, which
# raises NameError; both statements now use `lgbBO`.
lgbBO = BayesianOptimization(
    lgb_eval,
    {
        'feature_fraction': (0.1, 0.9),
        'max_depth': (5, 9),
        # LightGBM requires num_leaves > 1, so the lower bound is 2 rather
        # than 1 (a proposal rounding to 1 would make training fail).
        'num_leaves': (2, 300),
        'min_gain_to_split': (0.001, 0.1),
        'min_data_in_leaf': (5, 50),
    },
    random_state=0,
)

# 5 initial points followed by 5 optimization steps using the expected
# improvement ('ei') acquisition function.
lgbBO.maximize(init_points=5, n_iter=5, acq='ei')
def bayes_parameter_opt_lgb(train, train_y, init_round=15, opt_round=25, n_folds=5, random_seed=0, n_estimators=10000, learning_rate=0.05, output_process=False):
    """Tune LightGBM regression hyperparameters with Bayesian optimization.

    Args:
        train: Feature data passed to ``lgb.Dataset``.
        train_y: Target values passed to ``lgb.Dataset``.
        init_round: Number of initial points evaluated before optimizing.
        opt_round: Number of Bayesian optimization iterations.
        n_folds: Number of cross-validation folds used by ``lgb.cv``.
        random_seed: Seed passed to ``lgb.cv``.
        n_estimators: Currently unused; kept for interface compatibility.
        learning_rate: Currently unused (the learning rate is fixed at 0.01
            in the parameter dict below); kept for interface compatibility.
        output_process: Currently unused; kept for interface compatibility.

    Returns:
        dict: The best parameter set found, as reported by the optimizer
        (i.e. the proposed values before the rounding done in ``lgb_eval``).
    """
    # prepare data
    train_data = lgb.Dataset(train, train_y, free_raw_data=False)

    # parameters
    def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
        """Return the negated best mean CV RMSE for one proposed parameter set."""
        params = {
            "objective": "regression",
            "bagging_fraction": 0.8,
            "bagging_freq": 1,
            "min_child_samples": 20,
            "reg_alpha": 1,
            "reg_lambda": 1,
            "boosting": "gbdt",
            "learning_rate": 0.01,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "verbosity": -1,
            "metric": 'rmse',
        }
        # The optimizer proposes floats; integer parameters are rounded.
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['num_leaves'] = int(round(num_leaves))
        # No trailing comma here: the original stored a 1-tuple instead of a float.
        params['min_gain_to_split'] = float(min_gain_to_split)
        params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))
        cv_result = lgb.cv(
            params,
            train_data,
            nfold=n_folds,
            seed=random_seed,
            verbose_eval=200,
            stratified=False,
        )
        # BayesianOptimization maximizes, so return the negated lowest RMSE
        # (the original returned the highest RMSE, rewarding the worst fit).
        return -float(np.min(cv_result['rmse-mean']))

    # range
    lgbBO = BayesianOptimization(
        lgb_eval,
        {
            'feature_fraction': (0.1, 0.9),
            'max_depth': (5, 9),
            'num_leaves': (200, 300),
            'min_gain_to_split': (0.001, 0.1),
            'min_data_in_leaf': (5, 50),
        },
        random_state=0,
    )

    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round, acq='ei')

    # return best parameters
    # In bayes_opt >= 1.0 `res` is a list of evaluations, so the old
    # `res['max']['max_params']` lookup fails; the best point is exposed
    # through the `max` property instead.
    return lgbBO.max['params']
# Run the search on the housing data: 200 initial points, then 20
# optimization iterations, each scored with 5-fold cross-validation.
opt_params = bayes_parameter_opt_lgb(
    train,
    train_y,
    init_round=200,
    opt_round=20,
    n_folds=5,
    random_seed=0,
    n_estimators=1000,
    learning_rate=0.01,
)
This leads to the following stacktrace :
StopIteration Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:179, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
178 try:
--> 179 x_probe = next(self._queue)
180 except StopIteration:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:25, in Queue.__next__(self)
24 if self.empty:
---> 25 raise StopIteration("Queue is empty, no more objects to retrieve.")
26 obj = self._queue[0]
StopIteration: Queue is empty, no more objects to retrieve.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in <cell line: 35>()
32 # return best parameters
33 return lgbBO.res['max']['max_params']
---> 35 opt_params = bayes_parameter_opt_lgb(train, train_y, init_round=20, opt_round=20, n_folds=5, random_seed=0, n_estimators=1000, learning_rate=0.01)
..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in bayes_parameter_opt_lgb(train, train_y, init_round, opt_round, n_folds, random_seed, n_estimators, learning_rate, output_process)
21 lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
22 'max_depth': (5, 9),
23 'num_leaves' : (200,300),
24 'min_gain_to_split': (0.001, 0.1),
25 'min_data_in_leaf': (5, 50)}, random_state=0)
26 # optimize
---> 27 lgbBO.maximize(init_points=init_round, n_iter=opt_round,acq='ei')
29 # output optimization process
30 lgbBO.points_to_csv("bayes_opt_result.csv")
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:182, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
180 except StopIteration:
181 util.update_params()
--> 182 x_probe = self.suggest(util)
183 iteration += 1
185 self.probe(x_probe, lazy=False)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:131, in BayesianOptimization.suggest(self, utility_function)
130 # Finding argmax of the acquisition function.
--> 131 suggestion = acq_max(
132 ac=utility_function.utility,
133 gp=self._gp,
135 bounds=self._space.bounds,
136 random_state=self._random_state
137 )
139 return self._space.array_to_params(suggestion)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\util.py:65, in acq_max(ac, gp, y_max, bounds, random_state, n_warmup, n_iter)
62 continue
64 # Store it if better than previous minimum(maximum).
---> 65 if max_acq is None or -res.fun[0] >= max_acq:
66 x_max = res.x
67 max_acq = -res.fun[0]
TypeError: 'float' object is not subscriptable
EDIT: The MRE above the stack trace should reproduce the following error. As the stack trace implies, it looks like `res.fun` in `-res.fun[0]`
should be a list and therefore subscriptable (line 65, end of the stack trace), but it is not, and I can't understand why.
This value is assigned to `max_acq`,
which is part of the maximization function `acq_max()`
(line 131 of the stack trace). `acq_max()` receives the Gaussian process, which is itself part of the `BayesianOptimization`
object (line 27 of the stack trace).
Why am I getting TypeError: 'float' object is not subscriptable
and how can this be fixed?
Upvotes: 4
Views: 2594
Reputation: 9816
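This is related to a change in scipy 1.8.0: `res.fun` returned by the optimizer is now a plain float rather than a one-element array.
One should use `-np.squeeze(res.fun)`
instead of `-res.fun[0]`.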
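The comments in the bug report indicate that reverting to scipy 1.7.0 fixes this.
UPDATED: It seems the fix has been merged in the BayesianOptimization package, but the new maintainer is unable to push a release to PyPI,
so you could either downgrade scipy (e.g. `pip install "scipy<1.8"`) or install the package directly from its Git repository:
pip install git+https://github.com/fmfn/BayesianOptimization.git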
Upvotes: 7