Reputation: 8136
I am trying to use RandomizedSearchCV for a classification problem (2 classes). The dataset can be downloaded from this Kaggle site. The following code produces the error shown below it.
# Load packages
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import make_scorer, accuracy_score
from scipy.stats import uniform
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)
# Wrap plain accuracy as a scorer object usable by the search utilities
acc_score = make_scorer(accuracy_score)

# Read the three competition CSV files (train, test, sample submission)
trainSet, testSet, submitSet = map(
    pd.read_csv,
    ['../input/train.csv', '../input/test.csv', '../input/sample_submission.csv'],
)
trainSet.head()

# Exploration only: split Cabin into its first character and the remainder.
# This frame is replaced by the feature-generation step right below.
train = trainSet.drop(columns=['Name', 'Ticket']).assign(
    Cabin_letter=trainSet['Cabin'].str[:1],
    Cabin_no=trainSet['Cabin'].str[1:],
)
train.head()

# Training features: drop the unused columns, discard rows containing
# missing values, then one-hot encode what is left
train = pd.get_dummies(
    trainSet.drop(columns=['Name', 'Ticket', 'Cabin']).dropna(axis=0)
)
train.head()

# 80/20 train/validation split, stratified on the Survived label
X_train, X_val, y_train, y_val = train_test_split(
    train.drop(columns=['PassengerId', 'Survived'], axis=0),
    train['Survived'],
    stratify=train['Survived'],
    random_state=111,
    test_size=0.2,
)
# RandomizedSearchCV: random hyper-parameter search over a gradient boosting classifier.
# Every entry of param_rand is a scipy.stats.uniform distribution, so every
# sampled value is a float.
# NOTE(review): per the SciPy docs, uniform(loc, scale) samples from
# [loc, loc + scale], not [loc, scale] -- e.g. uniform(0.8,1) spans 0.8..1.8.
# Confirm these ranges (and float values for max_depth / n_estimators) are
# what the estimator accepts.
param_rand = {'max_depth':uniform(3,10),
'max_features':uniform(0.8,1),
'learning_rate':uniform(0.01,1),
'n_estimators':uniform(80,150),
'subsample':uniform(0.8,1)}
# 5-fold cross-validated search scored with the accuracy scorer defined earlier.
rand = RandomizedSearchCV(estimator=GradientBoostingClassifier(), param_distributions=param_rand, scoring=acc_score, cv=5)
# Fit on a small positional slice only: rows 1 through 99 (row 0 is skipped).
rand.fit(X_train.iloc[1:100,], y_train.iloc[1:100,])
Error
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
Input In [15], in <cell line: 10>()
2 param_rand = {'max_depth':uniform(3,10),
3 'max_features':uniform(0.8,1),
4 'learning_rate':uniform(0.01,1),
5 'n_estimators':uniform(80,150),
6 'subsample':uniform(0.8,1)}
8 rand = RandomizedSearchCV(estimator=GradientBoostingClassifier(), param_distributions=param_rand, scoring=acc_score, cv=5)
---> 10 rand.fit(X_train.iloc[1:100,], y_train.iloc[1:100,])
File ~\anaconda3\lib\site-packages\sklearn\utils\validation.py:63, in _deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
65 # extra_args > 0
66 args_msg = ['{}={}'.format(name, arg)
67 for name, arg in zip(kwonly_args[:extra_args],
68 args[-extra_args:])]
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:841, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
835 results = self._format_results(
836 all_candidate_params, n_splits, all_out,
837 all_more_results)
839 return results
--> 841 self._run_search(evaluate_candidates)
843 # multimetric is determined here because in the case of a callable
844 # self.scoring the return type is only known after calling
845 first_test_score = all_out[0]['test_scores']
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:1633, in RandomizedSearchCV._run_search(self, evaluate_candidates)
1631 def _run_search(self, evaluate_candidates):
1632 """Search n_iter candidates from param_distributions"""
-> 1633 evaluate_candidates(ParameterSampler(
1634 self.param_distributions, self.n_iter,
1635 random_state=self.random_state))
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:827, in BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results)
822 # For callable self.scoring, the return type is only know after
823 # calling. If the return type is a dictionary, the error scores
824 # can now be inserted with the correct key. The type checking
825 # of out will be done in `_insert_error_scores`.
826 if callable(self.scoring):
--> 827 _insert_error_scores(out, self.error_score)
828 all_candidate_params.extend(candidate_params)
829 all_out.extend(out)
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:301, in _insert_error_scores(results, error_score)
298 successful_score = result["test_scores"]
300 if successful_score is None:
--> 301 raise NotFittedError("All estimators failed to fit")
303 if isinstance(successful_score, dict):
304 formatted_error = {name: error_score for name in successful_score}
NotFittedError: All estimators failed to fit
Upvotes: 0
Views: 519
Reputation: 368
This is caused by the parameter distributions you set. uniform(x,y)
generates float values, whereas you are using it for some parameters that require ints (max_depth and n_estimators).
from scipy.stats import randint, uniform

# Search space for RandomizedSearchCV over GradientBoostingClassifier.
# scipy.stats.randint(low, high) samples integers from [low, high), which
# suits the integer-valued parameters. A bare tuple such as (3, 10) would be
# treated as a list of two choices, so only the endpoints would be tried.
# scipy.stats.uniform(loc, scale) samples floats from [loc, loc + scale], so
# the second argument is a width, not an upper bound.
param_rand = {
    'max_depth': randint(3, 11),        # integers 3..10
    'max_features': randint(2, 5),      # feature counts 2..4
    'learning_rate': uniform(0.01, 1),  # floats in [0.01, 1.01]
    'n_estimators': randint(80, 151),   # integers 80..150
    'subsample': uniform(0.8, 0.2),     # floats in [0.8, 1.0]; must not exceed 1
}
Changing to this should work.
Upvotes: 1