UseR10085
UseR10085

Reputation: 8136

NotFittedError: All estimators failed to fit for RandomizedSearchCV

I am trying to use RandomizedSearchCV for a binary classification problem (2 classes). The dataset can be downloaded from this Kaggle site. The following code produces the error:

# Load packages
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import make_scorer, accuracy_score
from scipy.stats import uniform
import pandas as pd
import numpy as np
import time

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this presumably also hides any warnings emitted while the
# search fits its candidates, which makes failures harder to diagnose - confirm.
warnings.filterwarnings('ignore')
# Show all DataFrame columns when a frame is displayed.
pd.set_option("display.max_columns", None)

# Make scorer: wrap accuracy_score so it can be passed as `scoring=` to the
# search below.
acc_score = make_scorer(accuracy_score)

# Load dataset: the three CSV files are read from the relative '../input'
# directory. Only trainSet is used by the code shown below; testSet and
# submitSet are loaded but not referenced afterwards.
trainSet = pd.read_csv('../input/train.csv')
testSet = pd.read_csv('../input/test.csv')
submitSet = pd.read_csv('../input/sample_submission.csv')

# Preview the first rows (the value is only displayed in an interactive session).
trainSet.head()

# Remove unused variables and split 'Cabin' into its first character and the
# remainder.
# NOTE(review): this version of `train` is only previewed; it is overwritten
# below, so 'Cabin_letter' and 'Cabin_no' never reach the model.
train = trainSet.drop(columns=['Name', 'Ticket'])
train['Cabin_letter'] = train['Cabin'].str[0:1]
train['Cabin_no'] = train['Cabin'].str[1:]

train.head()

# Feature generation: training data.
# Start again from trainSet, this time dropping 'Cabin' as well, then remove
# every row that has a missing value and one-hot encode the remaining
# non-numeric columns.
train = trainSet.drop(columns=['Name', 'Ticket', 'Cabin'])
train = train.dropna(axis=0)
train = pd.get_dummies(train)

train.head()

# Train/validation split: the features are every column except 'PassengerId'
# and the target 'Survived'; 20% of the rows are held out for validation,
# stratified on the target, with a fixed random_state for reproducibility.
# NOTE(review): `axis=0` looks redundant because `columns=` is given - confirm.
X_train, X_val, y_train, y_val = train_test_split(train.drop(columns=['PassengerId','Survived'], axis=0),
                                                  train['Survived'],
                                                  test_size=0.2, random_state=111,
                                                  stratify=train['Survived'])

# RandomizedSearchCV
# Each hyperparameter is sampled from scipy.stats.uniform(loc, scale), which
# draws floats from [loc, loc + scale] - e.g. uniform(3, 10) yields floats in
# [3, 13] and uniform(0.8, 1) yields floats in [0.8, 1.8].
# NOTE(review): GradientBoostingClassifier presumably rejects non-integer
# max_depth / n_estimators and fractions above 1 for max_features / subsample;
# the traceback below reports that every candidate failed to fit - confirm
# against the estimator's parameter documentation.
param_rand = {'max_depth':uniform(3,10),
              'max_features':uniform(0.8,1),
              'learning_rate':uniform(0.01,1),
              'n_estimators':uniform(80,150),
              'subsample':uniform(0.8,1)}

# Randomized search over the distributions above, 5-fold CV, scored by accuracy.
rand = RandomizedSearchCV(estimator=GradientBoostingClassifier(), param_distributions=param_rand, scoring=acc_score, cv=5)

# Fit on rows at positions 1..99 of the training split (the row at position 0
# is skipped by the 1:100 slice).
rand.fit(X_train.iloc[1:100,], y_train.iloc[1:100,])

Error

---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
Input In [15], in <cell line: 10>()
      2 param_rand = {'max_depth':uniform(3,10),
      3               'max_features':uniform(0.8,1),
      4               'learning_rate':uniform(0.01,1),
      5               'n_estimators':uniform(80,150),
      6               'subsample':uniform(0.8,1)}
      8 rand = RandomizedSearchCV(estimator=GradientBoostingClassifier(), param_distributions=param_rand, scoring=acc_score, cv=5)
---> 10 rand.fit(X_train.iloc[1:100,], y_train.iloc[1:100,])

File ~\anaconda3\lib\site-packages\sklearn\utils\validation.py:63, in _deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     61 extra_args = len(args) - len(all_args)
     62 if extra_args <= 0:
---> 63     return f(*args, **kwargs)
     65 # extra_args > 0
     66 args_msg = ['{}={}'.format(name, arg)
     67             for name, arg in zip(kwonly_args[:extra_args],
     68                                  args[-extra_args:])]

File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:841, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
    835     results = self._format_results(
    836         all_candidate_params, n_splits, all_out,
    837         all_more_results)
    839     return results
--> 841 self._run_search(evaluate_candidates)
    843 # multimetric is determined here because in the case of a callable
    844 # self.scoring the return type is only known after calling
    845 first_test_score = all_out[0]['test_scores']

File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:1633, in RandomizedSearchCV._run_search(self, evaluate_candidates)
   1631 def _run_search(self, evaluate_candidates):
   1632     """Search n_iter candidates from param_distributions"""
-> 1633     evaluate_candidates(ParameterSampler(
   1634         self.param_distributions, self.n_iter,
   1635         random_state=self.random_state))

File ~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:827, in BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results)
    822 # For callable self.scoring, the return type is only know after
    823 # calling. If the return type is a dictionary, the error scores
    824 # can now be inserted with the correct key. The type checking
    825 # of out will be done in `_insert_error_scores`.
    826 if callable(self.scoring):
--> 827     _insert_error_scores(out, self.error_score)
    828 all_candidate_params.extend(candidate_params)
    829 all_out.extend(out)

File ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:301, in _insert_error_scores(results, error_score)
    298         successful_score = result["test_scores"]
    300 if successful_score is None:
--> 301     raise NotFittedError("All estimators failed to fit")
    303 if isinstance(successful_score, dict):
    304     formatted_error = {name: error_score for name in successful_score}

NotFittedError: All estimators failed to fit

Upvotes: 0

Views: 519

Answers (1)

Hanafi Haffidz
Hanafi Haffidz

Reputation: 368

This is caused by the parameter distributions you set. uniform(loc, scale) generates float values in the range [loc, loc + scale], whereas you are using it for parameters that require integers, such as max_depth and n_estimators.

# Integer-valued hyperparameters must be drawn from an integer distribution.
# A plain tuple such as (3, 10) is treated by RandomizedSearchCV as a list of
# candidate values (only 3 or 10 would ever be tried), so scipy's randint is
# used instead; its upper bound is exclusive.
# scipy's uniform(loc, scale) samples floats from [loc, loc + scale], so
# uniform(0.8, 1) would produce subsample values up to 1.8, while subsample
# must stay within (0, 1]; uniform(0.8, 0.2) keeps it in [0.8, 1.0].
from scipy.stats import randint, uniform

param_rand = {'max_depth': randint(3, 11),        # integers 3..10
              'max_features': randint(2, 5),      # integers 2..4 (feature count)
              'learning_rate': uniform(0.01, 1),  # floats in [0.01, 1.01]
              'n_estimators': randint(80, 151),   # integers 80..150
              'subsample': uniform(0.8, 0.2)}     # floats in [0.8, 1.0]

Changing to this should work.

Upvotes: 1

Related Questions