GridSearchCV initialization

Question

I want to use GridSearchCV over a range of alphas (LaPlace smoothing parameters) to check which gives me the best accuracy with a Bernoulli Naive Bayes model.

def binarize_pixels(data, threshold=0.784):
    # Initialize a new feature array with the same shape as the original data.
    binarized_data = np.zeros(data.shape)

    # Apply a threshold to each feature.
    for feature in range(data.shape[1]):
        binarized_data[:,feature] = data[:,feature] > threshold
    return binarized_data

binarized_train_data = binarize_pixels(mini_train_data)

def BNB():
    clf = BernoulliNB()
    clf.fit(binarized_train_data, mini_train_labels)
    scoring = clf.score(mini_train_data, mini_train_labels)
    predsNB = clf.predict(dev_data)
    print "Bernoulli binarized model accuracy: {:.4}".format(np.mean(predsNB == dev_labels))

The model runs fine, while my GridSearch cross validation does not:

pipeline = Pipeline([('classifier', BNB())])
def P8(alphas):
    gs_clf = GridSearchCV(pipeline, param_grid = alphas, refit=True)
    y_predictions = gs_clf.best_estimator_.predict(dev_data)
    print classification_report(dev_labels, y_predictions)
alphas = {'alpha' : [0.0, 0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 10.0]}
P8(alphas)

I get AttributeError: 'GridSearchCV' object has no attribute 'best_estimator_'

GridSearchCV initialization

Answers (1)

Related Questions