Reputation: 793
Using the code below, I can get the accuracy. Now I am trying to:

1) find the precision and recall for each fold (10 folds total)
2) get the mean precision
3) get the mean recall

This would be similar to the print(scores) and print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) lines below.

Any thoughts?
import numpy as np
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import StratifiedKFold, cross_val_score

iris = datasets.load_iris()
skf = StratifiedKFold(n_splits=10)
clf = svm.SVC(kernel='linear', C=1)

scores = cross_val_score(clf, iris.data, iris.target, cv=skf)
print(scores)  # [1. 0.93333333 1. 1. 0.86666667 1. 0.93333333 1. 1. 1.]
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))  # Accuracy: 0.97 (+/- 0.09)
Upvotes: 1
Views: 7956
Reputation: 11
import pandas as pd
import numpy as np
from sklearn.metrics import (confusion_matrix, recall_score, precision_score,
                             accuracy_score, roc_auc_score)

def binary_classification_performance(y_test, y_pred):
    # Note: sklearn's confusion_matrix().ravel() returns tn, fp, fn, tp in that order.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    accuracy = round(accuracy_score(y_pred=y_pred, y_true=y_test), 2)
    precision = round(precision_score(y_pred=y_pred, y_true=y_test), 2)
    recall = round(recall_score(y_pred=y_pred, y_true=y_test), 2)
    f1 = round(2 * precision * recall / (precision + recall), 2)
    specificity = round(tn / (tn + fp), 2)
    npv = round(tn / (tn + fn), 2)
    auc_roc = round(roc_auc_score(y_score=y_pred, y_true=y_test), 2)

    result = pd.DataFrame({'Accuracy': [accuracy],
                           'Precision (or PPV)': [precision],
                           'Recall (sensitivity or TPR)': [recall],
                           'f1 score': [f1],
                           'AUC_ROC': [auc_roc],
                           'Specificity (or TNR)': [specificity],
                           'NPV': [npv],
                           'True Positive': [tp],
                           'True Negative': [tn],
                           'False Positive': [fp],
                           'False Negative': [fn]})
    return result
binary_classification_performance(y_test, y_pred)
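Note that y_test and y_pred are not defined above. One self-contained way to try the function, where load_breast_cancer and LogisticRegression are purely illustrative choices for a binary problem:

# Illustrative only: any binary classification setup would do here.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

y_pred = LogisticRegression(max_iter=5000).fit(X_train, y_train).predict(X_test)
print(binary_classification_performance(y_test, y_pred))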
Upvotes: 1
Reputation: 3851
This is a bit different, because cross_val_score can't calculate precision/recall for non-binary classification, so you need to use precision_score and recall_score and do the cross-validation manually. The parameter average='micro' calculates global precision/recall.
import numpy as np
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score, recall_score

iris = datasets.load_iris()
skf = StratifiedKFold(n_splits=10)
clf = svm.SVC(kernel='linear', C=1)

X = iris.data
y = iris.target

precision_scores = []
recall_scores = []
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    precision_scores.append(precision_score(y_test, y_pred, average='micro'))
    recall_scores.append(recall_score(y_test, y_pred, average='micro'))

print(precision_scores)
print("Precision: %0.2f (+/- %0.2f)" % (np.mean(precision_scores), np.std(precision_scores) * 2))
print(recall_scores)
print("Recall: %0.2f (+/- %0.2f)" % (np.mean(recall_scores), np.std(recall_scores) * 2))
Upvotes: 2