Reputation: 27
The code below computes accuracy, precision, recall, and F1 score. How can I also compute the false positive rate (FPR) with Stratified K-fold cross-validation?
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier

scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score),
           'recall': make_scorer(recall_score),
           'f1_score': make_scorer(f1_score)}

skfold = StratifiedKFold(n_splits=10)
dt_clf = DecisionTreeClassifier()

results = cross_validate(estimator=dt_clf,
                         X=data_train_X,
                         y=target_train_Y,
                         cv=skfold,
                         scoring=scoring)
print("Results", results)
Upvotes: 1
Views: 846
Reputation: 43
I wrote this code with logistic regression; you can replace it with any other binary classification algorithm you'd like.
# Importing required libraries
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

data = load_breast_cancer(as_frame=True)
df = data.frame

# Shuffle the rows once, keeping features and target aligned
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Implementing cross-validation
kf = KFold(n_splits=10)
model = LogisticRegression(max_iter=1000000)

acc_score = []
res_tpr = []
res_fpr = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)

    tn, fp, fn, tp = confusion_matrix(y_test, pred_values, labels=[0, 1]).ravel()
    print(f'True Positives: {tp}')
    print(f'False Positives: {fp}')
    print(f'True Negatives: {tn}')
    print(f'False Negatives: {fn}')

    # Guard against a fold with no positives (or no negatives)
    tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
    print('tpr=%.4f fpr=%.3f' % (tpr, fpr))
    res_tpr.append(tpr)
    res_fpr.append(fpr)
    print('---------------------')

    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = np.mean(acc_score)
total_tpr = np.mean(res_tpr)
total_fpr = np.mean(res_fpr)

print('\n\n', 'total_tpr=%.4f total_fpr=%.3f' % (total_tpr, total_fpr))
print('\n\n', 'Avg accuracy : {}'.format(avg_acc_score))
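Since the question asks about stratified folds: the same loop works with StratifiedKFold, you just pass the labels to split() so each fold preserves the class proportions. A minimal sketch, reusing the X and y from above:

from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=10)
# Unlike KFold, split() also takes y so the folds can be stratified
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    # ...fit, predict, and compute tpr/fpr exactly as above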
Upvotes: 1
Reputation: 8663
You could define a custom scorer as follows:
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification

def false_positive_rate(y_true, y_pred):
    # false positives
    fp = ((y_pred == 1) & (y_true == 0)).sum()
    # true negatives
    tn = ((y_pred == 0) & (y_true == 0)).sum()
    # false positive rate
    return fp / (fp + tn)

scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'f1_score': make_scorer(f1_score),
    'false_positive_rate': make_scorer(false_positive_rate),
}

skf = StratifiedKFold(n_splits=3)
clf = DecisionTreeClassifier(random_state=42)
X, y = make_classification(random_state=42)

results = cross_validate(estimator=clf, X=X, y=y, cv=skf, scoring=scoring)
print(results['test_false_positive_rate'])
# [0.11764706 0.11764706 0.0625]
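One caveat: make_scorer assumes higher scores are better, which is backwards for FPR. That doesn't matter for cross_validate reporting as above, but if you use this scorer to drive model selection (e.g. GridSearchCV), pass greater_is_better=False, in which case scikit-learn negates the reported values:

# For hyperparameter search, a lower FPR should rank higher;
# with greater_is_better=False the scorer returns the negated FPR
fpr_scorer = make_scorer(false_positive_rate, greater_is_better=False)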
Upvotes: 1