Reputation: 1
I'm currently having trouble adapting the following code to work with a multiclass target variable (3 levels).
# data import
from ucimlrepo import fetch_ucirepo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import LabelBinarizer
# fetch breast cancer dataset
bc = fetch_ucirepo(id=17)
# data (as pandas dataframes)
bc_X = bc.data.features
bc_y = bc.data.targets
# fetch heart disease dataset
hd = fetch_ucirepo(id=45)
# data (as pandas dataframes)
hd_X = hd.data.features
hd_y = hd.data.targets
# fetch iris dataset
ir = fetch_ucirepo(id=53)
# data (as pandas dataframes)
ir_X = ir.data.features
ir_y = ir.data.targets
# fetch wine quality dataset
wq = fetch_ucirepo(id=186)
# data (as pandas dataframes)
wq_X = wq.data.features
wq_y = wq.data.targets
# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(hd_X, hd_y, test_size=0.2, random_state=42)
# cap values function
def cap_values(values):
    # Convert input to a numpy array
    values = np.array(values)
    # Cap values at 1 using numpy's clip function
    capped_values = np.clip(values, None, 1)
    return capped_values
# Find the indices of rows with missing values in X_train
missing_indices = X_train.isnull().any(axis=1)
missing_indices2 = X_test.isnull().any(axis=1)
# Remove rows with missing values from X_train and y_train
X_train = X_train[~missing_indices]
y_train = y_train[~missing_indices]
X_test = X_test[~missing_indices2]
y_test = y_test[~missing_indices2]
# Apply the cap_values function to y_test and y_train
y_test = cap_values(y_test)
y_train = cap_values(y_train)
# Step 3: Fit each model to the training data
# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
# Naïve Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
# Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
# Support Vector Machine
svm_model = SVC(random_state=42)
svm_model.fit(X_train, y_train)
# Step 4: Evaluate the models using the testing data
models = {
'Decision Tree': dt_model,
'Naïve Bayes': nb_model,
'Random Forest': rf_model,
'Support Vector Machine': svm_model
}
# Initialize a dictionary to store the performance metrics
performance_metrics = {}
# Iterate through each model and calculate the metrics
for name, model in models.items():
    # Get predictions
    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else model.decision_function(X_test)
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    ConfusionMatrixDisplay(cm, display_labels=model.classes_).plot(cmap=plt.cm.Blues)
    plt.title(f'Confusion Matrix: {name}')
    plt.show()
    # Calculate sensitivity (recall) and specificity
    if cm.shape == (2, 2):
        # Binary classification metrics
        tn, fp, fn, tp = cm.ravel()
        sensitivity = tp / (tp + fn)
        specificity = tn / (tn + fp)
    else:
        # Multi-class classification metrics
        sensitivity = recall_score(y_test, y_pred, average='macro')
        specificity = None
        sensitivity = tp / (tp + fn)
        specificity = tn / (tn + fp)
    # Calculate balanced accuracy
    balanced_accuracy = (sensitivity + specificity) / 2
    # Calculate precision, recall, and F1-score
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    # Calculate AUC
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
    auc = roc_auc_score(y_test, y_pred_prob)
    # Store the performance metrics
    performance_metrics[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Sensitivity': sensitivity,
        'Specificity': specificity,
        'Balanced Accuracy': balanced_accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'AUC': auc
    }
    # Plot ROC curve
    plt.figure()
    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
    plt.title(f'ROC Curve: {name}')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()
# Display the performance metrics in a DataFrame
metrics_df = pd.DataFrame(performance_metrics).transpose()
print(metrics_df)
I need the accuracy, sensitivity, specificity, balanced accuracy, precision, recall, F1 score, AUC and, most importantly, the ROC plot for each model. This works perfectly for binary classification, so I'm confused about why it won't work for multiclass. Flattening the variable did not work.
Thanks for any help
I tried flattening the variable, I tried a for loop running 3 separate binary classifications and averaging the results, and I tried splitting the data into 3 datasets (each with only 2 of the classes, so each was essentially a binary classification problem).
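For reference, the one-vs-rest loop I tried was shaped roughly like this (a simplified sketch, not my exact code; the real version also collected the other metrics):
# One-vs-rest attempt: one binary classifier per class, then average the scores
y_tr = np.ravel(y_train)  # raw multiclass labels flattened to 1-D (not the capped version)
y_te = np.ravel(y_test)
per_class_auc = []
for cls in np.unique(y_tr):
    # Treat the current class as positive and everything else as negative
    clf = RandomForestClassifier(random_state=42).fit(X_train, (y_tr == cls).astype(int))
    probs = clf.predict_proba(X_test)[:, 1]
    per_class_auc.append(roc_auc_score((y_te == cls).astype(int), probs))
print('Average one-vs-rest AUC:', np.mean(per_class_auc))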
Upvotes: 0
Views: 153
Reputation: 5095
I've modified the code to do the following:
- remove rows with missing feature values before splitting
- split the data into train/validation/test sets
- set probability=True on the SVC so that predict_proba() is available
- replace the binary-only TN/FP/FN/TP logic with a macro_metrics() helper that computes one-vs-rest metrics per class and macro-averages them, so it works for any number of classes
- compute a macro one-vs-rest AUC with roc_auc_score(), and build a macro-averaged ROC curve by interpolating each class's ROC onto a common false-positive-rate axis, plotting all models on one set of axes
It'd be worth double-checking results against sklearn if you have binary data; that would ensure things are consistent. I've included checks for the overall metrics (see the if False: block inside the loop, and the quick cross-check sketched after the listing).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
# fetch heart disease dataset
hd = fetch_ucirepo(id=45)
# data (as pandas dataframes)
hd_X_orig = hd.data.features
hd_y_orig = hd.data.targets
# Remove rows with any missing feature values before splitting,
# so the same rows are dropped from both the features and the targets
missing_indices = hd_X_orig.isnull().any(axis=1)
hd_X = hd_X_orig[~missing_indices].copy()
hd_y = hd_y_orig[~missing_indices].copy()
# Step 2: Split the data into training and testing sets
train_frac, val_frac = 0.7, 0.2
test_frac = 1 - (train_frac + val_frac)
train_val_ixs, test_ixs = train_test_split(
range(len(hd_X)), test_size=test_frac, random_state=0
)
train_ixs, val_ixs = train_test_split(
train_val_ixs, test_size=val_frac / (1 - test_frac), random_state=0
)
print(
'Partition sizes are:',
f'train={train_frac} ({len(train_ixs)} samples) | '
f'val={val_frac} ({len(val_ixs)} samples) | '
f'test={test_frac:.1f} ({len(test_ixs)} samples)'
)
X_train, X_val, X_test = [hd_X.iloc[ixs] for ixs in [train_ixs, val_ixs, test_ixs]]
y_train, y_val, y_test = [hd_y.iloc[ixs].values.ravel() for ixs in [train_ixs, val_ixs, test_ixs]]
# Step 3: Fit each model to the training data
# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
# Naïve Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
# Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
# Support Vector Machine
svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train, y_train)
# Step 4: Evaluate the models using the testing data
models = {
'Decision Tree': dt_model,
'Naïve Bayes': nb_model,
'Random Forest': rf_model,
'Support Vector Machine': svm_model
}
# Initialize a dictionary to store the performance metrics
performance_metrics = {}
#
# Returns macro metrics for any number of classes (can be binary or multiclass)
# Returns tuple: (
# global TN, global FP, global FN, global TP,
# macro sensitivity, macro specificity, macro precision, macro f1,
# macro accuracy, macro balanced accuracy, subset accuracy,
# supports
# )
#
def macro_metrics(y_true, y_pred):
    classes = np.unique(y_true)
    tp_perclass = []
    fp_perclass = []
    tn_perclass = []
    fn_perclass = []
    sensitivity_perclass = []
    specificity_perclass = []
    precision_perclass = []
    f1_perclass = []
    accuracy_perclass = []
    balanced_accuracy_perclass = []
    support_perclass = []
    # For each class, get its metrics
    for clas in classes:
        # Convert to binary True/False for this class
        class_true = y_true == clas
        class_pred = y_pred == clas
        tp = (class_pred & class_true).sum()
        tn = (~class_pred & ~class_true).sum()
        fp = (class_pred & ~class_true).sum()
        fn = (~class_pred & class_true).sum()
        eps = 1e-6  # prevent division by 0
        sensitivity = tp / (tp + fn + eps)
        specificity = tn / (tn + fp + eps)
        precision = tp / (tp + fp + eps)
        f1 = 2 * precision * sensitivity / (precision + sensitivity + eps)
        accuracy = (tp + tn) / (tp + tn + fp + fn)
        balanced_accuracy = (sensitivity + specificity) / 2
        # Record metrics for each class
        tp_perclass.append(tp)
        tn_perclass.append(tn)
        fp_perclass.append(fp)
        fn_perclass.append(fn)
        sensitivity_perclass.append(sensitivity)
        specificity_perclass.append(specificity)
        precision_perclass.append(precision)
        f1_perclass.append(f1)
        accuracy_perclass.append(accuracy)
        balanced_accuracy_perclass.append(balanced_accuracy)
        support_perclass.append(class_true.sum())
    # This is equivalent to sklearn's accuracy_score() (subset accuracy)
    subset_accuracy = (y_pred == y_true).mean()
    # Return macro metrics, i.e. a simple average over each class's score
    return (
        # Global TN, FP, FN, TP
        sum(tn_perclass), sum(fp_perclass), sum(fn_perclass), sum(tp_perclass),
        # Macro metrics
        np.mean(sensitivity_perclass),
        np.mean(specificity_perclass),
        np.mean(precision_perclass),
        np.mean(f1_perclass),
        np.mean(accuracy_perclass),
        np.mean(balanced_accuracy_perclass),
        # like accuracy_score():
        subset_accuracy,
        # Supports
        support_perclass
    )
# Set up a shared axes on which the ROC curves of all models are accumulated;
# close the empty figure for now so it isn't displayed before the loop runs
roc_ax = plt.figure(figsize=(7, 3)).add_subplot()
plt.close()
# Iterate through each model and calculate the metrics
for name, model in models.items():
    # Get predictions
    y_pred = model.predict(X_val)
    y_pred_probs = model.predict_proba(X_val)
    # Confusion matrix
    cm = confusion_matrix(y_val, y_pred)
    ConfusionMatrixDisplay(cm, display_labels=model.classes_).plot(
        cmap=plt.cm.Blues, colorbar=False
    )
    plt.title(f'Confusion Matrix: {name}', fontsize=10)
    plt.gcf().set_size_inches(2.5, 2.5)
    plt.show()
    # Binary/multiclass classification metrics
    (tn, fp, fn, tp, macro_sens, macro_spec, macro_prec, macro_f1,
     macro_acc, macro_bacc, acc, supports) = macro_metrics(y_val, y_pred)
    # Confirm against sklearn's classification_report() (set to True to print the checks)
    if False:
        print('tn:', tn, 'fp:', fp, 'fn:', fn, 'tp:', tp,
              '\nmacro sens:', macro_sens,
              '\nmacro spec:', macro_spec,
              '\nmacro prec:', macro_prec,
              '\nmacro f1:', macro_f1,
              '\nmacro acc:', macro_acc,
              '\nmacro bacc:', macro_bacc,
              '\nacc:', acc,
              '\nsupports:', supports
              )
        # sklearn's classification_report() for various class-wise & averaged metrics
        print(classification_report(y_val, y_pred, digits=6, zero_division=0))
    # Calculate precision, recall, and F1-score
    precision = precision_score(y_val, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_val, y_pred, average='macro')
    f1 = f1_score(y_val, y_pred, average='macro')
    # Calculate AUC
    y_val_onehot = pd.get_dummies(y_val, dtype=int).values
    # Note that "auc" below is still affected by class imbalance. See its doc page for info.
    auc = roc_auc_score(y_val_onehot, y_pred_probs, average='macro', multi_class='ovr')
    # ROC averaged over classes:
    # get the ROC for each class and simply average together on a common FPR axis
    common_roc_fpr = np.linspace(0, 1, 100)
    tprs_perclass = []
    for i, clas in enumerate(model.classes_):
        # Column i of predict_proba corresponds to model.classes_[i]
        fpr, tpr, thresholds = roc_curve(y_val == clas, y_pred_probs[:, i])
        tprs_perclass.append(
            np.interp(common_roc_fpr, fpr, tpr)
        )
    macro_roc_tpr = np.vstack(tprs_perclass).mean(axis=0)
    # Store the performance metrics
    performance_metrics[name] = {
        'Accuracy': accuracy_score(y_val, y_pred),
        # 'Sensitivity': macro_sens,  # removed - same as (macro) recall
        'Balanced Accuracy': macro_bacc,
        'Precision': precision,
        'Recall': recall,
        'Specificity': macro_spec,  # added
        'F1 Score': f1,
        'AUC': auc  # a macro average
    }
    # Add ROC curve
    roc_ax.plot(common_roc_fpr, macro_roc_tpr, label=f'{name} (AUC={auc:.2f})')
    roc_ax.plot([0, 1], [0, 1], 'k--')  # Diagonal line
    roc_ax.set(xlim=(-0.01, 1.01), ylim=(-0.01, 1.01))
    roc_ax.set_title('ROC Curves')
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.legend(fontsize=9)
# Make the shared ROC figure active again so it is displayed after the loop
plt.sca(roc_ax)
# Display the performance metrics in a DataFrame
metrics_df = pd.DataFrame(performance_metrics).transpose()
display(metrics_df)
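For the double-check mentioned above: the macro numbers from macro_metrics() should agree with sklearn's own scorers up to the eps used to avoid division by zero. A quick sketch on dummy binary labels (y_true and y_pred here are just random arrays, used only to compare the two implementations):
# Cross-check macro_metrics() against sklearn's scorers on dummy binary labels
from sklearn.metrics import balanced_accuracy_score

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=200)
y_pred = rng.integers(0, 2, size=200)
(tn, fp, fn, tp, macro_sens, macro_spec, macro_prec, macro_f1,
 macro_acc, macro_bacc, acc, supports) = macro_metrics(y_true, y_pred)
# Each pair below should match; any tiny difference comes from the eps terms
print(macro_sens, recall_score(y_true, y_pred, average='macro'))
print(macro_prec, precision_score(y_true, y_pred, average='macro'))
print(macro_f1, f1_score(y_true, y_pred, average='macro'))
print(macro_bacc, balanced_accuracy_score(y_true, y_pred))
print(acc, accuracy_score(y_true, y_pred))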
Upvotes: 0