Daniel Shiverman

Reputation: 1

Switching a binary classification scikit-learn (Python) model to a multi-class classification model

I'm currently having trouble switching the following code to fit a multiclass variable (3 levels).

# data import
from ucimlrepo import fetch_ucirepo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import LabelBinarizer

# fetch breast cancer dataset
bc = fetch_ucirepo(id=17)

# data (as pandas dataframes)
bc_X = bc.data.features
bc_y = bc.data.targets

# fetch heart disease dataset
hd = fetch_ucirepo(id=45)
# data (as pandas dataframes)
hd_X = hd.data.features
hd_y = hd.data.targets


# fetch iris dataset
ir = fetch_ucirepo(id=53)
# data (as pandas dataframes)
ir_X = ir.data.features
ir_y = ir.data.targets


# fetch wine quality dataset
wq = fetch_ucirepo(id=186)
# data (as pandas dataframes)
wq_X = wq.data.features
wq_y = wq.data.targets


# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(hd_X, hd_y, test_size=0.2, random_state=42)

# cap values function
def cap_values(values):
    # Convert input to a numpy array
    values = np.array(values)
    # Cap values at 1 using numpy's clip function
    capped_values = np.clip(values, None, 1)
    return capped_values
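# (Note: for the heart-disease target, which takes the values 0-4, clipping at 1 collapses it
#  to a binary 0/1 label.)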

# Find the indices of rows with missing values in X_train and X_test
missing_indices = X_train.isnull().any(axis=1)
missing_indices2 = X_test.isnull().any(axis=1)

# Remove rows with missing values from X_train and y_train
X_train = X_train[~missing_indices]
y_train = y_train[~missing_indices]
X_test = X_test[~missing_indices2]
y_test = y_test[~missing_indices2]

# Apply the cap_values function to y_test and y_train
y_test = cap_values(y_test)
y_train = cap_values(y_train)

# Step 3: Fit each model to the training data

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Naïve Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Support Vector Machine
svm_model = SVC(random_state=42)
svm_model.fit(X_train, y_train)

# Step 4: Evaluate the models using the testing data
models = {
    'Decision Tree': dt_model,
    'Naïve Bayes': nb_model,
    'Random Forest': rf_model,
    'Support Vector Machine': svm_model
}

# Initialize a dictionary to store the performance metrics
performance_metrics = {}

# Iterate through each model and calculate the metrics
for name, model in models.items():
    # Get predictions
    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else model.decision_function(X_test)

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    ConfusionMatrixDisplay(cm, display_labels=model.classes_).plot(cmap=plt.cm.Blues)
    plt.title(f'Confusion Matrix: {name}')
    plt.show()

    # Calculate sensitivity (recall) and specificity
    if cm.shape == (2, 2):
      # Binary classification metrics
      tn, fp, fn, tp = cm.ravel()
      sensitivity = tp / (tp + fn)
      specificity = tn / (tn + fp)
    else:
      # Multi-class classification metrics
      sensitivity = recall_score(y_test, y_pred, average='macro')
      specificity = None

    # Calculate balanced accuracy
    balanced_accuracy = (sensitivity + specificity) / 2

    # Calculate precision, recall, and F1-score
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Calculate AUC
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
    auc = roc_auc_score(y_test, y_pred_prob)

    # Store the performance metrics
    performance_metrics[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Sensitivity': sensitivity,
        'Specificity': specificity,
        'Balanced Accuracy': balanced_accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'AUC': auc
    }

    # Plot ROC curve
    plt.figure()
    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
    plt.title(f'ROC Curve: {name}')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()

# Display the performance metrics in a DataFrame
metrics_df = pd.DataFrame(performance_metrics).transpose()
print(metrics_df)

I need the accuracy, sensitivity, specificity, balanced accuracy, precision, recall, F1 score, AUC and, most importantly, the ROC plot for each model. This works perfectly for binary classification, so I'm confused about why it won't work for multiclass. Flattening the variable did not work.

Thanks for any help

I tried flattening the variable; I tried a for loop doing 3 separate binary classifications and averaging the results; and I tried splitting the data into 3 datasets, each with only 2 of the classes, so that each was essentially a binary classification problem.
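As far as I can tell, the ROC step is one place the code assumes a binary target, since roc_curve only accepts two classes, so each class presumably needs its own one-vs-rest curve. A rough sketch with toy data (not my real data, just to illustrate the shape of it):

import numpy as np
from sklearn.metrics import roc_curve
from sklearn.preprocessing import label_binarize

# toy 3-level target and stand-in probability scores (n_samples x n_classes)
y_true = np.array([0, 1, 2, 1, 0, 2, 2, 1])
scores = np.random.default_rng(0).random((8, 3))

# roc_curve(y_true, scores[:, 1]) raises an error here because y_true has 3 classes;
# instead, each class gets its own binary (one-vs-rest) curve:
y_onehot = label_binarize(y_true, classes=[0, 1, 2])
for k in range(3):
    fpr, tpr, _ = roc_curve(y_onehot[:, k], scores[:, k])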

Upvotes: 0

Views: 153

Answers (1)

MuhammedYunus

Reputation: 5095

I've modified the code to do the following:

  • For each class, get its ROC curve, and then average those curves together to get a single curve for that model
  • Added macro specificity score
  • Amended the code to work with both binary and multi-class labels (new function defined to help with this)


It'd be worth double-checking results against sklearn if you have binary data; that would ensure things are consistent. I've included checks for the overall metrics.
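For example, a quick self-contained check with toy labels (just to illustrate the definitions; the macro_metrics() function further down should agree with these up to the small eps it uses to avoid division by zero):

import numpy as np
from sklearn.metrics import (
    recall_score, precision_score, f1_score, balanced_accuracy_score
)

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])

# macro sensitivity is just macro-averaged recall
print('macro recall:   ', recall_score(y_true, y_pred, average='macro'))
print('macro precision:', precision_score(y_true, y_pred, average='macro', zero_division=0))
print('macro F1:       ', f1_score(y_true, y_pred, average='macro'))
# sklearn's balanced_accuracy_score is also the macro average of per-class recall
print('balanced acc:   ', balanced_accuracy_score(y_true, y_pred))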


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ucimlrepo import fetch_ucirepo

from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# fetch heart disease dataset
hd = fetch_ucirepo(id=45)
# data (as pandas dataframes)
hd_X_orig = hd.data.features
hd_y_orig = hd.data.targets

# Remove rows with any missing feature (done before splitting, so train/val/test all stay clean)
missing_indices = hd_X_orig.isnull().any(axis=1)
hd_X = hd_X_orig[~missing_indices].copy()
hd_y = hd_y_orig[~missing_indices].copy()

# Step 2: Split the data into training and testing sets
train_frac, val_frac = 0.7, 0.2
test_frac = 1 - (train_frac + val_frac)

train_val_ixs, test_ixs = train_test_split(
    range(len(hd_X)), test_size=test_frac, random_state=0
)

train_ixs, val_ixs = train_test_split(
    train_val_ixs, test_size=val_frac / (1 - test_frac), random_state=0
)

print(
    'Partition sizes are:',
    f'train={train_frac} ({len(train_ixs)} samples) | '
    f'val={val_frac} ({len(val_ixs)} samples) | '
    f'test={test_frac:.1f} ({len(test_ixs)} samples)'
)

X_train, X_val, X_test = [hd_X.iloc[ixs] for ixs in [train_ixs, val_ixs, test_ixs]]
y_train, y_val, y_test = [hd_y.iloc[ixs].values.ravel() for ixs in [train_ixs, val_ixs, test_ixs]]
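# Note: .values.ravel() above flattens the single-column targets DataFrame into 1-D arrays,
# which is the shape sklearn expects for y (avoids the column-vector warning when fitting).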

# Step 3: Fit each model to the training data

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Naïve Bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Support Vector Machine
svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train, y_train)

# Step 4: Evaluate the models using the testing data
models = {
    'Decision Tree': dt_model,
    'Naïve Bayes': nb_model,
    'Random Forest': rf_model,
    'Support Vector Machine': svm_model
}

# Initialize a dictionary to store the performance metrics
performance_metrics = {}

#
# Returns macro metrics for any number of classes (can be binary or multiclass)
# Returns tuple: (
#     global TN, global FP, global FN, global TP,
#     macro sensitivity, macro specificity, macro precision, macro f1,
#     macro accuracy, macro balanced accuracy, subset accuracy,
#     supports
# )
#
def macro_metrics(y_true, y_pred):
    classes = np.unique(y_true)
    
    tp_perclass = []
    fp_perclass = []
    tn_perclass = []
    fn_perclass = []
    sensitivity_perclass = []
    specificity_perclass = []
    precision_perclass = []
    f1_perclass = []
    accuracy_perclass = []
    balanced_accuracy_perclass = []
    support_perclass = []
    
    #For each class, get its metrics
    for clas in classes:
        #Convert to binary True/False for this class
        class_true = y_true == clas
        class_pred = y_pred == clas
        
        tp = (class_pred & class_true).sum()
        tn = (~class_pred & ~class_true).sum()
        
        fp = (class_pred & ~class_true).sum()
        fn = (~class_pred & class_true).sum()
        
        eps = 1e-6 #prevent division by 0
        sensitivity = tp / (tp + fn + eps)
        specificity = tn / (tn + fp + eps)
        precision = tp / (tp + fp + eps)
        f1 = 2 * precision * sensitivity / (precision + sensitivity + eps)
        accuracy = (tp + tn) / (tp + tn + fp + fn)
        balanced_accuracy = (sensitivity + specificity) / 2
        
        #Record metrics for each class
        tp_perclass.append(tp)
        tn_perclass.append(tn)
        fp_perclass.append(fp)
        fn_perclass.append(fn)
        
        sensitivity_perclass.append(sensitivity)
        specificity_perclass.append(specificity)
        precision_perclass.append(precision)
        f1_perclass.append(f1)
        accuracy_perclass.append(accuracy)
        balanced_accuracy_perclass.append(balanced_accuracy)
        support_perclass.append(class_true.sum())
    
    #This is equivalent to sklearn's accuracy_score(), i.e. the fraction of exactly-correct predictions
    subset_accuracy = (y_pred == y_true).mean()
    
    #Return macro metrics, i.e. a simple average over each class's score
    return (
        #Global TN, FP, FN, TP
        sum(tn_perclass), sum(fp_perclass), sum(fn_perclass), sum(tp_perclass),
        
        #Macro metrics
        np.mean(sensitivity_perclass),
        np.mean(specificity_perclass),
        np.mean(precision_perclass),
        np.mean(f1_perclass),
        np.mean(accuracy_perclass),
        np.mean(balanced_accuracy_perclass),
        
        #like accuracy_score():
        subset_accuracy,
        
        #Supports
        support_perclass
    )
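
# Example usage of macro_metrics() with toy labels (illustration only):
#   macro_metrics(np.array([0, 0, 1, 2]), np.array([0, 1, 1, 2]))
# The first four values are the summed per-class TN/FP/FN/TP counts, followed by the
# macro-averaged scores, the subset accuracy, and the per-class supports.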

# Iterate through each model and calculate the metrics
roc_ax = plt.figure(figsize=(7, 3)).add_subplot()
plt.close()
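# A single shared axes is created up front so every model's ROC curve lands on one plot;
# closing the figure prevents the plt.show() calls inside the loop from rendering it before
# all curves have been added.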

for name, model in models.items():
    # Get predictions
    y_pred = model.predict(X_val)
    y_pred_probs = model.predict_proba(X_val)

    # Confusion matrix
    cm = confusion_matrix(y_val, y_pred)
    ConfusionMatrixDisplay(cm, display_labels=model.classes_).plot(
        cmap=plt.cm.Blues, colorbar=False
    )
    plt.title(f'Confusion Matrix: {name}', fontsize=10)
    plt.gcf().set_size_inches(2.5, 2.5)
    plt.show()

    # Binary/multiclass classification metrics
    tn, fp, fn, tp, macro_sens, macro_spec, macro_prec, macro_f1, macro_acc,\
        macro_bacc, acc, supports = macro_metrics(y_val, y_pred)

    # Optional: print macro_metrics() raw output to compare against classification_report() below
    if False:  # set to True to enable the check
        print('tn:', tn, 'fp:', fp, 'fn:', fn, 'tp:', tp,
            '\nmacro sens:', macro_sens,
            '\nmacro spec:', macro_spec,
            '\nmacro prec:', macro_prec,
            '\nmacro f1:', macro_f1,
            '\nmacro acc:', macro_acc,
            '\nmacro bacc:', macro_bacc,
            '\nacc:', acc,
            '\nsupports:', supports
            )
    
    #sklearn's classification_report() for various class-wise & averaged metrics
    print( classification_report(y_val, y_pred, digits=6, zero_division=0) )

    # Calculate precision, recall, and F1-score
    precision = precision_score(y_val, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_val, y_pred, average='macro')
    f1 = f1_score(y_val, y_pred, average='macro')

    # Calculate AUC
    y_val_onehot = pd.get_dummies(y_val, dtype=int).values
    #Note that "auc" below is still affected by class imbalance. See its doc page for info.
    auc = roc_auc_score(y_val_onehot, y_pred_probs, average='macro', multi_class='ovr')
    
    # ROC averaged over classes
    # Get the one-vs-rest ROC for each class, resample onto a common FPR axis, then average
    common_roc_fpr = np.linspace(0, 1, 100)
    tprs_perclass = []

    for clas in np.unique(y_val):
        # column of predict_proba() that corresponds to this class label
        col = list(model.classes_).index(clas)
        fpr, tpr, thresholds = roc_curve(y_val == clas, y_pred_probs[:, col])
        tprs_perclass.append(
            np.interp(common_roc_fpr, fpr, tpr)
        )
    macro_roc_tpr = np.vstack(tprs_perclass).mean(axis=0)
    
    # Store the performance metrics
    performance_metrics[name] = {
        'Accuracy': accuracy_score(y_val, y_pred),
        # (the binary-only 'Specificity' entry was removed; the macro version is added below)
        'Balanced Accuracy': macro_bacc,
        'Precision': precision,
        'Recall': recall,
        'Specificity': macro_spec, #added
        'F1 Score': f1,
        'AUC': auc #a macro average
    }
    
    # Add ROC curve
    roc_ax.plot(common_roc_fpr, macro_roc_tpr, label=f'{name} (AUC={auc:.2f})')
    
roc_ax.plot([0, 1], [0, 1], 'k--')  # Diagonal line
roc_ax.set(xlim=(-0.01, 1.01), ylim=(-0.01, 1.01))
roc_ax.set_title('ROC Curves')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.legend(fontsize=9)
plt.sca(roc_ax)

# Display the performance metrics in a DataFrame
metrics_df = pd.DataFrame(performance_metrics).transpose()
display(metrics_df)

Upvotes: 0
