Reputation: 1
I am training a one-class autoencoder on 50,000 benign samples. When testing the model, I give it 10,000 benign samples and 10,000 anomalies. The resulting F1 and recall scores are 0.974 and 0.992, respectively. The model classifies roughly 9,600 samples as anomalies and 10,400 as benign (so not perfect, but it performs really well). However, when I go to plot the ROC curve and calculate the AUC, I am met with a value of 0.005.
How could this be possible?
I am using sklearn's roc_curve() function to calculate the fpr and tpr so I can plot the curve, and then the auc() function to get the numerical value.
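In isolation, the metric calculation boils down to something like this (toy labels and scores here, not my real data; in my actual code the score passed in is the per-sample reconstruction MSE):

import numpy as np
from sklearn.metrics import roc_curve, auc

# toy example just to show the call pattern:
# 1 = benign (positive class), -1 = anomaly, score = reconstruction error
true = np.array([1, 1, 1, -1, -1])
score = np.array([0.01, 0.02, 0.03, 0.90, 0.80])

fpr, tpr, _ = roc_curve(true, score, pos_label=1)
auc_value = auc(fpr, tpr)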
I do not see how it is possible that my model can perform so well but still end up with such a low AUC. Any tips would be greatly appreciated. I have attached my autoencoder model below; feel free to ask for more information.
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras import regularizers
from keras.callbacks import EarlyStopping
from sklearn.metrics import roc_curve, auc, f1_score, recall_score
from sklearn.svm import OneClassSVM
from sklearn import metrics
import numpy as np
import pickle


class Autoencoder:
    def __init__(self, encoding_dim=64, activity_regularizer=10e-6):
        self.encoding_dim = encoding_dim
        self.activity_regularizer = activity_regularizer
        self.autoencoder = None
        self.threshold = None

    def fit(self, x_train, x_valid, epochs=50, batch_size=256, earlystop_patience=10):
        # Input shape
        input_dim = x_train.shape[1]
        # Input layer
        input_layer = Input(shape=(input_dim,))
        # Encoder layers
        hidden_layer1 = Dense(512, activation='tanh', activity_regularizer=regularizers.l1(self.activity_regularizer))(input_layer)
        hidden_layer1 = Dropout(0.5)(hidden_layer1)
        hidden_layer2 = Dense(256, activation='tanh', activity_regularizer=regularizers.l1(self.activity_regularizer))(hidden_layer1)
        hidden_layer2 = Dropout(0.5)(hidden_layer2)
        encoded = Dense(self.encoding_dim, activation='tanh', activity_regularizer=regularizers.l1(self.activity_regularizer))(hidden_layer2)
        # Decoder layers
        hidden_layer4 = Dense(256, activation='tanh', activity_regularizer=regularizers.l1(self.activity_regularizer))(encoded)
        hidden_layer4 = Dropout(0.5)(hidden_layer4)
        hidden_layer5 = Dense(512, activation='tanh', activity_regularizer=regularizers.l1(self.activity_regularizer))(hidden_layer4)
        hidden_layer5 = Dropout(0.5)(hidden_layer5)
        decoded = Dense(input_dim, activation='sigmoid')(hidden_layer5)
        # Define autoencoder
        self.autoencoder = Model(inputs=input_layer, outputs=decoded)
        # Compile autoencoder
        self.autoencoder.compile(optimizer='adam', loss='mean_squared_error')
        # Early stopping
        earlystop_callback = EarlyStopping(monitor='val_loss', patience=earlystop_patience, verbose=1, mode='min')
        # Train
        self.autoencoder.fit(x_train, x_train, epochs=epochs, batch_size=batch_size,
                             validation_data=(x_valid, x_valid), callbacks=[earlystop_callback])

    def evaluate(self, x_test, true):
        pred = self.autoencoder.predict(x_test)
        # Reconstruction error per sample
        mse = np.mean(np.power(x_test - pred, 2), axis=1)
        np.savetxt('mse.csv', mse, delimiter=',')
        # Threshold calculation
        self.threshold = np.mean(mse)
        print("Threshold: ")
        print(self.threshold)
        print("\n")
        # Predicted labels for the AE: 1 = inlier/benign (low error), -1 = anomaly (high error)
        ae_test = np.where(mse <= self.threshold, 1, -1)
        anomoly_counter = 0
        normal_counter = 0
        np.savetxt('ae_test.csv', ae_test, delimiter=',')
        for val in ae_test:
            if val == -1:
                anomoly_counter += 1
            elif val == 1:
                normal_counter += 1
        # AUC calculation
        fpr, tpr, _ = roc_curve(true, mse, pos_label=1)
        auc_num = auc(fpr, tpr)
        # F1 score
        f1 = f1_score(true, ae_test)
        # Recall score
        recall = recall_score(true, ae_test)
        print('AUC: {:.3f}'.format(auc_num))
        print('Recall: {:.3f}'.format(recall))
        print('F1 Score: {:.3f}'.format(f1))
        print('Anomaly: {}'.format(anomoly_counter))
        print('Positive Class: {}'.format(normal_counter))
        return fpr, tpr

    def save_model(self, model_file):
        with open(model_file, 'wb') as file:
            pickle.dump(self, file)
I compared the predicted binary labels to the actual labels and they do align: the values predicted as inliers (1) are actually inliers, and the values predicted as anomalies (-1) are actually anomalies.
Additionally, I am training an OCSVM on the same data and getting an AUC of 0.997, with correspondingly high F1 and recall scores.
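For context, the OCSVM side of the comparison looks roughly like this (a simplified sketch; the nu/gamma values and the use of decision_function as the ROC score are illustrative rather than my exact setup):

from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_curve, auc, f1_score, recall_score

# fit on the 50,000 benign training samples only
ocsvm = OneClassSVM(kernel='rbf', nu=0.01, gamma='scale')  # placeholder hyperparameters
ocsvm.fit(x_train)

# decision_function: higher = more "normal", so higher score corresponds to the
# positive (benign) class when pos_label=1
score = ocsvm.decision_function(x_test)
pred = ocsvm.predict(x_test)  # +1 = inlier, -1 = outlier

fpr, tpr, _ = roc_curve(true, score, pos_label=1)
print('AUC: {:.3f}'.format(auc(fpr, tpr)))
print('F1: {:.3f}'.format(f1_score(true, pred)))
print('Recall: {:.3f}'.format(recall_score(true, pred)))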
I am almost 100% certain that the ROC curve is just somehow inverted for the AE, but I do not know how to prove it or un-invert it.
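For reference, the kind of check I have in mind is something like the snippet below; negating the reconstruction error so that a higher score means "more benign" is my own guess at how to un-invert it, so I may be off base:

from sklearn.metrics import roc_curve, auc

# 'true' and 'mse' are the same arrays as in evaluate() above.
# If the original curve really is inverted, this should come out near 1 - 0.005.
fpr_flip, tpr_flip, _ = roc_curve(true, -mse, pos_label=1)
print('AUC with negated score: {:.3f}'.format(auc(fpr_flip, tpr_flip)))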
ROC CURVE: https://drive.google.com/file/d/1RSJSTtHW46ZnNRYSInCGOP9sScPRe71j/view?usp=share_link
Upvotes: 0
Views: 118