Reputation: 135
I get the error:
On Training Data: Traceback (most recent call last):
File "C:\Users\Roman\Desktop\Key\Poyd.py", line 80, in stock(AAPL, "AAPL")
File "C:\Users\Roman\Desktop\Key\Poyd.py", line 71, in stock evaluate_print(clf_name, yTrain, y_train_scores)
File "C:\Users\Roman\anaconda3\lib\site-packages\pyod\utils\data.py", line 283, in evaluate_print roc=np.round(roc_auc_score(y, y_pred), decimals=4),
File "C:\Users\Roman\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f return f(*args, **kwargs)
File "C:\Users\Roman\anaconda3\lib\site-packages\sklearn\metrics_ranking.py", line 536, in roc_auc_score raise ValueError("multi_class must be in ('ovo', 'ovr')")
ValueError: multi_class must be in ('ovo', 'ovr')
so I've taken the X the date column and Y the close\last column
I know there is roc_auc_score How to combine them together? The goal is to create visualization for anomaly detection, but currently it doesn't work.
from pandas import read_csv
from matplotlib import pyplot
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
#from IPython.core.debugger import set_trace
#import data
AAPL= pd.read_csv('AAPL.csv', header=0, squeeze=True)
#function to get stock
def stock(s, title):
print(title)
#independent variables-Matrix
X = s.iloc[:,1].values.reshape(-1,1)
#np.atleast_2d(X)
print(X)
#dependent variables vectors
#Y = df.iloc[:, -1].values
Y = s.iloc[:, 2].values.reshape(-1,1)
print(Y)
from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size = 0.3 ,random_state = 0)#70% on train and 30% on test
# # train the COPOD detector
# from pyod.models.copod import COPOD
# clf = COPOD()
# clf.fit(xTrain)
# clf.reshape(-1,1)
# # get outlier scores
# y_train_scores = clf.decision_scores_ # raw outlier scores on the train data
# y_test_scores = clf.decision_function(xTest) # predict raw outlier scores on test
# # print("#####################")
# # print( "outlier:", y_test_scores)
from pyod.models.knn import KNN # kNN detector
# train kNN detector
clf_name = 'KNN'
clf = KNN()
clf.fit(xTrain)
# get the prediction label and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(xTest) # outlier labels (0 or 1)
y_test_scores = clf.decision_function(xTest) # outlier scores
from pyod.utils.data import evaluate_print
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
#myscore = make_scorer(roc_auc_score, multi_class='ovo',needs_proba=True)
# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, yTrain, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, yTest, y_test_scores)
from pyod.utils.example import visualize
visualize(clf_name, xTrain, yTrain, xTest, yTest, y_train_pred,
y_test_pred, show_figure=True, save_figure=False)
stock(AAPL, "AAPL")
Thank you in advance!
Upvotes: 1
Views: 19842
Reputation: 91
Try this code, it worked for me:
clf = LogisticRegression(solver="liblinear").fit(x_train, y_train)
preds = clf.predict_proba(x_test)
roc_auc = roc_auc_score(y_test,preds, multi_class='ovr')
print(roc_auc)
Upvotes: 5