Kenon39
Kenon39

Reputation: 11

How to create confusion matrix in Python

I am trying to create a model that uses Batch Normalization on a limited number of classes and samples from the MNIST dataset. Here I am trying with 2 classes, one of which has a limited number of samples. For example in my code, classes 4 and 7 are taken from MNIST, and the number of samples of class 4 is limited to 39. However, this applies to the training data only. Therefore the total number of samples of classes 4 and 7 combined is 6304 for the training data, and 2010 for the test data.

I am trying to create a confusion matrix of the test data only, but I do not know how to do it because I am quite new to Python.

Could anyone show me how to create a confusion matrix that covers the whole set of test data?

# coding: utf-8



import gzip
import numpy as np
from numpy.linalg import inv

def _load_testimage_filter(a,b,file):
    x=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(x[:,0]==a),np.where(x[:,0]==b)))
    x=x[idx][0]
    x0,x1=np.hsplit(x,[1])
    return x1


def _load_image_filter(a,b,c,file):
    x=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(x[:,0]==a),np.where(x[:,0]==None)))
    x=x[idx][0]
    x=x[:c,:]
    x0,x1=np.hsplit(x,[1])

    y=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(y[:,0]==b),np.where(y[:,0]==None)))
    y=y[idx][0]
    y0,y1=np.hsplit(y,[1])

    z=np.vstack((x1,y1))
    return z

def _load_image_filter2(a,b,c,file):
    x=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(x[:,0]==a),np.where(x[:,0]==None)))
    x=x[idx][0]
    x=x[:c,:]
    x0,x1=np.hsplit(x,[1])

    y=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(y[:,0]==b),np.where(y[:,0]==None)))
    y=y[idx][0]
    y0,y1=np.hsplit(y,[1])

    z=np.vstack((x0,y0))
    return z,x0,y0

def _load_testlabel_filter(a,b,file):
    x=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(x[:,0]==a),np.where(x[:,0]==b)))
    x=x[idx][0]
    x0,x1=np.hsplit(x,[1])
    label=x0[:,0]
    label_1ofK=np.identity(10)[label.astype(np.int)]
    return label_1ofK

def _load_label_filter(a,b,c,file):
    x=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(x[:,0]==a),np.where(x[:,0]==None)))
    x=x[idx][0]
    x=x[:c,:]
    x0,x1=np.hsplit(x,[1])
    y=np.loadtxt(file,delimiter=',')
    idx=np.hstack((np.where(y[:,0]==b),np.where(y[:,0]==None)))
    y=y[idx][0]
    y0,y1=np.hsplit(y,[1])      
    z=np.vstack((x0,y0))

    label=z[:,0]
    label_1ofK=np.identity(10)[label.astype(np.int)]
    return label_1ofK

def load_mnist_filter(a, b, c):
    """Load MNIST restricted to classes `a` and `b` (class `a` capped at `c`
    training samples) and return (x_train, t_train, x_test, t_test)."""
    train_csv = "./MNIST/mnist_train.csv"
    test_csv = "./MNIST/mnist_test.csv"
    x_train = _load_image_filter(a, b, c, train_csv)
    t_train = _load_label_filter(a, b, c, train_csv)
    x_test = _load_testimage_filter(a, b, test_csv)
    t_test = _load_testlabel_filter(a, b, test_csv)
    return x_train, t_train, x_test, t_test






#BN
import numpy as np
import time

from sklearn.metrics import confusion_matrix,accuracy_score


def learn(name,x_train,t_train,x_test,t_test,md=[200],weight_init_func=he_normal,weight_init_params={},bias_init_func=zeros_b,bias_init_params={},eta=0.01,batch_size=100,epoch=100,data_norm_func=min_max,data_norm_params={},middle_func=relu,middle_params={},output_func=softmax,output_params={},error_func2=cross_entropy_error2,optimizer_func=Adam,optimizer_params={"beta1":0.9,"beta2":0.999},regularization_params={},shuffle_flag=True):
    """Train a fully-connected network with optional batch normalization.

    Parameters
    ----------
    name : str            Label printed at the start of the run.
    x_train, t_train :    Training inputs and one-hot targets (2-D arrays).
    x_test, t_test :      Test inputs and one-hot targets (2-D arrays).
    md : list[int]        Hidden-layer widths.
    epoch, batch_size, eta : usual SGD hyper-parameters.
    The *_func / *_params arguments are initialization, normalization,
    activation, error and optimizer callables defined elsewhere in this
    project (he_normal, min_max, relu, softmax, Adam, ...).

    Returns
    -------
    (y_train, y_test, W, b, train_rate, train_err, test_rate, test_err,
     total_time) — final outputs, learned parameters, per-epoch accuracy and
    error histories, and the wall-clock training time in seconds.

    FIX: the confusion matrix was computed over `y_test[1:10]` / `t_test[1:10]`
    — i.e. only 9 test samples — which is why the output was [[1,8],[0,0]]
    instead of a matrix over all 2010 test samples.  It now uses the full
    arrays with `np.argmax(..., axis=1)`.
    """
    print(name)
    # Layer sizes: input dim, hidden widths, output dim.
    d=[x_train.shape[x_train.ndim-1]]+md+[t_train.shape[t_train.ndim-1]]
    layer=len(d)-1
    print(layer)
    # Weight and bias initialization.
    W = {}
    b = {}
    for i in range(layer):
        W[i+1]=weight_init_func(d[i], d[i+1], weight_init_params)
    for i in range(layer):
        b[i+1]=bias_init_func(d[i+1], bias_init_params)
    # Input data normalization (test data reuses the training statistics).
    stats={}
    nx_train,train_stats=data_norm_func(x_train, stats, data_norm_params)
    nx_test,test_stats=data_norm_func(x_test, train_stats, data_norm_params)
    # Accuracy-rate and error histories (index 0 = before training).
    train_rate=np.zeros(epoch+1)
    test_rate=np.zeros(epoch+1)
    train_err=np.zeros(epoch+1)
    test_err=np.zeros(epoch+1)
    # Gradient functions, resolved by naming convention from the forward
    # functions (e.g. relu -> relu_back).  NOTE(review): eval on function
    # names is fragile; a dispatch dict would be safer.
    middle_back_func = eval(middle_func.__name__ + "_back")
    output_error_back_func = eval(output_func.__name__ + "_" + error_func2.__name__ + "_back")
    optimizer_statsW={}
    optimizer_statsb={}
    for i in range(layer):
        optimizer_statsW[i+1]={}
        optimizer_statsb[i+1]={}
    # Batch-normalization parameters: per-node gamma/beta when
    # "batch_norm_node" is set, otherwise a single scalar pair per layer.
    batch_norm_params = {}
    batch_norm_params["batch_norm_gamma"] = {}
    batch_norm_params["batch_norm_beta"]  = {}
    for i in range(1, layer):
        if regularization_params.get("batch_norm_node"):
            batch_norm_params["batch_norm_gamma"][i] = np.ones((1, d[i]))
            batch_norm_params["batch_norm_beta"][i]  = np.zeros((1, d[i]))
        else:
            batch_norm_params["batch_norm_gamma"][i] = np.ones(1)
            batch_norm_params["batch_norm_beta"][i]  = np.zeros(1)
    # Learning rate for gamma/beta defaults to eta unless overridden.
    batch_norm_eta = eta
    if "batch_norm_eta" in regularization_params:
        batch_norm_eta = regularization_params["batch_norm_eta"]
    optimizer_stats_gamma={}
    optimizer_stats_beta={}
    for i in range(1,layer):
        optimizer_stats_gamma[i]={}
        optimizer_stats_beta[i]={}

    # Epoch-0 evaluation (before any training) on the training data...
    u_train, un_train, z_train, y_train=propagation(layer, nx_train, W, b, middle_func, middle_params, output_func, output_params, regularization_params, batch_norm_params)
    train_rate[0]=accuracy_rate(y_train, t_train)
    train_err[0]=calc_error2(y_train, t_train, W, error_func2, regularization_params)
    # ...and on the test data.
    u_test, un_test, z_test, y_test=propagation(layer, nx_test, W, b, middle_func, middle_params, output_func, output_params, regularization_params, batch_norm_params)
    test_rate[0]=accuracy_rate(y_test, t_test)
    test_err[0]=calc_error2(y_test, t_test, W, error_func2, regularization_params)
    print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((0), train_rate[0]*100, test_rate[0]*100, train_err[0], test_err[0]))

    start_time = time.time()
    for i in range(epoch):
        nx = nx_train
        t  = t_train
        if shuffle_flag:
            # Shuffle inputs and targets together so rows stay aligned.
            nx_t = np.concatenate([nx_train, t_train], axis=1)
            np.random.shuffle(nx_t)
            nx, t = np.split(nx_t, [nx_train.shape[1]], axis=1)
        for j in range(0, nx.shape[0], batch_size):
            # Forward pass on the mini-batch.
            u, un, z, y = propagation(layer, nx[j:j+batch_size], W, b, middle_func, middle_params, output_func, output_params, regularization_params, batch_norm_params)
            # Backward pass: gradients for weights, biases and gamma/beta.
            du, dz, dW, db, batch_norm_dparams = back_propagation(layer, u, un, z, y, t[j:j+batch_size], W, b, middle_back_func, middle_params, output_error_back_func, regularization_params, batch_norm_params)
            # Parameter update for every layer.
            for k in range(1, layer+1):
                W[k],optimizer_statsW[k] = optimizer_func(W[k],dW[k],eta,optimizer_params,optimizer_statsW[k])
                b[k],optimizer_statsb[k] = optimizer_func(b[k],db[k],eta,optimizer_params,optimizer_statsb[k])
            # Batch-normalization gamma/beta update (hidden layers only).
            if regularization_params.get("batch_norm"):
                for k in range(1, layer):
                    batch_norm_params["batch_norm_gamma"][k],optimizer_stats_gamma[k]=optimizer_func(batch_norm_params["batch_norm_gamma"][k],batch_norm_dparams["batch_norm_dgamma"][k],batch_norm_eta,optimizer_params,optimizer_stats_gamma[k])
                    batch_norm_params["batch_norm_beta"][k],optimizer_stats_beta[k]=optimizer_func(batch_norm_params["batch_norm_beta"][k],batch_norm_dparams["batch_norm_dbeta"][k],batch_norm_eta,optimizer_params,optimizer_stats_beta[k])

        # End-of-epoch evaluation on the full training data...
        u_train, un_train, z_train, y_train = propagation(layer, nx_train, W, b, middle_func, middle_params, output_func, output_params, regularization_params, batch_norm_params)
        train_rate[i+1] = accuracy_rate(y_train, t_train)
        train_err[i+1] = calc_error2(y_train, t_train, W, error_func2, regularization_params)
        # ...and on the full test data.
        u_test, un_test, z_test, y_test = propagation(layer, nx_test, W, b, middle_func, middle_params, output_func, output_params, regularization_params, batch_norm_params)

        # Confusion matrix over the WHOLE test set.
        # (Was y_test[1:10] / t_test[1:10], which compared only 9 samples.)
        test_predictions = np.argmax(y_test, axis=1)
        true_classes = np.argmax(t_test, axis=1)
        cm = confusion_matrix(true_classes, test_predictions)
        print(cm)

        test_rate[i+1] = accuracy_rate(y_test, t_test)
        test_err[i+1] = calc_error2(y_test, t_test, W, error_func2, regularization_params)
        # Progress report for this epoch.
        print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((i+1), train_rate[i+1]*100, test_rate[i+1]*100, train_err[i+1],test_err[i+1]))

    end_time = time.time()
    total_time = end_time - start_time
    print("所要時間 = " +str(int(total_time/60))+" 分 "+str(int(total_time%60)) + " 秒")

    return y_train, y_test, W, b, train_rate, train_err, test_rate, test_err, total_time






# Load MNIST restricted to classes 4 and 7, with class 4 capped at 39
# training samples; print the resulting array shapes as a sanity check.
x_train,t_train,x_test,t_test=load_mnist_filter(4,7,39)
print(x_train.shape,t_train.shape,x_test.shape,t_test.shape)





#BN
# Run training with batch normalization enabled; the final outputs and
# learned parameters are discarded, only the histories and timing are kept.
regularization_params={"batch_norm":True}
_, _, _, _,train_rate,train_err,test_rate,test_err,total_time=learn("batch_norm",x_train,t_train,x_test,t_test,regularization_params=regularization_params)







I expect the output to be something like [[342,640],[0,1028]], but the actual output is [[1,8],[0,0]].

Upvotes: 0

Views: 1245

Answers (1)

Hayat
Hayat

Reputation: 1639

Try this:

from sklearn.metrics import confusion_matrix
y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]
confusion_matrix(y_true, y_pred)

You can also use matplotlib. This link might help

Upvotes: 1

Related Questions