Wesley Low

Reputation: 11

Why does my multilayer perceptron read the same digit every time?

I created a multilayer perceptron from scratch to demonstrate backpropagation. It is meant to identify handwritten digits from the MNIST database. However, the network runs into two issues whose source I can't trace:

Firstly, the network converges to a state where it reads the same digit every time, regardless of the input image. The cost (defined as the sum of squared differences between the output nodes and the ground truth) converges to 1.
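
For reference, by "cost" for a single training example I mean the squared difference between the sigmoid of the output layer and the one-hot truth vector, the same thing the cost method in the code below computes. A minimal standalone sketch (example_cost is only illustrative):

import numpy as np

def example_cost(output_activations, truth):
    # Sum of squared differences between the (sigmoid) output
    # activations and the one-hot ground-truth vector.
    diff = truth - output_activations
    return np.dot(diff, diff)

truth = np.zeros(10)
truth[3] = 1                                  # ground truth: digit 3
print(example_cost(np.full(10, 0.1), truth))  # 9*0.1^2 + 0.9^2 = 0.9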

Secondly, the expected output is a one-hot vector, but the highest-activation node in the output layer gradually becomes less activated as the network processes more training examples.

import mnist
import numpy as np
import random as rd

W2 = np.random.rand(256, 784)/784
W3 = np.random.rand(256, 256)/256
W4 = np.random.rand(10, 256)/256
Z1 = np.zeros(784)
Z2 = B2 = np.zeros(256)
Z3 = B3 = np.zeros(256)
Z4 = B4 = np.zeros(10)

test_images = mnist.test_images()
test_labels = mnist.test_labels()
train_images = mnist.train_images()
train_labels = mnist.train_labels()

def sig(x):
    return 1/(1+np.exp(-x))
def sigp(x):
    return sig(x)*(1-sig(x))

class Network:

    # Initialising weight matrices and bias and activation vectors
    def __init__(self, Z1, W2, B2, Z2, W3, B3, Z3, W4, B4, Z4):
        self.Z1 = Z1
        self.W2 = W2
        self.B2 = B2
        self.Z2 = Z2
        self.W3 = W3
        self.B3 = B3
        self.Z3 = Z3
        self.W4 = W4
        self.B4 = B4
        self.Z4 = Z4

    # Quick access to weights, biases and activations
    def W(self, n):
        if n==4: return self.W4
        if n==3: return self.W3
        if n==2: return self.W2
    def Z(self, n):
        if n==4: return self.Z4
        if n==3: return self.Z3
        if n==2: return self.Z2
        if n==1: return self.Z1
    def B(self, n):
        if n==4: return self.B4
        if n==3: return self.B3
        if n==2: return self.B2

    # Weight and bias modification
    def Wmod(self, n, weight_change, learning_rate):
        if n==4: self.W4 -= learning_rate*weight_change
        elif n==3: self.W3 -= learning_rate*weight_change
        elif n==2: self.W2 -= learning_rate*weight_change
        return
    def Bmod(self, n, delta, learning_rate):
        if n==4: self.B4 -= learning_rate*delta
        elif n==3: self.B3 -= learning_rate*delta
        elif n==2: self.B2 -= learning_rate*delta
        return

    # Reads the input image
    def read(self, input):
        self.Z1 = np.reshape(input, 784)
        self.Z2 = np.matmul(self.W2, sig(self.Z1)) + B2
        self.Z3 = np.matmul(self.W3, sig(self.Z2)) + B3
        self.Z4 = np.matmul(self.W4, sig(self.Z3)) + B4
        result = np.argmax(sig(self.Z4))
        return result

    # Calculates cost
    def cost(self, truth):
        truth -= sig(self.Z4)
        return np.dot(truth, truth)

net = Network(Z1, W2, B2, Z2, W3, B3, Z3, W4, B4, Z4)

for k in range(1000):
    i=rd.randint(1,len(train_images))

    # Creating truth vector
    truth = np.zeros(10)
    truth[train_labels[i]] = 1

    # Reading training input
    r = net.read(train_images[i])
    print("Training set {}, digit read = {}, actual digit = {} ".format(k,r,train_labels[i]))

    # Backpropagation
    for j in range(4,1,-1):
        if j==4: delta = np.multiply(sig(net.Z4) - truth, sigp(net.Z4))
        else: delta = np.multiply(sigp(net.Z(j)), np.matmul(net.W(j+1).T, delta))
        weight_change = np.outer(delta, sig(net.Z(j-1)))
        net.Wmod(j, weight_change, 0.001)
        net.Bmod(j, delta, 0.001)

print("\n")
for i in range(20):
    print("Read = {}, Actual = {}".format(net.read(test_images[i]), test_labels[i]))

I tried setting a smaller learning rate and training on up to 10,000 examples, but it still converges incorrectly. I've checked through the math and it seems correct; the only inaccuracies are floating-point errors and the discrepancy accumulated across the weight layers.
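
For completeness, the update rule the backpropagation loop is meant to implement is the following (using the same names as in the code, with * denoting element-wise multiplication):

delta_4 = (sig(Z4) - truth) * sigp(Z4)
delta_j = (W(j+1).T @ delta_(j+1)) * sigp(Z_j)    for j = 3, 2
dC/dW_j = outer(delta_j, sig(Z_(j-1)))
dC/dB_j = delta_j
W_j -= learning_rate * dC/dW_j
B_j -= learning_rate * dC/dB_j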

Upvotes: 1

Views: 38

Answers (0)
