Reputation: 19
I am trying to create a classification neural network using only the NumPy library. I have built the whole network and worked through its logic, and it seems fine to me, but I don't know what is preventing it from reaching good parameter values. One important thing I have noticed is that the weights in the first layer do not change at all.
What could be causing the code not to work as it is supposed to?
import numpy as np
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
x_train, x_test = x_train/255, x_test/255
print(x_train.shape)

from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse_output=False)
ohe.fit(y_train.reshape(-1,1))
y_train = ohe.fit_transform(y_train.reshape(-1,1))
y_test = ohe.transform(y_test.reshape(-1,1))
print(y_train.shape)

def linear(x,deriv=False):
    return x

def relu(x,deriv=False):
    if deriv:
        return (x > 0).astype(float)
    return np.maximum(0,x)

def softmax(x,deriv=False):
    xre = x - x.max(axis=0,keepdims=True)
    xexp = np.exp(xre)
    a = xexp.sum(axis=0,keepdims=True)
    prob = xexp/a
    return prob

def sigmoid(x,deriv=False):
    a = 1/(1+np.exp(-x))
    if deriv:
        return a * (1 - a)
    return a

activations = {'linear':linear,'relu':relu,'sigmoid':sigmoid,'softmax':softmax}

def initialvals(cols=784):
    shape = [cols,10,10]
    w = dict()
    b = dict()
    for i in range(len(shape)-1):
        w[i+1] = np.random.uniform(-0.5,0.5,(shape[i+1],shape[i]))
    for i in range(len(shape)-1):
        b[i+1] = np.zeros((shape[i+1],1))
    return w,b

def allprints(ww,bb):
    print('Weights')
    for i in ww:
        print(ww[i].shape)
    print('Biases')
    for i in bb:
        print(bb[i].shape)
    print('Weights')
    for i in ww:
        print(i)
        print(ww[i])
        print()
    print('Biases')
    for i in bb:
        print(i)
        print(bb[i])
        print()

def forprop(inputs,weight,bias,acts,av):
    z = dict()
    a = dict()
    z[0] = inputs.T
    a[0] = acts[av[0]](z[0])
    for i in range(1,len(weight)+1):
        z[i] = np.dot(weight[i],a[i-1]) + bias[i]
        a[i] = acts[av[i]](z[i])
    return z,a

def backprop(inputs, output, weight, bias, acts, av, size=50, iters=20, lr=0.01):
    n_samples = inputs.shape[0]
    global z_in
    for k in range(iters):
        shuff = np.random.permutation(n_samples)
        inputs = inputs[shuff]
        output = output[shuff]
        for i in range(0, n_samples, size):
            batch_inputs = inputs[i:i + size]
            batch_output = output[i:i + size]
            z, a = forprop(batch_inputs, weight, bias, acts, av)
            z_in = z
            er = dict()
            er[len(bias)] = a[len(bias)] - batch_output.T
            for j in range(len(bias)-1, 0, -1):
                er[j] = np.dot(weight[j + 1].T, er[j + 1]) * acts[av[j]](z[j], deriv=True)
                # delta_h = np.transpose(w_h_o) @ delta_o * (h * (1 - h))
            for j in range(1, len(bias) + 1):
                bias[j] -= lr * er[j].mean(axis=1, keepdims=True)
                weight[j] -= lr * (np.dot(er[j], a[j - 1].T) / batch_inputs.shape[0])
    return weight, bias, er, z_in

we, be = initialvals()
allprints(we, be)
w_calc, b_calc, er, z = backprop(x_train, y_train, we, be, activations, ['linear','sigmoid','softmax'])
allprints(w_calc, b_calc)
I have checked the error values as well as the shapes used in the operations; that does not seem to be the problem.
I have tried different learning rates and different batch sizes. I even tried the same setup using the tensorflow library and it gave good predictions, so the model structure is not the issue.
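For reference, the TensorFlow comparison was roughly the following (a sketch of an equivalent model, not the exact code I ran):

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(784,)),
    tf.keras.layers.Dense(10, activation='sigmoid'),   # same hidden layer size as above
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=50, epochs=20)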
I have also tried initializing my parameters in different ways (np.random.randn, zeros, etc.).
Upvotes: 1
Views: 117
Reputation: 2055
It took me a while to realise that sometimes things are not what they seem to be.
Actually, the weights of the first layer do change at each iteration!
To convince yourself, add these lines to the allprints function:
print(np.sum(np.abs(ww[1])))
print(ww[1][:5, 300:310])
As there are 784 columns in W1, the print is truncated to show only the first and last rows/columns. But dW1 = er[1] @ X / n, with X the training batch (a[0].T in the code). The data are images of digits in grey on a black background, and because the digits are more or less centred, after flattening the first and last few dozen pixels are 0 (black) for every image in the batch. When er[1] is multiplied by such a matrix, the beginning and end of each row of dW1 are 0. Since W1 <- W1 - lr * dW1, the numbers you see in the truncated print don't change ... but many of the numbers that are not printed do change.
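To see both effects directly, here is a minimal check (a sketch that assumes x_train, y_train, we, be, activations and backprop from the question are in scope; it snapshots W1 before one training call):

import numpy as np

# The corner pixels shown in the truncated print are black in every
# training image, so those columns of the training matrix are zero:
print(x_train[:, :3].max(), x_train[:, -3:].max())   # expected: 0.0 0.0

# Snapshot W1, train, then compare: the border columns never move,
# while interior columns (e.g. 300:310, as printed above) do.
w1_before = we[1].copy()
w_calc, b_calc, er, z = backprop(x_train, y_train, we, be, activations,
                                 ['linear', 'sigmoid', 'softmax'])
diff = np.abs(w_calc[1] - w1_before)
print(diff[:, :3].max())        # 0.0 -> the printed corner of W1 looks frozen
print(diff[:, 300:310].max())   # > 0 -> the interior weights did change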
Upvotes: 0