What's wrong with my backpropagation?

Question

I'm trying to code a neural network from scratch in python. To check whether everything works I wanted to overfit the network but the loss seems to explode at first and then comes back to the initial value and stops there (Doesn't converge). I've checked my code and could find the reason. I assume my understanding or implementation of backpropagation is incorrect but there might be some other reason. Can anyone help me out or at least point me in the right direction?

# Initialize weights and biases given dimesnsions (For this example the dimensions are set to [12288, 64, 1])
def initialize_parameters(dims):
    # Initiate parameters
    parameters = {} 
    L = len(dims) # Number of layers in the network

    # Loop over the given dimensions. Initialize random weights and set biases to zero.
    for i in range(1, L):
        parameters["W" + str(i)] = np.random.randn(dims[i], dims[i-1]) * 0.01
        parameters["b" + str(i)] = np.zeros([dims[i], 1])

    return parameters

# Activation Functions
def relu(x, deriv=False):
    if deriv:
        return 1. * (x > 0)
    else:
        return np.maximum(0,x)

def sigmoid(x, deriv=False):
    if deriv:
        return x * (1-x)
    else:
        return 1/(1 + np.exp(-x))


# Forward and backward pass for 2 layer neural network. (1st relu, 2nd sigmoid)
def forward_backward(X, Y, parameters):
    # Array for storing gradients
    grads = {}

    # Get the length of examples
    m = Y.shape[1]

    # First layer
    Z1 = np.dot(parameters["W1"], X) + parameters["b1"]
    A1 = relu(Z1)

    # Second layer
    Z2 = np.dot(parameters["W2"], A1) + parameters["b2"]
    AL = sigmoid(Z2)

    # Compute cost
    cost = (-1 / m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL)))

    # Backpropagation
    # Second Layer
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    dZ2 = dAL * sigmoid(AL, deriv=True)
    grads["dW2"] = np.dot(dZ2, A1.T) / m
    grads["db2"] = np.sum(dZ2, axis=1, keepdims=True) / m

    # First layer
    dA1 = np.dot(parameters["W2"].T, dZ2)
    dZ1 = dA1 * relu(A1, deriv=True)
    grads["dW1"] = np.dot(dZ1, X.T)
    grads["db1"] = np.sum(dZ1, axis=1, keepdims=True) / m

    return AL, grads, cost

# Hyperparameters
dims = [12288, 64, 1]
epoches = 2000
learning_rate = 0.1

# Initialize parameters
parameters = initialize_parameters(dims)
log_list = []

# Train the network
for i in range(epoches):
    # Get X and Y
    x = np.array(train[0:10],ndmin=2).T
    y = np.array(labels[0:10], ndmin=2).T

    # Perform forward and backward pass
    AL, grads, cost = forward_backward(x, y, parameters)

    # Compute cost and append to the log_list
    log_list.append(cost)

    # Update parameters with computed gradients 
    parameters = update_parameters(grads, parameters, learning_rate)

plt.plot(log_list)
plt.title("Loss of the network")
plt.show()

What's wrong with my backpropagation?

Answers (1)

Related Questions

What&#39;s wrong with my backpropagation?

Answers (1)

Related Questions

What's wrong with my backpropagation?