Reputation: 89
I am currently working on creating a neural network in Python and I have been stuck on this problem.
Problem: for a target label such as
[0, 0, 0, 1, 0, 0, 0, 0]
the network produces essentially the same value for every output neuron:
[0.99999997, 0.99999997, 0.99999997, 0.99999997, 0.99999997, 0.99999997, 0.99999997, 0.99999997]
and the per-epoch error fluctuates instead of decreasing:
Epoch: 119 Error: [255.99999638]
Epoch: 120 Error: [143.99999741]
Epoch: 121 Error: [63.99999831]
Epoch: 122 Error: [3.99999957]
Epoch: 123 Error: [3.99999955]
Epoch: 124 Error: [35.99999874]
Epoch: 125 Error: [255.9999965]
What might be wrong here?
Code for the train() method:
def train(self, test_set, test_labels, validation_set, validation_label):
    total_error = numpy.zeros((max_epoch, 1))
    # temporary values, use reshape
    size_of_traning_set = len(test_set)
    len_test_set_col = len(test_set[0])
    len_test_label_col = len(test_labels[0])
    for count in range(0, max_epoch):
        random_permutations = numpy.random.permutation(size_of_traning_set)
        for count_2 in range(0, size_of_traning_set):
            random_index = random_permutations[count_2]
            x_in = numpy.reshape(test_set[random_index], (len_test_set_col, 1))
            d_out = numpy.reshape(test_labels[random_index], (len_test_label_col, 1))
            # forward propagation
            # 1st hidden layer
            v_hidden_layer_1 = numpy.add(numpy.dot(self.layer_one_weights, x_in), self.layer_one_bias)
            y_hidden_layer_1 = compute_activation(v_hidden_layer_1)
            # 2nd hidden layer
            v_hidden_layer_2 = numpy.add(numpy.dot(self.layer_two_weights, y_hidden_layer_1), self.layer_two_bias)
            y_hidden_layer_2 = compute_activation(v_hidden_layer_2)
            # output layer
            v_output_layer = numpy.add(numpy.dot(self.output_layer_weights, y_hidden_layer_2), self.output_layer_bias)
            final_output = compute_activation(v_output_layer)
            error_vector = d_out - final_output
            # compute gradient in output layer
            delta_output_x = numpy.multiply(error_vector, final_output)
            one_minus_out = 1 - final_output
            delta_output = numpy.multiply(delta_output_x, one_minus_out)
            # compute gradient in hidden layer 2
            one_minus_y_h2 = 1 - y_hidden_layer_2
            output_layer_weights_trans = numpy.transpose(self.output_layer_weights)
            deriv_hidden_layer_2_x = numpy.multiply(y_hidden_layer_2, one_minus_y_h2)
            deriv_out_layer = numpy.dot(output_layer_weights_trans, delta_output)
            delta_hidden_layer_2 = numpy.multiply(deriv_hidden_layer_2_x, deriv_out_layer)
            # compute gradient in hidden layer 1
            one_minus_y_h1 = 1 - y_hidden_layer_1
            hidden_layer_2_weights_trans = numpy.transpose(self.layer_two_weights)
            deriv_hidden_layer_1_x = numpy.multiply(y_hidden_layer_1, one_minus_y_h1)
            deriv_layer_2 = numpy.dot(hidden_layer_2_weights_trans, delta_hidden_layer_2)
            delta_hidden_layer_1 = numpy.multiply(deriv_hidden_layer_1_x, deriv_layer_2)
            # update weights and biases of output layer
            self.output_layer_weights = self.output_layer_weights + \
                numpy.multiply(self.learning_rate, numpy.dot(delta_output,
                               numpy.reshape(y_hidden_layer_2, (1, self.number_of_layer_2))))
            self.output_layer_bias = self.output_layer_bias + numpy.multiply(self.learning_rate, delta_output)
            # update weights and biases of hidden layer 2
            self.layer_two_weights = self.layer_two_weights + \
                numpy.multiply(self.learning_rate, numpy.dot(delta_hidden_layer_2,
                               numpy.reshape(y_hidden_layer_1, (1, self.number_of_layer_1))))
            self.layer_two_bias = self.layer_two_bias + numpy.multiply(self.learning_rate, delta_hidden_layer_2)
            # update weights and biases of hidden layer 1
            self.layer_one_weights = self.layer_one_weights + \
                numpy.multiply(self.learning_rate, numpy.dot(delta_hidden_layer_1,
                               numpy.reshape(x_in, (1, self.number_of_inputs))))
            self.layer_one_bias = self.layer_one_bias + numpy.multiply(self.learning_rate, delta_hidden_layer_1)
            # accumulate the squared error for this epoch
            err_sum = numpy.multiply(error_vector, error_vector)
            err_sum = numpy.divide(err_sum, 2)
            total_error[count] = total_error[count] + numpy.sum(err_sum)
        print('Epoch: {} Error: {}'.format(count, total_error[count]))
        if count % 10 == 0 and count != 0:
            self.validate(validation_set, validation_label)
Upvotes: 1
Views: 273
Reputation: 820
Assuming your code is correct: when you get the same output for all classes, it means that all of the neurons in your hidden layers are learning the same weights, in other words the same function, so your whole network is only doing what a single neuron could do. This happens when you initialize all of the weights with the same value. You must start with different random values for each weight.
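For example, a minimal sketch of what a symmetry-breaking initialization could look like, reusing the attribute names from the question's code (number_of_outputs is a hypothetical name for the size of the output layer, and the uniform range of ±0.5 is just an illustrative choice):

import numpy

def init_weights(self):
    # draw small random values so each neuron starts out computing a different
    # function; a constant initialization makes every neuron in a layer produce
    # identical outputs and receive identical gradients, so they never diverge
    self.layer_one_weights = numpy.random.uniform(
        -0.5, 0.5, (self.number_of_layer_1, self.number_of_inputs))
    self.layer_one_bias = numpy.zeros((self.number_of_layer_1, 1))

    self.layer_two_weights = numpy.random.uniform(
        -0.5, 0.5, (self.number_of_layer_2, self.number_of_layer_1))
    self.layer_two_bias = numpy.zeros((self.number_of_layer_2, 1))

    self.output_layer_weights = numpy.random.uniform(
        -0.5, 0.5, (self.number_of_outputs, self.number_of_layer_2))
    self.output_layer_bias = numpy.zeros((self.number_of_outputs, 1))

The biases can safely start at zero; it is the weights that must differ from one another so that each neuron receives a different gradient during backpropagation.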
Upvotes: 1