Reputation: 1616
I am trying to write a neural network with one hidden layer in TensorFlow to perform classification on the MNIST data. The hidden layer has size 30 (I also tried changing it, but the problem persists).

The problem is: when I don't use any hidden layer and directly compute X*w + b, I get 85% accuracy, but when I add the hidden layer as shown below, the accuracy stays at 0.113 and the cross-entropy loss stays at 2.3. I am sure it's a silly mistake on my end. Can someone please point out what is wrong with the code?
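For reference, the no-hidden-layer version that gets ~85% is essentially the following (a minimal sketch of what I mean, reusing the X placeholder defined in the full code below):

w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([1, 10]))
logits = tf.matmul(X, w) + b

Here is the full code with the hidden layer: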
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
learning_rate = 0.01
batch_size = 128
n_epochs = 10
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)  # load the MNIST dataset (directory path is arbitrary)

X = tf.placeholder(tf.float32, shape=(batch_size, 784))
Y = tf.placeholder(tf.float32, shape=(batch_size, 10))

# hidden layer: 784 -> 30 with ReLU
w1 = tf.Variable(tf.zeros([784, 30]))
b1 = tf.Variable(tf.zeros([1, 30]))
z = tf.matmul(X, w1) + b1
a = tf.nn.relu(z)

# output layer: 30 -> 10
w2 = tf.Variable(tf.zeros([30, 10]))
b2 = tf.Variable(tf.zeros([1, 10]))
logits = tf.matmul(a, w2) + b2

entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)
loss = tf.reduce_mean(entropy)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    start_time = time.time()
    sess.run(tf.global_variables_initializer())
    n_batches = int(mnist.train.num_examples / batch_size)
    for i in range(n_epochs):  # train the model n_epochs times
        total_loss = 0
        for _ in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            _, loss_batch = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            total_loss += loss_batch
        print('Average loss epoch {0}: {1}'.format(i, total_loss / n_batches))
    print('Optimization Finished!')  # should be around 0.35 after 25 epochs

    # evaluate on the test set
    preds = tf.nn.softmax(logits)
    correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

    n_batches = int(mnist.test.num_examples / batch_size)
    total_correct_preds = 0
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        _, accuracy_batch = sess.run([correct_preds, accuracy], feed_dict={X: X_batch, Y: Y_batch})
        total_correct_preds += accuracy_batch
    print('Accuracy {0}'.format(total_correct_preds / mnist.test.num_examples))
Upvotes: 0
Views: 841
Reputation: 876
Try initializing your weights with random values instead of zeros, as described here:

https://www.tensorflow.org/get_started/mnist/pros#weight_initialization

With every weight at zero, the hidden ReLU outputs are all zero, so the gradients for w1, b1, and w2 are zero too; only the output bias b2 ever gets updated. The network therefore predicts essentially the same class for every input, which is exactly why your accuracy sticks at ~11.3% (the frequency of the most common digit) and your loss at ln(10) ≈ 2.3 (the cross entropy of a uniform softmax). For example, for both weight matrices:
w1 = tf.Variable(tf.truncated_normal([784, 30], stddev=0.1))
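w2 = tf.Variable(tf.truncated_normal([30, 10], stddev=0.1))

(The biases can stay at zero: once the weights are random, the symmetry is broken and every layer receives a nonzero gradient. The stddev=0.1 is the value used in the linked tutorial.)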
Upvotes: 2