Reputation: 261
I converted the MNIST dataset using the script here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/how_tos/reading_data/convert_to_records.py
Below is the code that I use to read the TFRecord, build the model, and train.
import tensorflow as tf
BATCH_SIZE = 32
epoch = 20
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
def parse_func(serialized_data):
keys_to_features = {'image_raw': tf.FixedLenFeature([],tf.string),
'label': tf.FixedLenFeature([], tf.int64)}
parsed_features = tf.parse_single_example(serialized_data, keys_to_features)
prices = tf.decode_raw(parsed_features['image_raw'],tf.float32)
label = tf.cast(parsed_features['label'], tf.int32)
return prices,tf.one_hot(label - 1, 10)
def input_fn(filenames):
dataset = tf.data.TFRecordDataset(filenames=filenames)
dataset = dataset.map(parse_func,num_parallel_calls=8)
dataset = dataset.batch(BATCH_SIZE).prefetch(50)
# dataset = dataset.shuffle(2000)
return dataset.make_initializable_iterator()
weights = {
'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([num_classes]))
}
# Create model
def neural_net(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Hidden fully connected layer with 256 neurons
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
def inference(input):
input = tf.reshape(input,[-1,784])
dense = tf.layers.dense(inputs=input, units=1024, activation=tf.nn.relu)
# Logits Layer
output = tf.layers.dense(inputs=dense, units=10)
return output
train_iter = input_fn('train_mnist.tfrecords')
valid_iter = input_fn('validation_mnist.tfrecords')
is_training = tf.placeholder(shape=[],dtype=tf.bool)
img,labels = tf.cond(is_training,lambda :train_iter.get_next(),lambda :valid_iter.get_next())
# img,labels = train_iter.get_next()
logits = neural_net(img)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
train_op = tf.train.AdamOptimizer().minimize(loss_op)
prediction = tf.nn.softmax(logits)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for e in range(epoch):
epoch_loss = 0
sess.run(train_iter.initializer)
count = 0
while True:
try:
count +=1
_,c = sess.run([train_op,loss_op],feed_dict={is_training:True})
epoch_loss += c
except tf.errors.OutOfRangeError:
break
print('Epoch', e, ' completed out of ', epoch, ' Epoch loss: ',epoch_loss,' count :',count)
total_acc = 0
count = 0
sess.run(valid_iter.initializer)
while True:
try:
count += 1
acc = sess.run(accuracy,feed_dict={is_training:False})
total_acc += acc
except tf.errors.OutOfRangeError:
break
print('Accuracy: ', total_acc/count,' count ',count)
I don't know if I did anything wrong, but the loss and accuracy are not improved after a few epochs. I tested the model above with the traditional way, the feed_dict method. Everything worked fine, I could reach 85% accuracy with that model. Here is the output of the code above
Epoch 0 completed out of 20 Epoch loss: 295472940.19140625 count : 1720
Accuracy: 0.5727848101265823 count 158
Epoch 1 completed out of 20 Epoch loss: 2170057598.328125 count : 1720
Accuracy: 0.22231012658227847 count 158
Epoch 2 completed out of 20 Epoch loss: 6578130587.9375 count : 1720
Accuracy: 0.29944620253164556 count 158
Epoch 3 completed out of 20 Epoch loss: 13321823489.0 count : 1720
Accuracy: 0.13310917721518986 count 158
Epoch 4 completed out of 20 Epoch loss: 22460952288.75 count : 1720
Accuracy: 0.20787183544303797 count 158
Epoch 5 completed out of 20 Epoch loss: 34615459125.0 count : 1720
Accuracy: 0.28560126582278483 count 158
Epoch 6 completed out of 20 Epoch loss: 50057282083.0 count : 1720
Accuracy: 0.11748417721518987 count 158
I checked the output of the Dataset. Everything look normal and have correct shape. Can somebody point out what I did wrong here ?
EDIT This is the working code, which uses the traditional feed_dict method
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
BATCH_SIZE = 32
epoch = 5
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, num_input])
Y = tf.placeholder("float", [None, num_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([num_classes]))
}
# Create model
def neural_net(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Hidden fully connected layer with 256 neurons
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Construct model
logits = neural_net(X)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
train_op = tf.train.AdamOptimizer().minimize(loss_op)
prediction = tf.nn.softmax(logits)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"))
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(tf.global_variables_initializer())
for e in range(epoch):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples / BATCH_SIZE)):
epoch_x, epoch_y = mnist.train.next_batch(BATCH_SIZE)
_, c = sess.run([train_op, loss_op], feed_dict={X: epoch_x, Y: epoch_y})
epoch_loss += c
print('Epoch', e, ' completed out of ', epoch, ' Epoch loss: ', epoch_loss)
# Calculate accuracy for MNIST test images
print("Testing Accuracy:",sess.run(accuracy, feed_dict={X: mnist.test.images,Y: mnist.test.labels}))
Upvotes: 0
Views: 576
Reputation: 261
I found my mistake. In the parse function, I decode the label into one hot vector by using
tf.one_hot(label - 1, 10)
It should be
tf.one_hot(label, 10)
Upvotes: 0
Reputation: 3763
@Thien , I downloaded all your files and ran them to generate the tfrecords and then load the tf records. I inspected your tf records and the image batch returns a shape of 32,194 (which is 14x14 , not 28x28). I then used matplotlib to look at the images and they don't look like digits at all and do not look like the original mnist data. Your encoding/decoding into tfrecords is the problem. Consider writing an encoding function for your tf records, a decoding function for your tf records, and then testing that tfdecode( tfencode( a ) ) == a.
x,y = train_iter.get_next()
a = sess.run(x)
import matplotlib.pyplot as plt
plt.imshow( a[0].reshape(14,14) )
plt.gray()
plt.show()
Upvotes: 0
Reputation: 4183
Without seeing your tfrecords
files it's difficult to say for sure, but if your data is sorted according to label (i.e. the first 10% of labels are 0s, the second 10% are 1s etc) then failing to shuffle will have a significant effect on your results. 57% accuracy after a single epoch also seems quite surprising (though I've never looked at results at that point), so it's possible your evaluation metric (accuracy) isn't correct (though I can't see anything clearly wrong).
If you haven't visualized your inputs (i.e. the actual images and labels, not just the shape) definitely do that as a first step.
Quite apart from your question, one clear weakness of your code is the lack of non-linearities - a linear layer followed immediately by a linear layer is equivalent to a linear layer. To get more complex behaviour/better results, add a non-linearity e.g. tf.nn.relu
after each layer apart from the last, e.g.
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
Finally, prefetch
ing a large number of dataset elements defeats the purpose of prefetching
. 1
or 2
is generally enough.
Upvotes: 1