Reputation: 741
With the help of the tf.layers module I created a simple CNN and trained it on the MNIST dataset.
First we load the data:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
Then we set some basic parameters, build the model, and train it:
learning_rate = 0.01
training_epochs = 10
batch_size = 100
x = tf.placeholder(tf.float32, [None, 784], name='InputData')
y = tf.placeholder(tf.float32, [None, 10], name='LabelData')
with tf.name_scope('Model'):
    input_layer = tf.reshape(x, [-1, 28, 28, 1], name='InputReshaped')
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[4, 4], padding="same", activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    dropout1 = tf.layers.dropout(inputs=pool1, rate=0.25)
    conv2 = tf.layers.conv2d(inputs=dropout1, filters=32, kernel_size=[4, 4], padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    dropout2 = tf.layers.dropout(inputs=pool2, rate=0.25)
    pool2_flat = tf.reshape(dropout2, [-1, 7 * 7 * 32])
    dense = tf.layers.dense(inputs=pool2_flat, units=256, activation=tf.nn.relu)
    dropout3 = tf.layers.dropout(inputs=dense, rate=0.5)
    pred = tf.layers.dense(inputs=dropout3, units=10)

with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

with tf.name_scope('SGD'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)

with tf.name_scope('Accuracy'):
    acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        avg_cost = 0.
        avg_acc = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c, ac = sess.run([train_step, loss, acc], feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c / total_batch
            avg_acc += ac / total_batch
        print("Epoch: {:04}, avg_cost = {:.9f}, avg_acc = {:.4f}".format(epoch + 1, avg_cost, avg_acc))
    print("Optimization Finished!")
It works fine, performs decently and outputs the following:
Epoch: 0001, avg_cost = 1.032827925, avg_acc = 0.7110
Epoch: 0002, avg_cost = 0.271804677, avg_acc = 0.9180
...
Epoch: 0010, avg_cost = 0.067859485, avg_acc = 0.9790
Optimization Finished!
However, I want to rewrite the model without using tf.layers. So I changed the code in the Model block to the following, which I think should work almost identically to the previous one:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1, mean=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

with tf.name_scope('Model'):
    with tf.name_scope('Input_L'):
        input_tsr = tf.reshape(x, [-1, 28, 28, 1], name='InputReshaped')
    with tf.name_scope('Conv1_L'):
        W_conv1 = weight_variable([4, 4, 1, 32])
        b_conv1 = bias_variable([32])
        conv1 = tf.add(tf.nn.conv2d(input_tsr, W_conv1, strides=[1, 1, 1, 1], padding='SAME'), b_conv1)
        h_conv1 = tf.nn.relu(conv1)
        h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        dropout1 = tf.nn.dropout(h_pool1, 0.75)
    with tf.name_scope('Conv2_L'):
        W_conv2 = weight_variable([4, 4, 32, 32])
        b_conv2 = bias_variable([32])
        conv2 = tf.add(tf.nn.conv2d(dropout1, W_conv2, strides=[1, 1, 1, 1], padding='SAME'), b_conv2)
        h_conv2 = tf.nn.relu(conv2)
        h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        dropout2 = tf.nn.dropout(h_pool2, 0.75)
    with tf.name_scope('Dense_L'):
        W_dense = weight_variable([7 * 7 * 32, 256])
        b_dense = bias_variable([256])
        flat_tsr = tf.reshape(dropout2, [-1, 7 * 7 * 32])
        dense = tf.add(tf.matmul(flat_tsr, W_dense), b_dense)
        h_dense = tf.nn.relu(dense)
        dropout3 = tf.nn.dropout(h_dense, 0.5)
    with tf.name_scope('Output_L'):
        W_out = weight_variable([256, 10])
        b_out = bias_variable([10])
        pred = tf.add(tf.matmul(dropout3, W_out), b_out)
Unfortunately it performs very poorly and the accuracy never gets above 0.12, which I think means the model is just guessing the answer at random (random guessing over 10 classes would give about 0.10).
Epoch: 0001, avg_cost = 22.226242821, avg_acc = 0.1106
Epoch: 0002, avg_cost = 2.301470806, avg_acc = 0.1123
...
Epoch: 0010, avg_cost = 2.301233784, avg_acc = 0.1123
Optimization Finished!
Why does the second model not learn properly? Can you point out the difference between the first model and the second one (apart from the weight and bias initialization)?
Upvotes: 0
Views: 76
Reputation: 24591
I don't think it is mentioned in the documentation, but for layers in the tf.layers submodule, variable initializers default to glorot_uniform_initializer when None is provided. If you replace your weight definition accordingly, you should get closer to your previous results.
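A minimal sketch of that change might look like this (assuming TensorFlow 1.x, where the Glorot uniform scheme is exposed as tf.glorot_uniform_initializer; the helper names are the ones from your question):

# Hypothetical replacement for the question's helpers, mirroring the
# tf.layers defaults: Glorot (Xavier) uniform weights and zero biases.
def weight_variable(shape):
    initializer = tf.glorot_uniform_initializer()
    return tf.Variable(initializer(shape))

def bias_variable(shape):
    # tf.layers.conv2d / tf.layers.dense initialize biases to zero by default.
    return tf.Variable(tf.zeros(shape))

With this, the weights start centered on zero with a variance scaled to each layer's fan-in and fan-out, instead of the truncated normal with mean 0.1 used in your version.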
Upvotes: 1