Reputation: 2181
I have a two layer CNN with the following architecture:
Here is this architecture represented in tensorflow:
import os
import tensorflow as tf
import sys
import urllib
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import glob
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif
img=np.asarray(Image.open(filename))
img_flat = img.reshape(img.size)
train.append(img_flat)
if sys.version_info[0] >= 3:
from urllib.request import urlretrieve
else:
from urllib import urlretrieve
LOGDIR = 'log3/'
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'
### MNIST EMBEDDINGS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
labels_test = np.array(ckp_labels).reshape(-1,1)
enc = OneHotEncoder()
enc.fit(labels_test)
labels_final = enc.transform(labels_test).toarray()
train = np.asarray(train)
# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
#w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
#b = tf.Variable(tf.zeros([size_out]), name="B")
w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
act = tf.nn.relu(conv + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
act = tf.nn.relu(tf.matmul(input, w) + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return act
def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
tf.reset_default_graph()
tf.set_random_seed(1)
sess = tf.Session()
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
tf.summary.image('input', x_image, 3)
y = tf.placeholder(tf.float32, shape=[None, 7], name="labels")
if use_two_conv:
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
else:
conv1 = conv_layer(x_image, 1, 64, "conv")
conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") #adding padding "VALID" means no padding
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
if use_two_fc:
fc1 = fc_layer(flattened, 55 * 55 * 64, 40, "fc1")
embedding_input = fc1
embedding_size = 40
logits = fc_layer(fc1, 40, 7, "fc2")
else:
embedding_input = flattened
embedding_size = 7*7*64
logits = fc_layer(flattened, 7*7*64, 10, "fc")
with tf.name_scope("xent"):
xent = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=y), name="xent")
tf.summary.scalar("xent", xent)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)
summ = tf.summary.merge_all()
embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
assignment = embedding.assign(embedding_input)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR + hparam)
writer.add_graph(sess.graph)
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([256, 256])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
for i in range(300):
batch_index = random.sample(range(0,100),25)
if i % 5 == 0:
[train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train[batch_index], y: labels_final[batch_index]})
writer.add_summary(s, i)
print ("train accuracy:", train_accuracy)
sess.run(train_step, feed_dict={x: train[batch_index], y: labels_final[batch_index]})
def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
conv_param = "conv2" if use_two_conv else "conv1"
fc_param = "fc2" if use_two_fc else "fc1"
return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param)
def main():
# You can try adding some more learning rates
#for learning_rate in [1E-3, 1E-4, 1E-5]:
for learning_rate in [1E-4]:
# Include "False" as a value to try different model architectures
#for use_two_fc in [True, False]:
for use_two_fc in [True]:
#for use_two_conv in [True, False]:
for use_two_conv in [True]:
# Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
print('Starting run for %s' % hparam)
sys.stdout.flush() # this forces print-ed lines to show up.
# Actually run with the new settings
mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)
if __name__ == '__main__':
main()
According to my math, everything checks out but when I run the code, I get the following error:
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 6553600 values, but the requested shape requires a multiple of 193600
I'm getting the error at this line of code:
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
I'm really not sure why that is happening. Is something wrong with my math because they are assuming the flattened size of conv_out should be [-1, 64*64*64]
.
Any help would be much appreciated. Let me know if you need any more information.
Upvotes: 1
Views: 262
Reputation: 3211
It looks to me like you have miscalculated the sizes that are the outputs of each convolution/pooling layer. Here's how you can figure this out. I distilled your code down to just this:
import tensorflow as tf
import numpy as np
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="VALID")
act = tf.nn.relu(conv + b)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
sess.run(tf.global_variables_initializer())
print(sess.run(tf.shape(conv1), {x: np.zeros([1, 256*256])}))
This code feeds a zero input of the right shape, and uses tf.shape()
to compute the shape of the conv1
output. I got back:
[ 1 128 128 32]
which doesn't match your calculated numbers.
I suspect you are miscalculating the padding, but it's hard to say without knowing how you derived the numbers in the table at the top. If nothing else, the first convolution has padding SAME
and strides of 1
, so the input and output will have the same spatial dimensions.
Hope this helps!
Upvotes: 1