Reputation: 1527
I tried to port the Levi-Hassner non-BN model from the rude-carnie GitHub project to tf.layers syntax, which now looks like this:
def model_fn(features, labels, mode):
    """Levi-Hassner (non-BN) model ported to tf.layers, for tf.estimator."""
    pkeep = 0.5
    weight_decay = 0.0005
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    weights_initializer = tf.random_normal_initializer(stddev=0.01)

    input_image = features["x"]
    input_layer = tf.reshape(input_image, [-1, 227, 227, 3])

    # Conv block 1: 96 7x7 filters, stride 4, then 3x3/2 max-pool and LRN.
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=96,
        kernel_size=[7, 7],
        strides=[4, 4],
        padding="valid",
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(0.),
        kernel_regularizer=weights_regularizer,
        activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(
        inputs=conv1,
        pool_size=[3, 3],
        strides=2,
        padding="valid")
    lr_n1 = tf.nn.local_response_normalization(
        input=pool1, depth_radius=5, alpha=0.0001, beta=0.75, name="norm1")

    # Conv block 2: 256 5x5 filters, SAME padding, then max-pool and LRN.
    conv2 = tf.layers.conv2d(
        inputs=lr_n1,
        filters=256,
        kernel_size=[5, 5],
        strides=[1, 1],
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(1.),
        kernel_regularizer=weights_regularizer,
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(
        inputs=conv2,
        pool_size=[3, 3],
        strides=2,
        padding="valid")
    lr_n2 = tf.nn.local_response_normalization(
        input=pool2, depth_radius=5, alpha=0.0001, beta=0.75, name="norm2")

    # Conv block 3: 384 3x3 filters, SAME padding, then max-pool.
    conv3 = tf.layers.conv2d(
        inputs=lr_n2,
        filters=384,
        kernel_size=[3, 3],
        strides=[1, 1],
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(0.),
        kernel_regularizer=weights_regularizer,
        padding="same",
        activation=tf.nn.relu)
    pool3 = tf.layers.max_pooling2d(
        inputs=conv3,
        pool_size=[3, 3],
        strides=2,
        padding="valid")

    # Two fully connected layers with dropout (active only in TRAIN mode).
    flat = tf.reshape(pool3, [-1, 384 * 6 * 6], name="reshape")
    full1 = tf.layers.dense(inputs=flat, units=512)
    drop1 = tf.layers.dropout(inputs=full1, rate=1 - pkeep,
                              training=(mode == tf.estimator.ModeKeys.TRAIN),
                              name="drop1")
    full2 = tf.layers.dense(inputs=drop1, units=512)
    drop2 = tf.layers.dropout(inputs=full2, rate=1 - pkeep,
                              training=(mode == tf.estimator.ModeKeys.TRAIN),
                              name="drop2")

    # Output layer: hand-rolled 512 -> 8 affine transform.
    l_weights = tf.Variable(tf.random_normal([512, 8], mean=0.0, stddev=0.01),
                            name="weights")
    l_biases = tf.Variable(tf.constant(0.0, shape=[8], dtype=tf.float32),
                           name="biases")
    # logits = tf.layers.dense(inputs=drop2, units=8,
    #                          kernel_initializer=tf.constant_initializer(l_weights))
    logits = tf.add(tf.matmul(drop2, l_weights), l_biases)

    tf.logging.info("{} {} {} {}, {}, {}".format(
        pool1.get_shape(), pool2.get_shape(), pool3.get_shape(),
        flat.get_shape(), drop2.get_shape(), logits.get_shape()))

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="matmul_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            "predict_output": tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                          export_outputs=export_outputs)

    # Calculate Loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=8)
    # logger.info("onehots {} logits {}".format(tf.shape(onehot_labels), tf.shape(logits)))
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # optimizer = tf.train.MomentumOptimizer(learning_rate=0.01,
        #                                        momentum=0.9,
        #                                        use_nesterov=True)
        # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
        optimizer = tf.train.AdadeltaOptimizer(rho=0.95, epsilon=1e-6)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
Running it with my tf.Estimator gives the error in the title.
I've checked the tensors' sizes: conv1's is (?, 56, 56, 96), pool1's is (?, 27, 27, 96), pool2's is (?, 13, 13, 256), pool3's is (?, 6, 6, 384), flat's is (?, 13824), drop2's is (?, 512), and logits' is (?, 8).
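For reference, those sizes agree with the VALID convolution/pooling arithmetic floor((n - k) / s) + 1; conv2 and conv3 are SAME-padded with stride 1, so they don't change the spatial size. A quick check:

def out_size(n, k, s):
    # Spatial output size of a VALID-padded conv/pool along one dimension.
    return (n - k) // s + 1

assert out_size(227, 7, 4) == 56  # conv1
assert out_size(56, 3, 2) == 27   # pool1
assert out_size(27, 3, 2) == 13   # pool2
assert out_size(13, 3, 2) == 6    # pool3
assert 6 * 6 * 384 == 13824       # flat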
All the shapes seem to check out to me, or am I missing something? And since the '?' refers to the batch size, which I set to 128, is it possible that I'm setting up the TFRecords input function wrong?
EDIT: With a batch size of 1, I get the same error, except with a twist: the logits_size is [4, 8] and the labels_size is [1, 8].
Upvotes: 0
Views: 83
Reputation: 1527
Turns out it really is my TFRecords reading function: I encoded my images as float32 but read them back as uint8.
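The twist in the edit is the giveaway: a float32 takes 4 bytes, so decoding float32-encoded bytes as uint8 yields four times as many elements, and the [-1, 227, 227, 3] reshape then packs them into four times as many examples, hence logits_size [4, 8] against labels_size [1, 8]. A minimal sketch of the corrected parse function, assuming the images were serialized as raw float32 bytes; the feature names ('image_raw', 'label') are illustrative, not from the original code:

import tensorflow as tf

def _parse(serialized):
    # Feature names here are assumptions about the writer side; match them
    # to however the TFRecords were actually built.
    parsed = tf.parse_single_example(
        serialized,
        features={
            "image_raw": tf.FixedLenFeature([], tf.string),
            "label": tf.FixedLenFeature([], tf.int64),
        })
    # Decode with the SAME dtype the bytes were written in; tf.uint8 here
    # would produce 4x the elements and, after the reshape in the model_fn,
    # 4x the examples.
    image = tf.decode_raw(parsed["image_raw"], tf.float32)
    image = tf.reshape(image, [227, 227, 3])
    return {"x": image}, parsed["label"]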
Upvotes: 1