Gensoukyou1337

Reputation: 1527

Tensorflow-Python - InvalidArgumentError - logits and labels must be of the same size, logits_size=[512, 8] labels_size=[128, 8]

I tried to port the Levi-Hassner non-BN model from the rude-carnie GitHub project to tf.layers syntax, which now looks like this:

pkeep = 0.5

weight_decay = 0.0005
weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
weights_initializer = tf.random_normal_initializer(stddev=0.01)

input_image = features["x"]

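# -1 lets the batch dimension be inferred from however many values the
# input function provides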
input_layer = tf.reshape(input_image, [-1, 227, 227, 3])

conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=96,
        kernel_size=[7,7],
        strides=[4,4],
        padding="valid",
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(0.),
        kernel_regularizer=weights_regularizer,
        activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                pool_size=[3, 3],
                                strides=2,
                                padding="valid")
lr_n1 = tf.nn.local_response_normalization(input=pool1, depth_radius=5, alpha=0.0001, beta=0.75, name='norm1')

conv2 = tf.layers.conv2d(
        inputs=lr_n1,
        filters=256,
        kernel_size=[5,5],
        strides=[1,1],
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(1.),
        kernel_regularizer=weights_regularizer,
        padding="same",
        activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                pool_size=[3, 3],
                                strides=2,
                                padding="valid")
lr_n2 = tf.nn.local_response_normalization(input=pool2, 
                                           depth_radius=5, 
                                           alpha=0.0001, 
                                           beta=0.75, 
                                           name='norm2')

conv3 = tf.layers.conv2d(
        inputs=lr_n2,
        filters=384,
        kernel_size=[3,3],
        strides=[1,1],
        kernel_initializer=weights_initializer,
        bias_initializer=tf.constant_initializer(0.),
        kernel_regularizer=weights_regularizer,
        padding="same",
        activation=tf.nn.relu)
pool3 = tf.layers.max_pooling2d(inputs=conv3,
                                pool_size=[3, 3],
                                strides=2,
                                padding="valid")

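# pool3 is (?, 6, 6, 384), so each example flattens to 6*6*384 = 13824 values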
flat = tf.reshape(pool3, [-1, 384*6*6], name="reshape")

full1 = tf.layers.dense(inputs=flat, units=512)
drop1 = tf.layers.dropout(inputs=full1, rate=1-pkeep, training=(mode == tf.estimator.ModeKeys.TRAIN), name='drop1')
full2 = tf.layers.dense(inputs=drop1, units=512)
drop2 = tf.layers.dropout(inputs=full2, rate=1-pkeep, training=(mode == tf.estimator.ModeKeys.TRAIN), name='drop2')

l_weights = tf.Variable(tf.random_normal([512, 8], mean=0.0, stddev=0.01), name='weights')
l_biases = tf.Variable(tf.constant(0.0, shape=[8], dtype=tf.float32), name='biases')
#logits = tf.layers.dense(inputs=drop2, units=8,kernel_initializer=tf.constant_initializer(l_weights))
logits = tf.add(tf.matmul(drop2, l_weights), l_biases)

tf.logging.info("{} {} {} {}, {}, {}".format(pool1.get_shape(), pool2.get_shape(), pool3.get_shape(), flat.get_shape(), drop2.get_shape(), logits.get_shape()))

predictions = {
    # Generate predictions (for PREDICT and EVAL mode)
    "classes": tf.argmax(input=logits, axis=1),
    # Add the softmax tensor to the graph. It is used for PREDICT and by
    # the `logging_hook`.
    "probabilities": tf.nn.softmax(logits, name="matmul_tensor")
}

if mode == tf.estimator.ModeKeys.PREDICT:
    export_outputs = {
            'predict_output': tf.estimator.export.PredictOutput(predictions)
    }
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

# Calculate Loss (for both TRAIN and EVAL modes)
onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=8)
#logger.info("onehots {} logits {}".format(tf.shape(onehot_labels), tf.shape(logits)))
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
    """
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.01,
                                           momentum=0.9,
                                           use_nesterov=True)
    """
    #optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    optimizer= tf.train.AdadeltaOptimizer(rho=0.95, epsilon=1e-6)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {
    "accuracy": tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

Running it with my tf.Estimator gives the error in the title.

I've checked the tensors' sizes: conv1's is (?, 56, 56, 96), pool1's is (?, 27, 27, 96), pool2's is (?, 13, 13, 256), pool3's is (?, 6, 6, 384), flat's is (?, 13824), drop2's is (?, 512), and logits' is (?, 8).
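Working through the arithmetic by hand agrees: conv1 gives (227 - 7)/4 + 1 = 56, pool1 gives (56 - 3)/2 + 1 = 27, pool2 gives (27 - 3)/2 + 1 = 13 (conv2 is same-padded with stride 1), pool3 gives (13 - 3)/2 + 1 = 6, and the flatten gives 6 * 6 * 384 = 13824.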

All the shapes seem to check out, or am I missing something? And since the '?' refers to the batch size, which I set to 128, is it possible that I'm setting up my TFRecords input function wrong?

EDIT:

With a batch size of 1, I get the same error with a twist: logits_size is [4, 8] and labels_size is [1, 8].
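For reference, the relevant part of my record-parsing function looks roughly like this (simplified, with placeholder feature keys):

parsed = tf.parse_single_example(
    serialized,
    features={"image_raw": tf.FixedLenFeature([], tf.string),
              "label": tf.FixedLenFeature([], tf.int64)})
image = tf.cast(tf.decode_raw(parsed["image_raw"], tf.uint8), tf.float32)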

Upvotes: 0

Views: 83

Answers (1)

Gensoukyou1337

Reputation: 1527

Turns out it really was my TFRecords reading function: I encoded my images as float32 but read them back as uint8. Since a float32 is 4 bytes and a uint8 is 1, decoding yields 4x as many values as expected, and the [-1, 227, 227, 3] reshape folds the extras into the batch dimension, which is exactly the 512 = 4 * 128 (and 4 = 4 * 1) mismatch in the error.
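As a minimal sketch of the fix, assuming the images were serialized from a float32 array (e.g. with tostring()) and using the placeholder feature key "image_raw" from the parsing snippet above:

# Wrong: float32 bytes decoded as uint8 yield 4x as many values, and the
# [-1, 227, 227, 3] reshape in the model folds them into the batch dimension.
# image = tf.decode_raw(parsed["image_raw"], tf.uint8)

# Right: decode with the same dtype the array was written with.
image = tf.decode_raw(parsed["image_raw"], tf.float32)
image = tf.reshape(image, [227, 227, 3])

Alternatively, keep the data as uint8 on disk and tf.cast to float32 after decoding; what matters is that the write-side and read-side dtypes match.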

Upvotes: 1
