Reputation: 37
I want to train a model with the following layers:
# Size of the final embedding vector produced by the network.
embedding_dim = 80
model = tf.keras.Sequential()
# Input images are (height=50, width=120, channels=3) — matches the
# (batch, 50, 120, 3) shapes listed below.
model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', input_shape=(50, 120, 3)))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
model.add(tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.Conv2D(512, 3, padding='same', activation='relu'))
# Asymmetric stride (2, 4): downsamples height by 2 and width by 4.
model.add(tf.keras.layers.Conv2D(512, 2, strides=(2, 4), activation='relu'))
model.add(tf.keras.layers.Conv2D(512, 3, activation='relu'))
# Adds fixed sinusoidal positional encodings (no trainable weights);
# add_timing_signal_nd is defined further below.
model.add(tf.keras.layers.Lambda(add_timing_signal_nd))
model.add(tf.keras.layers.Flatten())
# Final projection to the embedding; raw logits (no activation), which is
# consistent with from_logits=True in the loss used at compile time.
model.add(tf.keras.layers.Dense(embedding_dim))
After, I run
# NOTE(review): reduction='none' makes the loss return one value per example
# rather than a scalar; Keras can still aggregate it, but it is unusual here.
# The "No gradients provided for any variable" error is most often caused by
# the dataset yielding only images without labels — confirm that
# image_dataset yields (image, label) pairs, not bare image tensors.
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none'), metrics=['accuracy'])
model.fit(image_dataset, epochs=10, validation_data=val_dataset)
I get the following error
ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'conv2d_5/kernel:0', 'conv2d_5/bias:0', 'conv2d_6/kernel:0', 'conv2d_6/bias:0', 'dense/kernel:0', 'dense/bias:0'].
For further context, add_timing_signal_nd
is defined as follows:
def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add n-dimensional sinusoidal positional ("timing") signals to x.

    For each of the n positional dimensions, builds sin/cos waves over a
    geometric progression of timescales between min_timescale and
    max_timescale, pads each dimension's signal into its own slice of the
    channel axis, and adds it to x.

    Args:
        x: a Tensor with shape [batch, d1 ... dn, channels].
        min_timescale: a float, shortest wavelength of the sinusoids.
        max_timescale: a float, longest wavelength of the sinusoids.

    Returns:
        a Tensor the same shape as x.
    """
    static_shape = x.get_shape().as_list()
    # Number of positional dimensions (everything between batch and channels).
    num_dims = len(static_shape) - 2
    channels = tf.shape(x)[-1]
    # Each positional dim gets 2 * num_timescales channels (sin + cos).
    num_timescales = channels // (num_dims * 2)
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (tf.cast(num_timescales, dtype=tf.float32) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), dtype=tf.float32) *
        -log_timescale_increment)
    # FIX: the original used xrange, which does not exist in Python 3 and
    # raises NameError under the TF 2.x stack the rest of this code targets.
    for dim in range(num_dims):
        length = tf.shape(x)[dim + 1]
        position = tf.cast(tf.range(length), dtype=tf.float32)
        # [length, num_timescales] grid of position * inverse-timescale.
        scaled_time = (tf.expand_dims(position, 1) *
                       tf.expand_dims(inv_timescales, 0))
        signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
        # Pad so this dim's signal occupies its own channel slice.
        prepad = dim * 2 * num_timescales
        postpad = channels - (dim + 1) * 2 * num_timescales
        signal = tf.pad(signal, [[0, 0], [prepad, postpad]])
        # Insert singleton axes so the signal broadcasts against x.
        for _ in range(1 + dim):
            signal = tf.expand_dims(signal, 0)
        for _ in range(num_dims - 1 - dim):
            signal = tf.expand_dims(signal, -2)
        x += signal
    return x
If it helps, my input batch shapes are the following:
(3, 50, 120, 3)
(3, 50, 120, 3)
(3, 50, 120, 3)
(1, 50, 120, 3)
Also, I'm planning to extract the weights after training to use them in another problem.
Thanks in advance!
Upvotes: 0
Views: 142
Reputation: 1298
I suspect that image_dataset or val_dataset is not constructed properly. Following your code, I simulated some data (including labels) to train on, and it ran without errors.
# Simulated inputs: 3000 training / 300 validation images of shape
# (50, 120, 3) with values in [0, 1).
image_dataset = np.random.uniform(0, 1, (3000, 50, 120, 3))
# Integer class labels in [0, embedding_dim), as required by
# SparseCategoricalCrossentropy.
# FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24, so
# .astype(np.int) raises AttributeError on current NumPy — use an explicit
# dtype instead.
image_dataset_y = np.random.uniform(0, embedding_dim, (3000,)).astype(np.int64)
val_dataset = np.random.uniform(0, 1, (300, 50, 120, 3))
val_dataset_y = np.random.uniform(0, embedding_dim, (300,)).astype(np.int64)
model.fit(image_dataset, image_dataset_y, batch_size=30, epochs=10,
          validation_data=(val_dataset, val_dataset_y))
Upvotes: 1