Reputation: 11
I use TensorFlow Keras to make my GAN model I want to use TPU And I have the error with tf.GradientTape() Gradients for my discriminator are ok BUT All Gradients for my generator are None Please use my colab for understand
Thank you!
Error:
discriminator_pretrain_loss real_output Tensor("sequential_8/dense_8/BiasAdd:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss fake_output Tensor("sequential_8/dense_8/BiasAdd_1:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss like Tensor("likes:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss real_loss Tensor("binary_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)
discriminator_pretrain_loss fake_loss Tensor("binary_crossentropy_1/weighted_loss/value:0", shape=(), dtype=float32)
discriminator_pretrain_loss fake_loss Tensor("add:0", shape=(), dtype=float32)
disc_loss Tensor("add:0", shape=(), dtype=float32)
vars gen_tape: ['dense_7/kernel/packed:0', 'conv2d_transpose_16/kernel/packed:0', 'conv2d_transpose_17/kernel/packed:0', 'conv2d_transpose_18/kernel/packed:0', 'conv2d_transpose_19/kernel/packed:0', 'conv2d_12/kernel:0', 'conv2d_12/bias:0', 'conv2d_13/kernel:0', 'conv2d_13/bias:0', 'conv2d_14/kernel:0', 'conv2d_14/bias:0', 'conv2d_15/kernel:0', 'conv2d_15/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0']
vars disc_tape: ['dense_7/kernel/packed:0', 'conv2d_transpose_16/kernel/packed:0', 'conv2d_transpose_17/kernel/packed:0', 'conv2d_transpose_18/kernel/packed:0', 'conv2d_transpose_19/kernel/packed:0', 'conv2d_12/kernel:0', 'conv2d_12/bias:0', 'conv2d_13/kernel:0', 'conv2d_13/bias:0', 'conv2d_14/kernel:0', 'conv2d_14/bias:0', 'conv2d_15/kernel:0', 'conv2d_15/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0']
gradients_of_generator [None, None, None, None, None]
gradients_of_discriminator [<tf.Tensor 'AddN_3:0' shape=(5, 5, 3, 64) dtype=float32>, <tf.Tensor 'AddN_4:0' shape=(64,) dtype=float32>, <tf.Tensor 'AddN_5:0' shape=(5, 5, 64, 128) dtype=float32>, <tf.Tensor 'AddN_6:0' shape=(128,) dtype=float32>, <tf.Tensor 'AddN_7:0' shape=(5, 5, 128, 256) dtype=float32>, <tf.Tensor 'AddN_8:0' shape=(256,) dtype=float32>, <tf.Tensor 'AddN_9:0' shape=(5, 5, 256, 512) dtype=float32>, <tf.Tensor 'AddN_10:0' shape=(512,) dtype=float32>, <tf.Tensor 'AddN_11:0' shape=(73728, 1) dtype=float32>, <tf.Tensor 'AddN_12:0' shape=(1,) dtype=float32>]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-297-668c74d6b82e> in <module>()
----> 1 train(raw_dataset, EPOCHS)
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
984 except Exception as e: # pylint:disable=broad-except
985 if hasattr(e, "ag_error_metadata"):
--> 986 raise e.ag_error_metadata.to_exception(e)
987 else:
988 raise
ValueError: in user code:
<ipython-input-290-f71b18632068>:28 pre_train *
generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:630 apply_gradients **
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py:76 filter_empty_gradients
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['dense_7/kernel:0', 'conv2d_transpose_16/kernel:0', 'conv2d_transpose_17/kernel:0', 'conv2d_transpose_18/kernel:0', 'conv2d_transpose_19/kernel:0'].
The function below is using for step traning. Surprisingly for me, gradients are calculated for discriminator and did not calculate for generator
def train_step(images,likes):
noise = tf.random.normal([BATCH_SIZE, noise_dim])
with tf.GradientTape() as gen_tape, tf.GradientTape(persistent=True) as disc_tape:
gen_tape.watch(noise)
generated_images = generator(noise, training=True)
real_output = discriminator(images, training=True)
fake_output = discriminator(generated_images, training=True)
# gen_loss = generator_loss(fake_output)
gen_tape.watch(fake_output)
# gen_tape.watch(gen_loss)
# print("gen_loss",gen_loss)
disc_loss = discriminator_pretrain_loss(real_output, fake_output, likes)
gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
print("disc_loss",disc_loss)
print("vars gen_tape: ",[var.name for var in gen_tape.watched_variables()])
print("vars disc_tape: ",[var.name for var in disc_tape.watched_variables()])
gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
print("gradients_of_generator",gradients_of_generator)
gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
print("gradients_of_discriminator",gradients_of_discriminator)
generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
The function below is train function the function is working correct
with tpu_strategy.scope():
def train(dataset, epochs):
for epoch in range(epochs):
start = time.time()
for row in dataset:
parsed_row = _parse_function(row)
image_batch = parsed_row['img_like']
like_batch = parsed_row['is_like']
# try:
train_step(image_batch,like_batch)
# except Exception as e:
# print("Была ошибка...\r\n", e)
# train_step(image_batch)
# Produce images for the GIF as you go
if (epoch + 1) % 10 == 0:
display.clear_output(wait=True)
generate_and_save_images(generator,
epoch + 1,
seed)
# Save the model every 15 epochs
if (epoch + 1) % 100 == 0:
checkpoint.save(file_prefix = checkpoint_prefix)
print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
# Generate after the final epoch
display.clear_output(wait=True)
generate_and_save_images(generator,
epochs,
seed)
Discriminator model
def make_discriminator_model():
model = tf.keras.Sequential()
model.add(layers.MaxPooling2D(pool_size=(5, 5),
strides=(5, 5), padding='same'))
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same',
input_shape=[288, 128, 3]))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
model.add(layers.MaxPooling2D(pool_size=(2, 2),
strides=(2, 2), padding='same'))
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Conv2D(128, (5, 5), strides=(1, 1), padding='same'))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
model.add(layers.MaxPooling2D(pool_size=(2, 2),
strides=(2, 2), padding='same'))
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Conv2D(256, (5, 5), strides=(1, 1), padding='same'))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
model.add(layers.MaxPooling2D(pool_size=(2, 2),
strides=(2, 2), padding='same'))
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Conv2D(512, (5, 5), strides=(2, 2), padding='same'))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Flatten())
model.add(layers.Dense(1))
# print(model.output_shape)
# print(model.output_shape[1]*BATCH_SIZE)
return model
Generator model
def make_generator_model():
model = tf.keras.Sequential()
model.add(layers.Dense(90*40*256, use_bias=False, input_shape=(100,)))
# model.add(layers.BatchNormalization())
model.add(layers.Activation('relu'))
model.add(layers.Reshape((90, 40, 256)))
assert model.output_shape == (None, 90, 40, 256) # Note: None is the batch size
print(model.output_shape)
print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
assert model.output_shape == (None, 90, 40, 128)
print(model.output_shape)
print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
# model.add(layers.BatchNormalization())
model.add(layers.Activation('relu'))
# model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
# assert model.output_shape == (None, 180, 80, 64)
# print(model.output_shape)
# print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*256)
# model.add(layers.BatchNormalization())
# model.add(layers.LeakyReLU())
model.add(layers.Conv2DTranspose(16, (5, 5), strides=(4, 4), padding='same', use_bias=False))
assert model.output_shape == (None, 360, 160, 16)
print(model.output_shape)
print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
# model.add(layers.BatchNormalization())
model.add(layers.Activation('relu'))
model.add(layers.Conv2DTranspose(8, (5, 5), strides=(2, 2), padding='same', use_bias=False))
assert model.output_shape == (None, 720, 320, 8)
print(model.output_shape)
print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
# model.add(layers.BatchNormalization())
model.add(layers.Activation('relu'))
model.add(layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
assert model.output_shape == (None, 1440, 640, 3)
print(model.output_shape)
print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
return model
def discriminator_pretrain_loss(real_output, fake_output, like):
print("discriminator_pretrain_loss real_output",real_output)
print("discriminator_pretrain_loss fake_output",fake_output)
print("discriminator_pretrain_loss like",like)
real_loss = cross_entropy(like, real_output)
print("discriminator_pretrain_loss real_loss",real_loss)
fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
print("discriminator_pretrain_loss fake_loss",fake_loss)
total_loss = real_loss + fake_loss
print("discriminator_pretrain_loss fake_loss",total_loss)
return total_loss
def generator_loss(fake_output):
print("generator_loss fake_output",fake_output)
print("generator_loss ones_like",tf.ones_like(fake_output))
gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
print("generator_loss gen_loss",gen_loss)
return gen_loss
Im using google TPU in my colab
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
tf.keras.backend.set_floatx('float32')
try:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') # TPU detection
print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError:
raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)
print("All devices: ", tf.config.list_logical_devices('TPU'))
Upvotes: 0
Views: 233
Reputation: 11
The problem was resolved by using in the "train" function:
tpu_strategy.run(train_step, args = (image_batch,like_batch))
And using in strategy scope:
tf.keras.losses.BinaryCrossentropy(from_logits=True,reduction=tf.keras.losses.Reduction.NONE)
So "train" function was chanched by me to:
with tpu_strategy.scope():
def train(dataset, epochs):
for epoch in range(epochs):
start = time.time()
for row in dataset:
parsed_row = _parse_function(row)
image_batch = parsed_row['img_like']
like_batch = parsed_row['is_like']
# try:
tpu_strategy.run(train_step, args = (image_batch,like_batch))
# except Exception as e:
# print("Была ошибка...\r\n", e)
# train_step(image_batch)
# Produce images for the GIF as you go
if (epoch + 1) % 10 == 0:
display.clear_output(wait=True)
generate_and_save_images(generator,
epoch + 1,
seed)
# Save the model every 15 epochs
if (epoch + 1) % 100 == 0:
checkpoint.save(file_prefix = checkpoint_prefix)
print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
# Generate after the final epoch
display.clear_output(wait=True)
generate_and_save_images(generator,
epochs,
seed)
Happy coding! Thank you!
Upvotes: 1