New World
New World

Reputation: 11

tf.GradientTape() returns only Nones

I use TensorFlow Keras to make my GAN model I want to use TPU And I have the error with tf.GradientTape() Gradients for my discriminator are ok BUT All Gradients for my generator are None Please use my colab for understand

Thank you!

Error:

discriminator_pretrain_loss real_output Tensor("sequential_8/dense_8/BiasAdd:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss fake_output Tensor("sequential_8/dense_8/BiasAdd_1:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss like Tensor("likes:0", shape=(1, 1), dtype=float32)
discriminator_pretrain_loss real_loss Tensor("binary_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)
discriminator_pretrain_loss fake_loss Tensor("binary_crossentropy_1/weighted_loss/value:0", shape=(), dtype=float32)
discriminator_pretrain_loss fake_loss Tensor("add:0", shape=(), dtype=float32)
disc_loss Tensor("add:0", shape=(), dtype=float32)
vars gen_tape:  ['dense_7/kernel/packed:0', 'conv2d_transpose_16/kernel/packed:0', 'conv2d_transpose_17/kernel/packed:0', 'conv2d_transpose_18/kernel/packed:0', 'conv2d_transpose_19/kernel/packed:0', 'conv2d_12/kernel:0', 'conv2d_12/bias:0', 'conv2d_13/kernel:0', 'conv2d_13/bias:0', 'conv2d_14/kernel:0', 'conv2d_14/bias:0', 'conv2d_15/kernel:0', 'conv2d_15/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0']
vars disc_tape:  ['dense_7/kernel/packed:0', 'conv2d_transpose_16/kernel/packed:0', 'conv2d_transpose_17/kernel/packed:0', 'conv2d_transpose_18/kernel/packed:0', 'conv2d_transpose_19/kernel/packed:0', 'conv2d_12/kernel:0', 'conv2d_12/bias:0', 'conv2d_13/kernel:0', 'conv2d_13/bias:0', 'conv2d_14/kernel:0', 'conv2d_14/bias:0', 'conv2d_15/kernel:0', 'conv2d_15/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0']
gradients_of_generator [None, None, None, None, None]
gradients_of_discriminator [<tf.Tensor 'AddN_3:0' shape=(5, 5, 3, 64) dtype=float32>, <tf.Tensor 'AddN_4:0' shape=(64,) dtype=float32>, <tf.Tensor 'AddN_5:0' shape=(5, 5, 64, 128) dtype=float32>, <tf.Tensor 'AddN_6:0' shape=(128,) dtype=float32>, <tf.Tensor 'AddN_7:0' shape=(5, 5, 128, 256) dtype=float32>, <tf.Tensor 'AddN_8:0' shape=(256,) dtype=float32>, <tf.Tensor 'AddN_9:0' shape=(5, 5, 256, 512) dtype=float32>, <tf.Tensor 'AddN_10:0' shape=(512,) dtype=float32>, <tf.Tensor 'AddN_11:0' shape=(73728, 1) dtype=float32>, <tf.Tensor 'AddN_12:0' shape=(1,) dtype=float32>]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-297-668c74d6b82e> in <module>()
----> 1 train(raw_dataset, EPOCHS)

9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    984           except Exception as e:  # pylint:disable=broad-except
    985             if hasattr(e, "ag_error_metadata"):
--> 986               raise e.ag_error_metadata.to_exception(e)
    987             else:
    988               raise

ValueError: in user code:

    <ipython-input-290-f71b18632068>:28 pre_train  *
        generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:630 apply_gradients  **
        grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
    /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py:76 filter_empty_gradients
        ([v.name for _, v in grads_and_vars],))

    ValueError: No gradients provided for any variable: ['dense_7/kernel:0', 'conv2d_transpose_16/kernel:0', 'conv2d_transpose_17/kernel:0', 'conv2d_transpose_18/kernel:0', 'conv2d_transpose_19/kernel:0']. 

The function below is using for step traning. Surprisingly for me, gradients are calculated for discriminator and did not calculate for generator

def train_step(images,likes):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape(persistent=True) as disc_tape:
      gen_tape.watch(noise)
      generated_images = generator(noise, training=True)
      real_output = discriminator(images, training=True)
      fake_output = discriminator(generated_images, training=True)

      # gen_loss = generator_loss(fake_output)
      gen_tape.watch(fake_output)
      # gen_tape.watch(gen_loss)
      # print("gen_loss",gen_loss)
      disc_loss = discriminator_pretrain_loss(real_output, fake_output, likes)
      gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
      print("disc_loss",disc_loss)
      print("vars gen_tape: ",[var.name for var in gen_tape.watched_variables()])
      print("vars disc_tape: ",[var.name for var in disc_tape.watched_variables()])


    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    print("gradients_of_generator",gradients_of_generator)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    print("gradients_of_discriminator",gradients_of_discriminator)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

The function below is train function the function is working correct

with tpu_strategy.scope():
  def train(dataset, epochs):
    for epoch in range(epochs):
      start = time.time()

      for row in dataset:
        parsed_row = _parse_function(row)
        image_batch = parsed_row['img_like']
        like_batch = parsed_row['is_like']
        # try:
        train_step(image_batch,like_batch)
        # except Exception as e:
          # print("Была ошибка...\r\n", e)
  #       train_step(image_batch)


      # Produce images for the GIF as you go
      if (epoch + 1) % 10 == 0:
        display.clear_output(wait=True)
        generate_and_save_images(generator,
                                epoch + 1,
                                seed)

      # Save the model every 15 epochs
      if (epoch + 1) % 100 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

      print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

    # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator,
                            epochs,
                            seed)

Discriminator model

def make_discriminator_model():
    model = tf.keras.Sequential()

    model.add(layers.MaxPooling2D(pool_size=(5, 5),
    strides=(5, 5), padding='same'))
    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE) 

    model.add(layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same',
                                     input_shape=[288, 128, 3]))   
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.MaxPooling2D(pool_size=(2, 2),
    strides=(2, 2), padding='same'))


    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    model.add(layers.Conv2D(128, (5, 5), strides=(1, 1), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.MaxPooling2D(pool_size=(2, 2),
    strides=(2, 2), padding='same'))

    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    model.add(layers.Conv2D(256, (5, 5), strides=(1, 1), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.MaxPooling2D(pool_size=(2, 2),
    strides=(2, 2), padding='same'))



    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    model.add(layers.Conv2D(512, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))
    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    model.add(layers.Flatten())
    model.add(layers.Dense(1))
    # print(model.output_shape)
    # print(model.output_shape[1]*BATCH_SIZE)
    return model

Generator model

def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(90*40*256, use_bias=False, input_shape=(100,)))
    # model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Reshape((90, 40, 256)))
    assert model.output_shape == (None, 90, 40, 256)  # Note: None is the batch size
    print(model.output_shape)
    print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)

    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 90, 40, 128)
    print(model.output_shape)
    print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    # model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    # model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    # assert model.output_shape == (None, 180, 80, 64)
    # print(model.output_shape)
    # print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*256)
    # model.add(layers.BatchNormalization())
    # model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(16, (5, 5), strides=(4, 4), padding='same', use_bias=False))
    assert model.output_shape == (None, 360, 160, 16)
    print(model.output_shape)
    print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    # model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Conv2DTranspose(8, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 720, 320, 8)
    print(model.output_shape)
    print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)
    # model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 1440, 640, 3)
    print(model.output_shape)
    print(model.output_shape[1]*model.output_shape[2]*model.output_shape[3]*BATCH_SIZE)

    return model
def discriminator_pretrain_loss(real_output, fake_output, like):
    print("discriminator_pretrain_loss real_output",real_output)
    print("discriminator_pretrain_loss fake_output",fake_output)
    print("discriminator_pretrain_loss like",like)
    real_loss = cross_entropy(like, real_output)
    print("discriminator_pretrain_loss real_loss",real_loss)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    print("discriminator_pretrain_loss fake_loss",fake_loss)
    total_loss = real_loss + fake_loss
    print("discriminator_pretrain_loss fake_loss",total_loss)
    return total_loss
def generator_loss(fake_output):
    print("generator_loss fake_output",fake_output)
    print("generator_loss ones_like",tf.ones_like(fake_output))
    gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
    print("generator_loss gen_loss",gen_loss)   
    return gen_loss

Im using google TPU in my colab

import tensorflow as tf
print("Tensorflow version " + tf.__version__)
tf.keras.backend.set_floatx('float32')

try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')  # TPU detection
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError:
  raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)
print("All devices: ", tf.config.list_logical_devices('TPU'))

Upvotes: 0

Views: 233

Answers (1)

New World
New World

Reputation: 11

The problem was resolved by using in the "train" function:

tpu_strategy.run(train_step, args = (image_batch,like_batch))

And using in strategy scope:

tf.keras.losses.BinaryCrossentropy(from_logits=True,reduction=tf.keras.losses.Reduction.NONE)

So "train" function was chanched by me to:

with tpu_strategy.scope():
  def train(dataset, epochs):
    for epoch in range(epochs):
      start = time.time()

      for row in dataset:
        parsed_row = _parse_function(row)
        image_batch = parsed_row['img_like']
        like_batch = parsed_row['is_like']
        # try:
        tpu_strategy.run(train_step, args = (image_batch,like_batch))
        # except Exception as e:
          # print("Была ошибка...\r\n", e)
  #       train_step(image_batch)


      # Produce images for the GIF as you go
      if (epoch + 1) % 10 == 0:
        display.clear_output(wait=True)
        generate_and_save_images(generator,
                                epoch + 1,
                                seed)

      # Save the model every 15 epochs
      if (epoch + 1) % 100 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

      print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

    # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator,
                            epochs,
                            seed)

Happy coding! Thank you!

Upvotes: 1

Related Questions