partyphysics

Reputation: 184

Why can't I perform gradients on a variable passed as an argument to a tf.function?

My training loop was giving me the following warning:

WARNING:tensorflow:Gradients do not exist for variables ['noise:0'] when minimizing the loss.

After some tinkering, I determined that this only happens when the noise variable is passed as an argument to my loss function, which is a tf.function. The code below shows that there is no problem when the loss function is not a tf.function, or when the tf.function references the global noise variable. It also shows that an error results from trying to take a gradient with respect to the noise variable when it is used as an argument to a tf.function:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import bijectors as tfb

constrain_positive = tfb.Shift(np.finfo(np.float64).tiny)(tfb.Exp())
noise = tfp.util.TransformedVariable(initial_value=.1, bijector=constrain_positive, dtype=np.float64, name="noise")
trainable_variables = [noise.variables[0]]
kernel = tfp.math.psd_kernels.ExponentiatedQuadratic()
optimizer = tf.keras.optimizers.Adam()
index_points = tf.constant([[0]], dtype=np.float64)
observations = tf.constant([0], dtype=np.float64)

# I can train noise when it is passed as an argument to a plain Python function
def loss_function_1(index_points, observations, kernel, observation_noise_variance):
    gp = tfd.GaussianProcess(kernel, index_points, observation_noise_variance=observation_noise_variance)
    return -gp.log_prob(observations)

with tf.GradientTape() as tape:
    nll_1 = loss_function_1(index_points, observations, kernel, noise)
grad_1 = tape.gradient(nll_1, trainable_variables)
print(grad_1)
optimizer.apply_gradients(zip(grad_1, trainable_variables))

# I can train noise if it is used in a tf.function and not passed as an argument
@tf.function(autograph=False, experimental_compile=False)
def loss_function_2(index_points, observations, kernel):
    gp = tfd.GaussianProcess(kernel, index_points, observation_noise_variance=noise)
    return -gp.log_prob(observations)

with tf.GradientTape() as tape:
    nll_2 = loss_function_2(index_points, observations, kernel)
grad_2 = tape.gradient(nll_2, trainable_variables)
print(grad_2)
optimizer.apply_gradients(zip(grad_2, trainable_variables))

# I can train noise when it is passed as an argument to a tf.function, provided
# the tf.function ignores the argument and uses the global variable
@tf.function(autograph=False, experimental_compile=False)
def loss_function_3(index_points, observations, kernel, observation_noise_variance):
    gp = tfd.GaussianProcess(kernel, index_points, observation_noise_variance=noise)
    return -gp.log_prob(observations)

with tf.GradientTape() as tape:
    nll_3 = loss_function_3(index_points, observations, kernel, noise)
grad_3 = tape.gradient(nll_3, trainable_variables)
print(grad_3)
optimizer.apply_gradients(zip(grad_3, trainable_variables))

# I cannot train noise when it is passed as an argument to a tf.function and the
# tf.function uses the local argument
@tf.function(autograph=False, experimental_compile=False)
def loss_function_4(index_points, observations, kernel, observation_noise_variance):
    gp = tfd.GaussianProcess(kernel, index_points, observation_noise_variance=observation_noise_variance)
    return -gp.log_prob(observations)

with tf.GradientTape() as tape:
    nll_4 = loss_function_4(index_points, observations, kernel, noise)
grad_4 = tape.gradient(nll_4, trainable_variables)
print(grad_4)
optimizer.apply_gradients(zip(grad_4, trainable_variables))

This code prints:

[<tf.Tensor: shape=(), dtype=float64, numpy=0.045454545454545456>]
[<tf.Tensor: shape=(), dtype=float64, numpy=0.045413242911911206>]
[<tf.Tensor: shape=(), dtype=float64, numpy=0.04537197429557289>]
[None]

And then it returns the error message:

ValueError: No gradients provided for any variable: ['noise:0'].

Ideally I would get the performance boost of a tf.function, so I don't want to use loss_function_1. I would also like to be able to pass different noise variables to my loss function, so I don't want to rely on the global variable as I do in loss_function_2 or loss_function_3.

Why do I get None when I try to take a gradient with respect to a variable passed as an argument to a tf.function? How can I get around this?

Upvotes: 0

Views: 590

Answers (1)

nessuno

Reputation: 27042

You can't work around it; it works like that by design.

When you use tf.function you're converting the Python code into a static graph (in particular, a DAG). This graph has input nodes and output nodes: the input nodes are the parameters of your function, and the output nodes are its return values.
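
As a small illustration (not part of the original answer), you can inspect the graph of a traced tf.function and see that the function's parameters and return values become the graph's input and output nodes:

@tf.function
def square(x):
    return x * x

# Tracing yields a ConcreteFunction backed by a static graph.
concrete = square.get_concrete_function(tf.TensorSpec([], tf.float64))
print(concrete.graph.inputs)   # placeholder nodes built from the parameters
print(concrete.graph.outputs)  # nodes holding the return values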

Defining a tf.Variable inside the function body, or equivalently passing a tf.Variable as a function parameter, means creating a new variable node in the static graph every time you invoke the function, and creating a new variable on every call is not what you want.
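
As a minimal sketch of that restriction (assuming standard TF 2.x behavior), creating a fresh tf.Variable inside a tf.function fails, precisely because each trace would create a new variable:

@tf.function
def broken():
    # A new variable would be created on every trace of this function.
    v = tf.Variable(1.0)
    return v + 1.0

# Typically raises:
# ValueError: tf.function-decorated function tried to create variables on non-first call.
broken()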

In practice, when you have stateful objects (tf.Variable and similar) you can't define them inside a tf.function-decorated function; you have to break the function scope and declare the variable outside of it.

Your solution of declaring a global variable is the one to use. A better solution is to refactor your code in a more object-oriented way, declaring the variable as a private attribute of a class so that the variable object is not exposed globally.
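
For example, here is a minimal sketch of that refactor (reusing the definitions from the question; the class name GPLoss is illustrative): the noise variable lives on the object, so every trace of the tf.function refers to the same variable node, and different instances can carry different noise variables:

class GPLoss(tf.Module):
    def __init__(self, name="gp_loss"):
        super().__init__(name=name)
        # The stateful variable is created once, outside the traced function.
        self._noise = tfp.util.TransformedVariable(
            initial_value=.1, bijector=constrain_positive,
            dtype=np.float64, name="noise")

    @tf.function(autograph=False)
    def __call__(self, index_points, observations, kernel):
        gp = tfd.GaussianProcess(
            kernel, index_points, observation_noise_variance=self._noise)
        return -gp.log_prob(observations)

loss_fn = GPLoss()
with tf.GradientTape() as tape:
    nll = loss_fn(index_points, observations, kernel)
# tf.Module tracks variables assigned to attributes (TransformedVariable
# is itself a tf.Module), so trainable_variables finds the noise variable.
grads = tape.gradient(nll, loss_fn.trainable_variables)
optimizer.apply_gradients(zip(grads, loss_fn.trainable_variables))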

I covered this behavior in this article, where you can find several insights on how to refactor your code and how to think when using tf.function.

Upvotes: 1
