ForumWhiner

Reputation: 121

Tape gradient gives wrong output

I am trying to compute gradients with tape.gradient(), but it gives me the wrong answer. The problem is in the line u_z = tape.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO) and the two lines that follow it in the code below. The function u is not constant in the variables z, f, t, yet tape.gradient(u, z) and tape.gradient(u, t) both return a None object. If I pass unconnected_gradients=tf.UnconnectedGradients.ZERO as an argument, I get 0.0 as the derivative instead, which does not make sense. It looks as if the network somehow gets disconnected from the tape, but I cannot understand why this happens or how to fix it. I am using tensorflow 2.6.0 and keras 2.6.0. The code and error message are below.
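For context, here is a stripped-down sketch (a toy one-layer model rather than my actual network) that reproduces the None result:

import tensorflow as tf

dense = tf.keras.layers.Dense(1)
z = tf.random.uniform((4, 1))  # a plain tensor, not a tf.Variable

with tf.GradientTape() as tape:
    u = dense(z)

print(tape.gradient(u, z))  # prints None

My full code: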

import tensorflow as tf
import numpy as np
from tensorflow import keras
import os
from tqdm import trange
import matplotlib.pyplot as plt
# Switch off unnecessary TF warning messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class Model():
    def __init__(self):
        self.optimizer = keras.optimizers.Adam()
        self.initializer = tf.keras.initializers.HeNormal()
        self.batchSize = 500
        self.number_epochs=5000
        
    def NN(self,num_layers = 3, num_neurons = 30):
        model_ = keras.models.Sequential()
        model_.add(keras.layers.Dense(num_neurons,activation='tanh',input_dim=3,kernel_initializer = self.initializer))
        for layer in range(num_layers-1):
            model_.add(keras.layers.Dense(num_neurons,activation='tanh',kernel_initializer=self.initializer))
        model_.add(keras.layers.Dense(1,kernel_initializer=self.initializer))
        return model_
    
    def solve_pde(self,value_function,X,idx):
        z,f,t = X[:,0:1],X[:,1:2],X[:,2:3]
        with tf.GradientTape(persistent=True) as tape:
            u = value_function(tf.concat([z,f,t],axis=1))
        u_z = tape.gradient(u,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_zz = tape.gradient(u_z,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_t = tape.gradient(u,t)
        u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u
        return u_pde
    
    def loss_function(self,batchSize):
        z = tf.linspace(0.001,0.999, 200)
        f = tf.linspace(0.1,0.2, 20)
        z_tile = tf.tile(tf.expand_dims(z,axis=-1),multiples=[20,1])
        f_tile = tf.reshape(tf.repeat(f,200),[-1,1])
        dt = 0.9
        X=tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(dt,z_tile.shape[0]),[-1,1])),axis=1)
        X_pde = tf.concat((z_tile,f_tile,tf.random.uniform(shape=(z_tile.shape[0],1),minval=0,maxval=dt)),axis=1)
        x_star = tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(0.0,z_tile.shape[0]),[-1,1])),axis=1)
        idx = np.random.choice(X.shape[0],batchSize,replace=True)
        loss_e = self.solve_pde(self.value_function_e,X_pde,idx)
        self.value_updated = self.value_function_e(tf.concat([x_star[:,0:1],x_star[:,1:2],x_star[:,2:3]],axis=1)).numpy().reshape(self.innerStep.Nz,self.innerStep.Nf).transpose()
        return loss_e
    
    @tf.function 
    def training_step(self):
        with tf.GradientTape(persistent=True) as tape:
            loss_e = self.loss_function(self.batchSize)
        grads_valueE = tape.gradient(loss_e,self.theta_valueFunction_e)
        self.optimizer.apply_gradients(zip(grads_valueE,self.theta_valueFunction_e))
        return loss_e
    
    def train_model(self):
        self.value_function_e = self.NN()

        self.theta_valueFunction_e = self.value_function_e.trainable_variables
        
        self.LVF= []

        for epoch in trange(self.number_epochs):
            print(epoch)
            loss_e = self.training_step()
            self.LVF.append(loss_e.numpy())
            
        
if __name__=="__main__":
    ext = Model()
    ext.train_model()    
    

The error message along with full traceback is

Traceback (most recent call last):

  File "<ipython-input-26-f5a127c3c9ae>", line 1, in <module>
    runfile('C:/Users/user/Google Drive/S/Research Project4/trial.py', wdir='C:/Users/user/Google Drive/SFI/Research Project4')

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 85, in <module>
    ext.train_model()

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 79, in train_model
    loss_e = self.training_step()

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 862, in __call__
    return self._python_function(*args, **kwds)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3985, in bound_method_wrapper
    return wrapped_fn(weak_instance(), *args, **kwargs)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 65, in training_step
    loss_e = self.loss_function(self.batchSize)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 58, in loss_function
    loss_e = self.solve_pde(self.value_function_e,X_pde,idx)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 34, in solve_pde
    u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1399, in r_binary_op_wrapper
    y, x = maybe_promote_tensors(y, x)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1335, in maybe_promote_tensors
    ops.convert_to_tensor(tensor, dtype, name="x"))

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\profiler\trace.py", line 163, in wrapped
    return func(*args, **kwargs)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1566, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 346, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 272, in constant
    allow_broadcast=True)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 283, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 308, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 106, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

Any help is much appreciated. Thank you.

Upvotes: 0

Views: 741

Answers (1)

JumbaMumba

Reputation: 579

There are two problems in your code that prevent you from getting the result you want:

  1. To compute higher-order derivatives you have to create nested GradientTape objects.
  2. GradientTape automatically watches trainable variables accessed in its context. If you want gradients with respect to plain tensors (in your case, z and t), you have to call tape.watch(<my_tensor>); otherwise the gradient for that tensor is None, which is exactly what you are seeing. A toy sketch of both points follows this list.
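Here is a minimal sketch of both points on a toy function, u = z**3, whose derivatives u_z = 3*z**2 and u_zz = 6*z are known in closed form:

import tensorflow as tf

z = tf.constant([[3.0]])

with tf.GradientTape() as tape:
    tape.watch(z)                  # z is a plain tensor, so it must be watched
    with tf.GradientTape() as tape2:
        tape2.watch(z)
        u = z ** 3
    u_z = tape2.gradient(u, z)     # first derivative, computed while the outer tape records
u_zz = tape.gradient(u_z, z)       # second derivative, taken from the outer tape

print(u_z.numpy(), u_zz.numpy())   # [[27.]] [[18.]]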

Fixed code:

def solve_pde(self, value_function, X, idx):
    z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(z)  # outer tape, used for the second derivative u_zz
        with tf.GradientTape(persistent=True) as tape2:
            tape2.watch(z)  # z and t are plain tensors, so watch them explicitly
            tape2.watch(t)
            u = value_function(tf.concat([z, f, t], axis=1))
        u_z = tape2.gradient(u, z)  # first derivative, recorded by the outer tape
    u_zz = tape.gradient(u_z, z)  # second derivative
    u_t = tape2.gradient(u, t)  # second query on tape2, hence persistent=True
    u_pde = u_t + u_z + u_zz - tf.cast(0.5, dtype=tf.float32) * u
    return u_pde
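Note that tape2 must be persistent because it is queried twice (once for u_z and once for u_t); a non-persistent tape releases its resources after a single gradient() call. The outer tape is only queried once, so its persistent=True is optional, and you can del tape, tape2 at the end of the function to free the persistent tapes early.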

More on gradient tape can be found in the official documentation: https://www.tensorflow.org/api_docs/python/tf/GradientTape
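As a quick sanity check, here is a hypothetical snippet (assuming the fixed solve_pde above has replaced the original method in the Model class) showing that the gradients are no longer None:

import tensorflow as tf

m = Model()
m.value_function_e = m.NN()
X = tf.random.uniform((8, 3))  # random (z, f, t) samples in [0, 1)
u_pde = m.solve_pde(m.value_function_e, X, idx=None)  # idx is not used inside
print(u_pde.shape)  # (8, 1)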

Upvotes: 1
