ForumWhiner

Reputation: 121

Tape gradient gives wrong output

I am trying to compute gradients with tape.gradient(), but it gives me the wrong answer. The problem is in the line u_z = tape.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO) and the two lines that follow it in the code below. The function u is not constant in the variables z, f, t, yet tape.gradient(u, z) and tape.gradient(u, t) both return a None object. If I pass unconnected_gradients=tf.UnconnectedGradients.ZERO as an argument, I get 0.0 as the derivative instead, which does not make sense. It looks as if the network somehow gets disconnected from the tape, but I cannot understand why this happens or how to fix it. I am using tensorflow 2.6.0 and keras 2.6.0. The code and error message are below.
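For context, here is a stripped-down sketch (a toy one-layer model rather than my actual network) that reproduces the None result:

import tensorflow as tf

dense = tf.keras.layers.Dense(1)
z = tf.random.uniform((4, 1))  # a plain tensor, not a tf.Variable

with tf.GradientTape() as tape:
    u = dense(z)

print(tape.gradient(u, z))  # prints None

My full code: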

import tensorflow as tf
import numpy as np
from tensorflow import keras
import os
from tqdm import trange
import matplotlib.pyplot as plt
# Switch off unnecessary TF warning messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class Model():
    def __init__(self):
        self.optimizer = keras.optimizers.Adam()
        self.initializer = tf.keras.initializers.HeNormal()
        self.batchSize = 500
        self.number_epochs=5000
        
    def NN(self,num_layers = 3, num_neurons = 30):
        model_ = keras.models.Sequential()
        model_.add(keras.layers.Dense(num_neurons,activation='tanh',input_dim=3,kernel_initializer = self.initializer))
        for layer in range(num_layers-1):
            model_.add(keras.layers.Dense(num_neurons,activation='tanh',kernel_initializer=self.initializer))
        model_.add(keras.layers.Dense(1,kernel_initializer=self.initializer))
        return model_
    
    def solve_pde(self,value_function,X,idx):
        z,f,t = X[:,0:1],X[:,1:2],X[:,2:3]
        with tf.GradientTape(persistent=True) as tape:
            u = value_function(tf.concat([z,f,t],axis=1))
        u_z = tape.gradient(u,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_zz = tape.gradient(u_z,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_t = tape.gradient(u,t)
        u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u
        return u_pde
    
    def loss_function(self,batchSize):
        z = tf.linspace(0.001,0.999, 200)
        f = tf.linspace(0.1,0.2, 20)
        z_tile = tf.tile(tf.expand_dims(z,axis=-1),multiples=[20,1])
        f_tile = tf.reshape(tf.repeat(f,200),[-1,1])
        dt = 0.9
        X=tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(dt,z_tile.shape[0]),[-1,1])),axis=1)
        X_pde = tf.concat((z_tile,f_tile,tf.random.uniform(shape=(z_tile.shape[0],1),minval=0,maxval=dt)),axis=1)
        x_star = tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(0.0,z_tile.shape[0]),[-1,1])),axis=1)
        idx = np.random.choice(X.shape[0],batchSize,replace=True)
        loss_e = self.solve_pde(self.value_function_e,X_pde,idx)
        self.value_updated = self.value_function_e(tf.concat([x_star[:,0:1],x_star[:,1:2],x_star[:,2:3]],axis=1)).numpy().reshape(self.innerStep.Nz,self.innerStep.Nf).transpose()
        return loss_e
    
    @tf.function 
    def training_step(self):
        with tf.GradientTape(persistent=True) as tape:
            loss_e = self.loss_function(self.batchSize)
        grads_valueE = tape.gradient(loss_e,self.theta_valueFunction_e)
        self.optimizer.apply_gradients(zip(grads_valueE,self.theta_valueFunction_e))
        return loss_e
    
    def train_model(self):
        self.value_function_e = self.NN()

        self.theta_valueFunction_e = self.value_function_e.trainable_variables
        
        self.LVF= []

        for epoch in trange(self.number_epochs):
            print(epoch)
            loss_e = self.training_step()
            self.LVF.append(loss_e.numpy())
            
        
if __name__=="__main__":
    ext = Model()
    ext.train_model()    
    

The error message along with full traceback is

Traceback (most recent call last):

  File "<ipython-input-26-f5a127c3c9ae>", line 1, in <module>
    runfile('C:/Users/user/Google Drive/S/Research Project4/trial.py', wdir='C:/Users/user/Google Drive/SFI/Research Project4')

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 85, in <module>
    ext.train_model()

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 79, in train_model
    loss_e = self.training_step()

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 862, in __call__
    return self._python_function(*args, **kwds)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3985, in bound_method_wrapper
    return wrapped_fn(weak_instance(), *args, **kwargs)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 65, in training_step
    loss_e = self.loss_function(self.batchSize)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 58, in loss_function
    loss_e = self.solve_pde(self.value_function_e,X_pde,idx)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 34, in solve_pde
    u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1399, in r_binary_op_wrapper
    y, x = maybe_promote_tensors(y, x)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1335, in maybe_promote_tensors
    ops.convert_to_tensor(tensor, dtype, name="x"))

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\profiler\trace.py", line 163, in wrapped
    return func(*args, **kwargs)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1566, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 346, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 272, in constant
    allow_broadcast=True)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 283, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 308, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 106, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

Any help is much appreciated. Thank you.

Upvotes: 0

Views: 741

Answers (1)

JumbaMumba

Reputation: 579

There are two problems in your code that prevent you from getting the result you want:

  1. To compute higher-order derivatives you have to create nested GradientTape objects.
  2. GradientTape automatically watches trainable variables accessed in its context. If you want gradients with respect to plain tensors (in your case, z and t), you have to call tape.watch(<my_tensor>); otherwise the gradient for that tensor is None, which is exactly what you are seeing. A toy sketch of both points follows this list.
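Here is a minimal sketch of both points on a toy function, u = z**3, whose derivatives u_z = 3*z**2 and u_zz = 6*z are known in closed form:

import tensorflow as tf

z = tf.constant([[3.0]])

with tf.GradientTape() as tape:
    tape.watch(z)                  # z is a plain tensor, so it must be watched
    with tf.GradientTape() as tape2:
        tape2.watch(z)
        u = z ** 3
    u_z = tape2.gradient(u, z)     # first derivative, computed while the outer tape records
u_zz = tape.gradient(u_z, z)       # second derivative, taken from the outer tape

print(u_z.numpy(), u_zz.numpy())   # [[27.]] [[18.]]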

Fixed code:

def solve_pde(self, value_function, X, idx):
    z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(z)  # outer tape, used for the second derivative u_zz
        with tf.GradientTape(persistent=True) as tape2:
            tape2.watch(z)  # z and t are plain tensors, so watch them explicitly
            tape2.watch(t)
            u = value_function(tf.concat([z, f, t], axis=1))
        u_z = tape2.gradient(u, z)  # first derivative, recorded by the outer tape
    u_zz = tape.gradient(u_z, z)  # second derivative
    u_t = tape2.gradient(u, t)  # second query on tape2, hence persistent=True
    u_pde = u_t + u_z + u_zz - tf.cast(0.5, dtype=tf.float32) * u
    return u_pde
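Note that tape2 must be persistent because it is queried twice (once for u_z and once for u_t); a non-persistent tape releases its resources after a single gradient() call. The outer tape is only queried once, so its persistent=True is optional, and you can del tape, tape2 at the end of the function to free the persistent tapes early.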

More on gradient tape can be found in the official documentation: https://www.tensorflow.org/api_docs/python/tf/GradientTape
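As a quick sanity check, here is a hypothetical snippet (assuming the fixed solve_pde above has replaced the original method in the Model class) showing that the gradients are no longer None:

import tensorflow as tf

m = Model()
m.value_function_e = m.NN()
X = tf.random.uniform((8, 3))  # random (z, f, t) samples in [0, 1)
u_pde = m.solve_pde(m.value_function_e, X, idx=None)  # idx is not used inside
print(u_pde.shape)  # (8, 1)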

Upvotes: 1
