ValueError: An operation has `None` for gradient - Not using Custom

As per the title, I get this common error when trying to use Keras to do some Image Classification training. Unlike nearly all of the other examples, I am not trying to customise anything and simply using bog-standard keras functionality! Like this, who asks a similar question, but doesn't appear to have followed up.

I previously had an issue with this same project, but after having upgraded cudnn, and cudatoolkit (and relevant NVidia backends) I get this new error.

import os
import glob
import shutil
import pickle
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
from IPython.display import display
from PIL import Image

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.constraints import maxnorm
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf


# read in the training and validation labels
trainPairs = np.genfromtxt('/home/me/Videos/sign_language/jester-v1-train.csv', delimiter=';', skip_header=0, dtype=[('class', 'S12'),('sign','S50')])
trainLabels = [v for k,v in trainPairs]

validPairs = np.genfromtxt('/home/me/Videos/sign_language/jester-v1-validation.csv', delimiter=';', skip_header=0, dtype=[('class', 'S12'),('sign','S50')])
validLabels = [v for k,v in validPairs]

def copyDirectory(src, dest):
        shutil.copytree(src, dest)
    # Directories are the same
    except shutil.Error as e:
        print('Directory not copied. Error: %s' % e)
    # Any error saying that the directory doesn't exist
    except OSError as e:
        print('Directory not copied. Error: %s' % e)

source = '/media/me/other/20bn-jester-v1/'
dest = '/media/me/other/jester/validation/'

# counter = 0
# for k,v in validPairs:
#     counter = counter + 1
#     source_folder = source + k.decode("utf-8")
#     dest_folder = dest + v.decode("utf-8") + "/" + k.decode("utf-8")

#     if counter%100 == 0:        
#         print(k)
#         print(v)
#         print(counter)
#         print(source_folder)
#         print(dest_folder)

#     if os.path.isdir(source_folder):
#         if os.path.isdir(dest + v.decode("utf-8")):
#             copyDirectory(source_folder, dest_folder)        

#     if counter%1000 == 0:
#         print(counter)

datagen = ImageDataGenerator(rescale=1./255)

train_it = datagen.flow_from_directory('/media/me/other/jester/train/', 
valid_it = datagen.flow_from_directory('/media/me/other/jester/validation/', class_mode='categorical', batch_size=16)
# test_it = datagen.flow_from_directory('/media/me/other/jester/test/', class_mode='binary', batch_size=64)

seed = 21
epochs = 5
optimizer = 'Adamax'

with tf.device("/cpu:0"):
    model = Sequential()

#model = Sequential()

#model.add(Conv2D(32,(3,3), input_shape=(X_train.shape[1:]), padding='same'))
#TODO is this the right shape??
model.add(Conv2D(32,(16,16), strides=(8,8), input_shape=(256, 256, 3), padding='same'))
#model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None))

model.add(Conv2D(64, (3,3), input_shape=(3,16,16), activation='relu', padding='same'))
#model.add(Conv2D(64, (3,3), padding='same'))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), padding='same'))

#model.add(Dense(256, kernel_constraint=maxnorm(3)))

model.add(Dense(128, kernel_constraint=maxnorm(3)))

#TODO make this a variable

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

for layer in model.layers:



image_batch_train, label_batch_train = next(iter(train_it))
print("Image batch shape: ", image_batch_train.shape)
print("Label batch shape: ", label_batch_train.shape)
dataset_labels = sorted(train_it.class_indices.items(), key=lambda pair:pair[1])
dataset_labels = np.array([key.title() for key, value in dataset_labels])


from keras import backend as K

import keras 

model.fit_generator(train_it, steps_per_epoch=16, validation_data=valid_it, validation_steps=8)


from keras import backend as K

import keras 

model.fit_generator(train_it, steps_per_epoch=16, validation_data=valid_it, validation_steps=8)
Traceback (most recent call last):

  File "<ipython-input-19-ba2ec4f0a2a8>", line 8, in <module>
    model.fit_generator(train_it, steps_per_epoch=16, validation_data=valid_it, validation_steps=8)

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/legacy/", line 91, in wrapper
    return func(*args, **kwargs)

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/engine/", line 1732, in fit_generator

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/engine/", line 42, in fit_generator

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/engine/", line 316, in _make_train_function

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/legacy/", line 91, in wrapper
    return func(*args, **kwargs)

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/backend/", line 75, in symbolic_fn_wrapper
    return func(*args, **kwargs)

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/", line 598, in get_updates
    grads = self.get_gradients(loss, params)

  File "/home/me/Programs/anaconda3/envs/hand-gesture/lib/python3.7/site-packages/keras/", line 93, in get_gradients
    raise ValueError('An operation has `None` for gradient. '

ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.

Edit 1: Following Matias' recommendation and removing the

from keras import backend as K

import keras 

allows me to run one epoch, but now I get

Epoch 1/1
16/16 [==============================] - 6s 370ms/step - loss: 4.0208 - accuracy: 0.0391 - val_loss: 7.3795 - val_accuracy: 0.0469
Out[3]: <keras.callbacks.callbacks.History at 0x7f817e63c2d0>

Edit 2: As Matias pointed out, my code was only set to run 1 epoch. So, removing the clear_session() worked to fix my problem.

Answers (1)

Dr. Snoopy
Dr. Snoopy

I think the problem is that you are clearing the session before training the model, doing this would make no sense, because clearing the session cleans the model structures in memory, so there would be no model representation in the TensorFlow side, making training fail.

So do not juse K.clear_session() in this case. It does not seem to be needed.

