Gradients are None for Custom Convolution Layer

Question

I have implemented the Basic MNIST model with Custom convolution layer as shown below. The problem is that the Gradients are always 'None' for the Custom Layer and so the learning does not happens during back propagation, as the Grad has None values. I have debugged the outputs of the layers during forward pass and they are OK. Here is the sample code, for simplicity I have passed image of 'Ones' and have just returned the matrix from the custom layer. I have tried my best but could make it work any help is very much appreciated in advance following code is executable and raises the

warning :tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.

import numpy as np
import tensorflow as tf
from grpc.beta import interfaces
class CustomConv2D(tf.keras.layers.Conv2D):
    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 __name__ = 'CustomConv2D',
                 **kwargs
                 ):
        super(CustomConv2D, self).__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs )

     def call(self, input):

        (unrolled_mat, filters, shape) = self.prepare(input)
#unrolled_mat=unrolled inputs
#filters=unrolled kernels of the lAYER
#convolution through unrolling
        conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
        result=tf.convert_to_tensor(tf.reshape(conv_result, shape))
        return result


    def prepare(self, matrix):
        batches,rows,cols,channels=matrix.shape
        kernel_size = self.kernel_size[0]
        unrolled_matrices=None
        # start = timer()
        for batch in range(batches):
            unrolled_maps=None
            for chanel in range(channels):
                unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows,chanel)
                if unrolled_maps is None:
                    unrolled_maps = unrolled_map
                else:
                    unrolled_maps=np.append(unrolled_maps,unrolled_map,axis=1)
            unrolled_maps = np.reshape(unrolled_maps,(-1,unrolled_maps.shape[0],unrolled_maps.shape[1]))
            if unrolled_matrices is None:
                unrolled_matrices = unrolled_maps
            else:
                unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
        kernels=self.get_weights()
        kernels=np.reshape(kernels[0],(unrolled_matrices[0].shape[1],-1))
        shp=(batches,rows-(kernel_size-1),cols-(kernel_size-1),self.filters)
        matrix=unrolled_matrices
        return (matrix, kernels, shp)

    def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
        # a=np.zeros((shape))
        unrolled_feature_map = None
        for x in range(0, rows - (kernel_size - 1)):
            for y in range(0, (cols - (kernel_size - 1))):
                temp_row = None  # flattened kernal at single position
                for k in range(kernel_size):
                    for l in range(kernel_size):
                        if temp_row is None:
                            temp_row = matrix[batch, x + k, y + l, chanel]
                            # print(matrix[batch, x + k, y + l])
                        else:
                            temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
                            # print(matrix[batch, x + k, y + l])
                if unrolled_feature_map is None:
                    unrolled_feature_map = np.reshape(temp_row,
                        (-1, kernel_size * kernel_size))  # first row of unrolled matrix added
                else:
                    unrolled_feature_map = np.concatenate((unrolled_feature_map, np.reshape(temp_row,
                        (-1, kernel_size * kernel_size))))  # concatinate subsequent row to un_mat
        unrolled_feature_map = np.reshape(unrolled_feature_map,( unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
        # print(unrolled_feature_map.shape)
        matrix=unrolled_feature_map
        return matrix

class CNN(tf.keras.Model):
  def __init__(self):
    super(CNN, self).__init__()
    self.learning_rate = 0.001
    self.momentum = 0.9
    self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
    self.conv1 = CustomConv2D(filters = 6, kernel_size= 3, activation = 'relu')  ## valid means no padding
    self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2) # default stride??-
    self.conv2 = CustomConv2D(filters = 16, kernel_size = 3,  activation = 'relu')
    self.pool2 = tf.keras.layers.MaxPool2D(pool_size = 2)
    self.conv3 = CustomConv2D(filters=120, kernel_size=3,  activation='relu')
    self.flatten = tf.keras.layers.Flatten()
    self.fc1 = tf.keras.layers.Dense(units=82,kernel_initializer='glorot_uniform')
    self.fc2 = tf.keras.layers.Dense(units=10, activation = 'softmax',kernel_initializer='glorot_uniform')
  def call(self, x):
      x = self.conv1(x)  # shap(32,26,26,6) all (6s 3s 6s 3s)
      x = self.pool1(x)  # shap(32,13,13,6) all (6s)
      x = self.conv2(x)  # shap(32,11,11,16) all(324s)
      x = self.pool2(x)  # shap(32,5,5,16)
      x = self.conv3(x)  # shap(32,3,3,120)all(46656)
      x = self.flatten(x)  # shap(32,1080)
      x = self.fc1(x)  # shap(32,82)
      x = self.fc2(x)  # shap(32,10)
      return x
  def feedForward(self, image, label):
            accuracy_object = tf.metrics.Accuracy()
            loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
            with tf.GradientTape() as tape:
                feedForwardCompuation = self(image, training=True)
                self.loss_value = loss_object(label, feedForwardCompuation)
            grads = tape.gradient(self.loss_value, self.variables)
            self.optimizer.apply_gradients(zip(grads, self.variables))
            accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train=x_train.astype('float32')
y_train = y_train.astype('float32')
image=x_train[0].reshape((1,28,28,1)) 
label=y_train[0]
cnn=CNN()
cnn.feedForward(image,label)

UPDATE: I am not using the builtin TF conv fucntion rather I am implementing my own custom convolution operation via Matrix unrolling method(unrolled map*unrolled filters). But the Tap.gradient returns "None" for the custom layers however when I use the builtin conv2d function of TF then it works fine! I have Added the actual code of the operation
Snapshot of grads while debugging

Gradients are None for Custom Convolution Layer

Answers (1)

Related Questions