Reputation: 2010
I have created a custom Keras Conv2D layer as follows:
class CustConv2D(Conv2D):
    """2-D convolution whose kernel is a weighted sum of fixed basis kernels.

    ``kernelB`` holds the fixed basis (indexed as a rank-3 tensor whose last
    axis enumerates the basis elements) and ``kernelA`` holds the trainable
    mixing coefficients created in ``build``.
    """

    def __init__(self, filters, kernel_size, kernelB=None, activation=None, **kwargs):
        """Record the layer settings and defer to the ``Conv2D`` constructor.

        Args:
            filters: number of output filters.
            kernel_size: int or pair of ints, normalized to a 2-tuple.
            kernelB: fixed basis tensor combined with ``kernelA`` in ``build``.
            activation: activation identifier, resolved via ``activations.get``
                and stored on the instance; it is not forwarded to the parent
                constructor.
            **kwargs: passed through to ``Conv2D.__init__``.
        """
        self.rank = 2
        self.num_filters = filters
        self.kernel_size = conv_utils.normalize_tuple(kernel_size, self.rank, 'kernel_size')
        self.kernelB = kernelB
        self.activation = activations.get(activation)
        super(CustConv2D, self).__init__(self.num_filters, self.kernel_size, **kwargs)

    def build(self, input_shape):
        """Create ``kernelA`` and derive ``self.kernel`` from it and ``kernelB``.

        Raises:
            ValueError: if the channel dimension of ``input_shape`` is ``None``.
        """
        channels_first = K.image_data_format() == 'channels_first'
        channel_axis = 1 if channels_first else -1

        in_channels = input_shape[channel_axis]
        if in_channels is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')

        # One trainable coefficient per (basis element, input channel, filter).
        basis_count = K.int_shape(self.kernelB)[-1]
        coeff_init = RandomUniform(minval=-1.0, maxval=1.0, seed=None)
        self.kernelA = self.add_weight(
            shape=(basis_count, in_channels, self.num_filters),
            initializer=coeff_init,
            name='kernelA',
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint)

        # Broadcast coefficients against the basis and collapse the basis
        # axis (axis 2), leaving a 4-D kernel tensor.
        coeffs = self.kernelA[None, None, :, :, :]
        basis = self.kernelB[:, :, :, None, None]
        self.kernel = K.sum(coeffs * basis, axis=2)

        # Set input spec.
        self.input_spec = InputSpec(ndim=self.rank + 2, axes={channel_axis: in_channels})
        self.built = True
        # NOTE(review): the parent build() runs after self.kernel was assigned
        # above; it presumably creates its own kernel/bias weights and so
        # overwrites self.kernel, leaving kernelA unused -- confirm.
        super(CustConv2D, self).build(input_shape)
I use CustConv2D as the first Conv layer of my model:
# Model input with shape (width, height, 1).
img = Input(shape=(width, height, 1))
# First layer: the custom convolution with 64 filters and an 11x11 kernel.
# basis_L1 is passed as kernelB, the fixed basis that build() combines with
# the trainable kernelA coefficients.
l1 = CustConv2D(filters=64, kernel_size=(11, 11), kernelB=basis_L1, activation='relu')(img)
The model compiles fine; but gives me the following error while training.
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
Is there a way to figure out which operation is throwing the error? Also, is there any implementation error in the way I am writing the custom layer?
Upvotes: 2
Views: 2903
Reputation:
It may be because some weights in your code are defined but not used in the calculation of the output. Their gradients with respect to the loss are therefore None/undefined.
A coded out example can be found here: https://github.com/keras-team/keras/issues/12521#issuecomment-496743146
Upvotes: 0
Reputation: 86600
You're destroying your build by calling the original Conv2D build (your `self.kernel` will be replaced, then `self.kernelA` will never be used, thus backpropagation will never reach it).
It's also expecting biases and all the regular stuff:
class CustConv2D(Conv2D):
    def __init__(self, filters, kernel_size, kernelB=None, activation=None, **kwargs):
        # ... (attribute setup unchanged from the question) ...

        # Disable the bias, since this layer never defines one, and forward
        # the activation to the parent constructor (it was missing before).
        # Be aware that this call also replaces every `self.<attr>` that was
        # assigned before it.
        super(CustConv2D, self).__init__(self.num_filters, self.kernel_size,
                                         activation=activation,
                                         use_bias=False, **kwargs)

    def build(self, input_shape):
        # ... (kernelA / kernel construction unchanged from the question) ...

        # No bias is used by this layer.
        self.bias = None
        # Mark the layer as built.
        self.built = True
        # Do NOT call super(CustConv2D, self).build(input_shape) here:
        # it would replace self.kernel and leave self.kernelA unused.
Upvotes: 3