Reputation: 33
I am writing a custom layer in TensorFlow 2.0 and I ran into a problem, as follows:
I want to map a 1D weight array (5x1) into a 2D array (10x10). Suppose I have the indices for the 1D-to-2D mapping in weight_index_lst:
weight_id, row, col
1,5,6
2,6,7
3,7,8
4,8,9
5,9,10
All other locations of the 2D array just get a value of 0. Here's my script for the custom layer. My input has shape (10x1). w_mat should receive 0 everywhere that self.w is not assigned:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class mylayer(layers.Layer):
    def __init__(self, weight_index_lst, **kwargs):
        super(mylayer, self).__init__(**kwargs)
        self.weight_index_lst = weight_index_lst

    def build(self, input_shape):
        self.w = self.add_weight(shape=(5, 1),
                                 initializer='he_normal',
                                 trainable=True)

    def call(self, inputs):
        ct = 0
        w_mat = tf.Variable(np.zeros((10, 10)), dtype='float32', trainable=False)
        for i in range(5):
            i1 = self.weight_index_lst[i, 1]  # row index
            i2 = self.weight_index_lst[i, 2]  # column index
            w_mat[i1, i2].assign(self.w[ct, 0])  # problem: no gradient provided
            # or w_mat[i1, i2] = self.w[ct, 0]  # resource variable cannot be assigned
            ct = ct + 1
        y = tf.matmul(w_mat, inputs)
        return y
I could have declared a (10x10) weight array, but my network needs the other entries to be 0 and not trainable.
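For reference, here is a minimal sketch of the no-gradient behaviour (a simplified repro of my own, assuming TF 2.x eager execution; the shapes are made up):
import numpy as np
import tensorflow as tf

w = tf.Variable([[1.0]])  # trainable weight
with tf.GradientTape() as tape:
    m = tf.Variable(np.zeros((2, 2)), dtype='float32', trainable=False)
    m[0, 1].assign(w[0, 0])  # the assignment is not recorded on the tape
    y = tf.reduce_sum(m)
print(tape.gradient(y, w))  # None -- no gradient flows through assign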
Upvotes: 1
Views: 302
Reputation: 4990
If you specifically want to create a new layer with trainable weights, then the fix for your problem (no gradients propagating through assign) is to express everything as symbolic tensor operations; then TF will be able to propagate the gradients. One way to do that: create the 1D tensor of weights you want to train, prepend a non-trainable constant 0.0 to it, and then use tf.gather to pick either the needed weight or the constant zero for each of the n**2 elements of the matrix you multiply the layer's input by. Since all operations are symbolic tensor operations, TF can propagate gradients with no problems. Code for this approach below:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

class mylayer(layers.Layer):
    def __init__(self, n, weight_index_lst, **kwargs):
        super(mylayer, self).__init__(**kwargs)
        self.weight_index_lst = weight_index_lst
        self.n = n

    def build(self, input_shape):
        self.w = self.add_weight(shape=(len(self.weight_index_lst),),
                                 initializer='he_normal',
                                 trainable=True)

    def call(self, inputs):
        const_zero = tf.constant([0.], dtype=tf.float32)
        const_zero_and_weights = tf.concat([const_zero, self.w], axis=0)
        ct = 1  # start with 1 since 0 means take the non-trainable 0. from const_zero_and_weights
        selector = np.zeros((self.n ** 2), dtype=np.int32)  # indices
        for i, j in self.weight_index_lst:
            selector[i * self.n + j] = ct
            ct = ct + 1
        t_ind = tf.constant(selector, dtype=tf.int32)
        w_flattened = tf.gather(const_zero_and_weights, t_ind)
        w_matrix = tf.reshape(w_flattened, (self.n, self.n))
        y = tf.matmul(w_matrix, inputs)
        return y

m = tf.keras.Sequential([
    layers.Dense(21**2, input_shape=(45,)),
    layers.Reshape(target_shape=(21, 21)),
    mylayer(21, [(4,5), (5,6), (6,7), (7,8), (8,9)]),
])
m.summary()
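As a quick sanity check (an assumed usage sketch, not part of the original answer), you can confirm that gradients now reach the layer's weights:
x = tf.random.normal((2, 45))  # dummy batch matching input_shape=(45,)
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(m(x))
grads = tape.gradient(loss, m.trainable_variables)
print(grads[-1])  # gradient for mylayer's self.w -- a real tensor, not None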
Upvotes: 1
Reputation: 4990
You don't need to create a trainable layer for this. Consider just using a non-trainable Lambda layer:
def select_as_needed(x, wrc, n):
    # selector: for each output cell, the index of the input element we want to select (0 otherwise)
    selector = np.zeros(n * n, dtype=np.int32)
    # mask: 0./1. tensor with ones only at the positions where we put some selected element
    mask = np.zeros(n * n, dtype=np.float32)
    for w, r, c in wrc:
        selector[r * n + c] = w
        mask[r * n + c] = 1.0
    t_ind = tf.constant(selector, dtype=tf.int32)
    t_mask = tf.constant(mask, dtype=tf.float32)
    # without the mask, the 0-index value of the input would go to every
    # position for which we didn't select anything
    return tf.gather(x, t_ind, axis=1) * t_mask
wrc = [(0,4,5), (1,5,6), (2,6,7), (3,7,8), (4,8,9)]  # same as your table, but 0-based
n = 10
model = tf.keras.models.Sequential([
    # ... your stuff
    tf.keras.layers.Dense(5, 'linear'),  # 5 outputs per sample (or whatever else produces 5 outputs)
    tf.keras.layers.Lambda(select_as_needed, arguments={'wrc': wrc, 'n': n}),
    tf.keras.layers.Reshape(target_shape=(n, n)),
])
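A quick placement check (with an assumed dummy input, not part of the original answer):
x = tf.constant([[10., 20., 30., 40., 50.]])  # one sample with 5 values
out = tf.reshape(select_as_needed(x, wrc, n), (n, n))
print(out.numpy()[4, 5], out.numpy()[5, 6])  # 10.0 20.0 -- each value lands at its (row, col)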
Upvotes: 0