Reputation: 117
I'm attempting to reassign the weights of a neuron to the weights it had in a previous epoch. To do this, I'm trying to save the old weights, but I'm having trouble making copies of the matrices.
I don't know how to copy an eager tensor within a custom layer. I tried using tf.identity and the copy library, but both gave me errors, although it's entirely possible I didn't implement them correctly. Any advice would be appreciated. I attached one of the errors below; it says deepcopy should work if eager execution is enabled, which confused me since I'm using TensorFlow 2, where eager execution should be on by default.
import random

import tensorflow as tf
from tensorflow import keras


class RevertWeightMatrixDenseLayer(keras.layers.Layer):
    def __init__(self, units, prob, **kwargs):
        super(RevertWeightMatrixDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.prob = prob

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )
        self.last_weight_1 = self.w
        self.last_weight_2 = self.w

    def call(self, inputs, training=False):
        current_weights = self.w
        if training:
            if self.prob > random.random():
                self.w.assign(self.last_weight_2)  # Assign preserves tf.Variable
                # deep copy all the weights here before assignment
                self.last_weight_2 = self.last_weight_1
                self.last_weight_1 = current_weights
        else:
            pass  # could think about multiplying all weights by a constant here
        return tf.nn.relu(tf.matmul(inputs, self.w) + self.b)
model = make_base_model()  # sets up a sequential model with some conv layers
model.add(RevertWeightMatrixDenseLayer(units=dense_units, prob=0.1))  # Custom layer
model.add(RevertWeightMatrixDenseLayer(units=dense_units, prob=0.1))  # Custom layer
model.add(layers.Dense(classes, activation='softmax'))
model.compile(loss='CategoricalCrossentropy',
              optimizer='adam',
              metrics=['accuracy'])
history = model.fit(train_dataset, validation_data=validation_dataset, epochs=epochs)
plot(history)
Attempting to deepcopy where I commented leads to the following error: NotImplementedError: deepcopy() is only available when eager execution is enabled.
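As far as I can tell (this is my guess at what's happening, assuming the default behavior where Keras traces call into a graph via tf.function during model.fit), deep-copying a tf.Variable works in eager mode but fails inside traced code, which would explain the error. A minimal sketch that seems to reproduce it:

import copy

import tensorflow as tf

v = tf.Variable([1.0, 2.0])
copy.deepcopy(v)  # fine: top-level code in TF 2 runs eagerly

@tf.function
def traced_copy():
    # inside a traced function, execution is no longer eager
    return copy.deepcopy(v)

traced_copy()  # raises the NotImplementedError from the question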
Upvotes: 1
Views: 341
Reputation: 11651
You want to keep state in your layer: that's exactly what tf.Variable is for (see the guide: Introduction to Variables). Set your last_weights as non-trainable tf.Variables, and use assign to copy the values around.
import random

import tensorflow as tf
from tensorflow import keras


class RevertWeightMatrixDenseLayer(keras.layers.Layer):
    def __init__(self, units, prob, **kwargs):
        super(RevertWeightMatrixDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.prob = prob

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )
        self.last_weight_1 = tf.Variable(self.w, trainable=False)
        self.last_weight_2 = tf.Variable(self.w, trainable=False)
        # we need an extra Variable to store the original value of w
        # when shuffling around
        self.tmp = tf.Variable(self.w, trainable=False)

    def call(self, inputs, training=False):
        self.tmp.assign(self.w)
        if training:
            if self.prob > random.random():
                self.w.assign(self.last_weight_2)  # Assign preserves tf.Variable
                self.last_weight_2.assign(self.last_weight_1)
                self.last_weight_1.assign(self.tmp)
        else:
            pass  # could think about multiplying all weights by a constant here
        return tf.nn.relu(tf.matmul(inputs, self.w) + self.b)
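As a quick sanity check (a standalone sketch with illustrative names, not code from the layer above): assign writes values into an existing Variable, so a snapshot made this way stays independent of later updates to w, which is exactly what the plain Python attribute binding in the question failed to do.

import tensorflow as tf

w = tf.Variable([[1.0, 2.0]])
snapshot = tf.Variable(w, trainable=False)  # value copy, not an alias
alias = w                                   # plain binding, like the question's code

w.assign([[5.0, 6.0]])                      # simulate a training update
print(snapshot.numpy())                     # [[1. 2.]] -- unaffected by the update
print(alias.numpy())                        # [[5. 6.]] -- follows w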
Upvotes: 1