Reputation: 8913
I've created the following Keras custom model:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
def __init__(self, num_classes):
super(MyModel, self).__init__()
self.dense_layer = tf.keras.layers.Dense(num_classes,activation='softmax')
self.lambda_layer = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))
def call(self, inputs):
x = self.dense_layer(inputs)
x = self.lambda_layer(x)
return x
# A convenient way to get model summary
# and plot in subclassed api
def build_graph(self, raw_shape):
x = tf.keras.layers.Input(shape=(raw_shape))
return tf.keras.Model(inputs=[x],
outputs=self.call(x))
The task is multi-class classification. Model consists of a dense layer with softmax activation and a lambda layer as a post-processing unit that converts the dense output vector to a single value (predicted class).
The train targets are a one-hot encoded matrix like so:
[
[0,0,0,0,1]
[0,0,1,0,0]
[0,0,0,1,0]
[0,0,0,0,1]
]
It would be nice if I could define a categorical_crossentropy
loss over the dense layer and ignore the lambda layer while still maintaining the functionality and outputting a single value when I call model.predict(x)
.
Please note
My workspace environment doesn't allow me to use a custom training loop as suggested by @alonetogether excellent answer.
Upvotes: 1
Views: 937
Reputation: 1
I had a similar issue where I needed to train the model on the normalized ground truth data, but I want to the model to output the "unnormalized" result.
What I did was add the "unnormalization" layer to the model and just add a wrapper around the loss function that renormalizes the output of the model for computing the loss only. This can be done in the compile function.
MyModel(keras.Model):
def __init__(output_mean, output_std, *args, **kwargs):
self.output_mean = output_mean
self.output_std = output_std
super(MyModel, self).__init__(*args, **kwargs)
def compile(optimizer, loss, *args, **kwargs):
def loss_wrapper(y_true, y_pred):
y_pred = (y_pred - self.output_mean) / self.output_std
return loss(y_true, y_pred)
super(MyModel, self).compile(optimizer, loss_wrapper, *args, **kwargs)
When I load the model later, I don't compile it so as to avoid the "Loss function 'loss_wrapper' not found" error
model.load_model("/model_path", compile=False)
Upvotes: 0
Reputation: 86620
I think there is a Model.predict_classes
function that would replace the need for that lambda layer. But if it doesn't work:
There doesn't seem to be a way to do that without using one of these hacks:
I'm quite convinced there is no other workaround for this.
So, I believe the "two models" version is the best for your case where you seem to "need" a model with single input, single output and fit
.
Then I'd do this:
inputs = tf.keras.layers.Input(input_shape_without_batch_size)
loss_outputs = tf.keras.layers.Dense(num_classes,activation='softmax')(inputs)
final_outputs = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))(loss_outputs)
training_model = tf.keras.models.Model(inputs, loss_outputs)
final_model = tf.keras.models.Model(inputs, final_outputs)
training_model.compile(.....)
training_model.fit(....)
results = final_model.predict(...)
Upvotes: 1
Reputation: 26708
You can try using a custom training loop, which is pretty straightforward IMO:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
def __init__(self, num_classes):
super(MyModel, self).__init__()
self.dense_layer = tf.keras.layers.Dense(num_classes,activation='softmax')
self.lambda_layer = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))
def call(self, inputs):
x = self.dense_layer(inputs)
x = self.lambda_layer(x)
return x
# A convenient way to get model summary
# and plot in subclassed api
def build_graph(self, raw_shape):
x = tf.keras.layers.Input(shape=(raw_shape))
return tf.keras.Model(inputs=[x],
outputs=self.call(x))
n_classes = 5
model = MyModel(n_classes)
labels = tf.keras.utils.to_categorical(tf.random.uniform((50, 1), maxval=5, dtype=tf.int32))
train_dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1)), labels)).batch(2)
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy()
epochs = 2
for epoch in range(epochs):
print("\nStart of epoch %d" % (epoch,))
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
with tf.GradientTape() as tape:
logits = model.layers[0](x_batch_train)
loss_value = loss_fn(y_batch_train, logits)
grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
And prediction:
print(model.predict(tf.random.normal((1, 1))))
[3]
Upvotes: 2