Reputation: 25357
I have built a model that I can only train with a custom loss, which I am trying to debug. For this I have put together this simple loop:
for (mel_specs, pred_inp), labels in train_dataset:
    enc_predictions = model((mel_specs, pred_inp))  # <--- Returns a Tensor, not an EagerTensor
    input_lengths = get_padded_length(mel_specs[:, :, 0])
    label_lengths = get_padded_length(labels)

    print(enc_predictions)

    loss_value = rnnt_loss(enc_predictions, labels, input_lengths, label_lengths)
    print(loss_value)
The model is just:

model = tf.keras.Model(
    inputs=[mel_specs, pred_inp],
    outputs=[outputs]
)
The problem is that model((mel_specs, pred_inp)) just gives me a regular Tensor and not an EagerTensor, and I don't understand why. mel_specs and pred_inp are EagerTensors coming from train_dataset, which is a tf.data.Dataset.

What am I missing here?
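For reference, a rough sketch of the type check I am basing this on (the comments reflect what I observe under TF 2.2):

print(tf.executing_eagerly())                # True at the top level in TF 2.x
print(type(mel_specs))                       # tensorflow.python.framework.ops.EagerTensor
print(type(model((mel_specs, pred_inp))))    # a plain tensorflow.python.framework.ops.Tensor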
$ pip freeze | grep tensorflow
tensorflow==2.2.0
tensorflow-addons==0.10.0
tensorflow-datasets==3.1.0
tensorflow-estimator==2.2.0
tensorflow-metadata==0.22.2
warprnnt-tensorflow==0.1
I was able to boil it down to the encoder part of the model. If I run this it will fail and print:
Calling model(x) didn't return EagerTensor
Traceback (most recent call last):
...
return loss_value, tape.gradient(loss_value, model.trainable_variables)
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 1042, in gradient
flat_grad = imperative_grad.imperative_grad(
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/imperative_grad.py", line 71, in imperative_grad
return pywrap_tfe.TFE_Py_TapeGradient(
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 157, in _gradient_function
return grad_fn(mock_op, *out_grads)
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 252, in _MeanGrad
sum_grad = _SumGrad(op, grad)[0]
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 211, in _SumGrad
output_shape_kept_dims = math_ops.reduced_shape(input_shape,
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 3735, in reduced_shape
input_shape = input_shape.numpy()
AttributeError: 'Tensor' object has no attribute 'numpy'
The code:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor
class TimeReduction(tf.keras.layers.Layer):

    def __init__(self,
                 reduction_factor,
                 batch_size=None,
                 **kwargs):
        super(TimeReduction, self).__init__(**kwargs)
        self.reduction_factor = reduction_factor
        self.batch_size = batch_size

    def call(self, inputs):
        input_shape = tf.shape(inputs)

        batch_size = self.batch_size
        if batch_size is None:
            batch_size = input_shape[0]

        max_time = input_shape[1]
        num_units = inputs.get_shape().as_list()[-1]

        outputs = inputs
        paddings = [[0, 0], [0, tf.math.floormod(max_time, self.reduction_factor)], [0, 0]]
        outputs = tf.pad(outputs, paddings)

        return tf.reshape(outputs, (batch_size, -1, num_units * self.reduction_factor))


def make_encoder_model(
        input_shape: tuple,
        out_dim: int,
        num_layers: int,
        d_model: int,
        proj_size,
        initializer=None,
        dtype=tf.float32,
        stateful: bool = False,
        dropout=0.5,
        reduction_index=1,
        reduction_factor=2,
):
    def lstm_cell():
        return tf.compat.v1.nn.rnn_cell.LSTMCell(
            d_model,
            num_proj=proj_size,
            initializer=initializer,
            dtype=dtype
        )

    batch_size = None if not stateful else 1

    inputs = tf.keras.Input(
        shape=input_shape,
        batch_size=batch_size,
        dtype=tf.float32
    )

    x = tf.keras.layers.BatchNormalization()(inputs)

    for i in range(num_layers):
        rnn_layer = tf.keras.layers.RNN(lstm_cell(), return_sequences=True, stateful=stateful)
        x = rnn_layer(x)
        x = tf.keras.layers.Dropout(dropout)(x)
        x = tf.keras.layers.LayerNormalization(dtype=dtype)(x)
        if i == reduction_index:
            x = TimeReduction(reduction_factor, batch_size=batch_size)(x)

    outputs = tf.keras.layers.Dense(out_dim)(x)

    return tf.keras.Model(
        inputs=[inputs],
        outputs=[outputs],
        name='encoder'
    )


def gradient(model, loss, inputs, y_true):
    y_true = tf.transpose(y_true, perm=(0, 2, 1))
    with tf.GradientTape() as tape:
        y_pred = model(inputs, training=True)
        loss_value = loss(y_true=y_true, y_pred=y_pred)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)


def main():
    X, Y = [
        np.random.rand(100, 512),
        np.random.rand(100, 512)
    ], [[[0]*50], [[1]*50]]

    # assert len(X) == len(Y)

    encoder_model = make_encoder_model(
        input_shape=(None, 512),
        out_dim=1,
        num_layers=2,
        d_model=10,
        proj_size=23,
        dropout=0.5,
        reduction_index=1,
        reduction_factor=2
    )

    enc_dataset = tf.data.Dataset.from_generator(
        lambda: zip(X, Y),
        output_types=(tf.float32, tf.int32),
        output_shapes=([None, 512], [None, None]),
    ).batch(2)

    loss = tf.keras.losses.MeanSquaredError()

    for x, y in enc_dataset:
        from_predict = encoder_model.predict(x)
        from_call = encoder_model(x)

        if not isinstance(from_predict, np.ndarray):
            print("Calling model.predict(x) didn't return np.ndarray")

        if not isinstance(from_call, EagerTensor):
            print("Calling model(x) didn't return EagerTensor")

        loss_value, gradients = gradient(encoder_model, loss, x, y)
        print(loss_value)
        print(gradients)

    print('All done.')


if __name__ == '__main__':
    main()
Upvotes: 1
Views: 443
Reputation: 10475
Why do you use the LSTM cell from compat.v1? I would imagine this leads to compatibility issues. Most importantly, those "pure TensorFlow" RNN cells are not made to be used with the Keras RNN anyway -- they were used with tf.nn.dynamic_rnn, for example, which is now deprecated and also found only in the compat.v1 module.

I would recommend that you simply use tf.keras.layers.LSTM directly, as it is much faster anyway -- it allows for the use of highly optimized GPU kernels. Alternatively, you can replace the compat.v1 LSTMCell with a tf.keras.layers.LSTMCell and put that into the RNN.
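For illustration, a rough sketch of both options, reusing the names from the question's make_encoder_model. Note that tf.keras.layers.LSTM and tf.keras.layers.LSTMCell have no num_proj argument, so the projection from the compat.v1 cell is not reproduced here:

# Option 1: the fused Keras LSTM layer (can use the optimized GPU kernels)
for i in range(num_layers):
    x = tf.keras.layers.LSTM(d_model, return_sequences=True, stateful=stateful)(x)
    x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.LayerNormalization(dtype=dtype)(x)
    if i == reduction_index:
        x = TimeReduction(reduction_factor, batch_size=batch_size)(x)

# Option 2: keep the RNN wrapper, but give it the Keras cell instead of the compat.v1 one
x = tf.keras.layers.RNN(
    tf.keras.layers.LSTMCell(d_model),
    return_sequences=True,
    stateful=stateful
)(x)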
Upvotes: 1