Stefan Falk

Reputation: 25357

model(features) does not return EagerTensor

I have built a model that I can only train with a custom loss, which I am trying to debug.

For this I have this simple loop here:

for (mel_specs, pred_inp), labels in train_dataset:
    enc_predictions = model((mel_specs, pred_inp))  # <--- Returns a Tensor, not an EagerTensor
    input_lengths = get_padded_length(mel_specs[:, :, 0])
    label_lengths = get_padded_length(labels)
    print(enc_predictions)
    loss_value = rnnt_loss(enc_predictions, labels, input_lengths, label_lengths)
    print(loss_value)

The model is just:

model = tf.keras.Model(
    inputs=[mel_specs, pred_inp],
    outputs=[outputs]
)

The problem is that model((mel_specs, pred_inp)) just gives me a regular Tensor, not an EagerTensor, and I don't understand why. mel_specs and pred_inp are EagerTensors coming from train_dataset, which is a tf.data.Dataset.

What am I missing here?

Environment

$ pip freeze | grep tensorflow
tensorflow==2.2.0
tensorflow-addons==0.10.0
tensorflow-datasets==3.1.0
tensorflow-estimator==2.2.0
tensorflow-metadata==0.22.2
warprnnt-tensorflow==0.1

Update: MVCE

I was able to boil it down to the encoder part of the model. Running the code below fails and prints:

Calling model(x) didn't return EagerTensor
Traceback (most recent call last):
    ...
    return loss_value, tape.gradient(loss_value, model.trainable_variables)
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 1042, in gradient
    flat_grad = imperative_grad.imperative_grad(
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/imperative_grad.py", line 71, in imperative_grad
    return pywrap_tfe.TFE_Py_TapeGradient(
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 157, in _gradient_function
    return grad_fn(mock_op, *out_grads)
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 252, in _MeanGrad
    sum_grad = _SumGrad(op, grad)[0]
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 211, in _SumGrad
    output_shape_kept_dims = math_ops.reduced_shape(input_shape,
  File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 3735, in reduced_shape
    input_shape = input_shape.numpy()
AttributeError: 'Tensor' object has no attribute 'numpy'

The code:

import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor


class TimeReduction(tf.keras.layers.Layer):

    def __init__(self,
                 reduction_factor,
                 batch_size=None,
                 **kwargs):
        super(TimeReduction, self).__init__(**kwargs)
        self.reduction_factor = reduction_factor
        self.batch_size = batch_size

    def call(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size = self.batch_size
        if batch_size is None:
            batch_size = input_shape[0]
        max_time = input_shape[1]
        num_units = inputs.get_shape().as_list()[-1]
        outputs = inputs
        paddings = [[0, 0], [0, tf.math.floormod(max_time, self.reduction_factor)], [0, 0]]
        outputs = tf.pad(outputs, paddings)
        return tf.reshape(outputs, (batch_size, -1, num_units * self.reduction_factor))


def make_encoder_model(
    input_shape: tuple,
    out_dim: int,
    num_layers: int,
    d_model: int,
    proj_size,
    initializer=None,
    dtype=tf.float32,
    stateful: bool = False,
    dropout=0.5,
    reduction_index=1,
    reduction_factor=2,
):
    def lstm_cell():
        return tf.compat.v1.nn.rnn_cell.LSTMCell(
            d_model,
            num_proj=proj_size,
            initializer=initializer,
            dtype=dtype
        )

    batch_size = None if not stateful else 1

    inputs = tf.keras.Input(
        shape=input_shape,
        batch_size=batch_size,
        dtype=tf.float32
    )

    x = tf.keras.layers.BatchNormalization()(inputs)

    for i in range(num_layers):
        rnn_layer = tf.keras.layers.RNN(lstm_cell(), return_sequences=True, stateful=stateful)
        x = rnn_layer(x)
        x = tf.keras.layers.Dropout(dropout)(x)
        x = tf.keras.layers.LayerNormalization(dtype=dtype)(x)
        if i == reduction_index:
            x = TimeReduction(reduction_factor, batch_size=batch_size)(x)

    outputs = tf.keras.layers.Dense(out_dim)(x)

    return tf.keras.Model(
        inputs=[inputs],
        outputs=[outputs],
        name='encoder'
    )


def gradient(model, loss, inputs, y_true):
    y_true = tf.transpose(y_true, perm=(0, 2, 1))
    with tf.GradientTape() as tape:
        y_pred = model(inputs, training=True)
        loss_value = loss(y_true=y_true, y_pred=y_pred)
        return loss_value, tape.gradient(loss_value, model.trainable_variables)


def main():
    X, Y = [
        np.random.rand(100, 512),
        np.random.rand(100, 512)
    ], [[[0]*50], [[1]*50]]
    # assert len(X) == len(Y)

    encoder_model = make_encoder_model(
        input_shape=(None, 512),
        out_dim=1,
        num_layers=2,
        d_model=10,
        proj_size=23,
        dropout=0.5,
        reduction_index=1,
        reduction_factor=2
    )

    enc_dataset = tf.data.Dataset.from_generator(
        lambda: zip(X, Y),
        output_types=(tf.float32, tf.int32),
        output_shapes=([None, 512], [None, None]),
    ).batch(2)

    loss = tf.keras.losses.MeanSquaredError()

    for x, y in enc_dataset:
        from_predict = encoder_model.predict(x)
        from_call = encoder_model(x)
        if not isinstance(from_predict, np.ndarray):
            print("Calling model.predict(x) didn't return np.ndarray")
        if not isinstance(from_call, EagerTensor):
            print("Calling model(x) didn't return EagerTensor")
        loss_value, gradients = gradient(encoder_model, loss, x, y)
        print(loss_value)
        print(gradients)

    print('All done.')


if __name__ == '__main__':
    main()

Upvotes: 1

Views: 443

Answers (1)

xdurch0

Reputation: 10475

Why do you use the LSTM cell from compat.v1? I would imagine this leads to compatibility issues.

Most importantly, those "pure TensorFlow" RNN cells are not meant to be used with the Keras RNN layer anyway -- they were used with tf.nn.dynamic_rnn, for example, which is now deprecated and also found only in the compat.v1 module.

I would recommend that you simply use tf.keras.layers.LSTM directly, as it's much faster anyway -- it allows for the use of highly optimized GPU kernels. Alternatively, you can replace the compat.v1 LSTMCell with a tf.keras.layers.LSTMCell and put that into the RNN layer.
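For illustration, here is a minimal sketch of how the recurrent block inside make_encoder_model might look with either option (the helper name make_rnn_block is my own; also note that the Keras LSTMCell has no equivalent of the v1 num_proj argument, so a separate Dense layer stands in for the projection here):

import tensorflow as tf

def make_rnn_block(d_model, proj_size, stateful=False, use_fused_lstm=True):
    if use_fused_lstm:
        # Option 1: the fused Keras LSTM layer (can dispatch to the cuDNN kernel on GPU).
        rnn = tf.keras.layers.LSTM(d_model, return_sequences=True, stateful=stateful)
    else:
        # Option 2: a Keras LSTMCell wrapped in tf.keras.layers.RNN.
        rnn = tf.keras.layers.RNN(
            tf.keras.layers.LSTMCell(d_model),
            return_sequences=True,
            stateful=stateful,
        )
    # Stand-in for num_proj: project the recurrent output to proj_size units.
    projection = tf.keras.layers.Dense(proj_size)

    def apply(x):
        return projection(rnn(x))

    return apply

In the encoder loop you would then call x = make_rnn_block(d_model, proj_size, stateful=stateful)(x) instead of building the compat.v1 cell.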

Upvotes: 1
