dl.meteo
dl.meteo

Reputation: 1766

Bidirectional LSTM fails on save_model()

I am using different LSTM architectures in combination with kerastuner.

This one:

def tf_encode_decode_lstm_multivariate(
    parameter_optimization: kt.HyperParameters,
) -> tf.keras.Model:
    """
    This LSTM model based on the work from this website:
    https://machinelearningmastery.com/
    how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/

    Args:
        parameter_optimization: is used within keras tuner for storing hyper-parameters

    Returns:
        A compiled LSTM keras Model
    """
    model = tf.keras.Sequential()

    model.add(
        tf.keras.layers.LSTM(
            input_shape=(
                parameter_optimization.get(TUNER_KEY_LSTM_SEQUENCE_LENGTH),
                parameter_optimization.get(TUNER_KEY_INPUT_SHAPE),
            ),
            units=parameter_optimization.get(TUNER_KEY_FIRST_NEURON),
            activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION),
        )
    )
    model.add(
        tf.keras.layers.RepeatVector((parameter_optimization.get("output_shape")))
    )
    model.add(
        tf.keras.layers.LSTM(
            units=parameter_optimization.get(TUNER_KEY_SECOND_NEURON),
            activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION_2),
            return_sequences=True,
        )
    )

    model.add(
        tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(
                100,
                activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION),
            )
        )
    )
    model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=1)))

    model.compile(
        optimizer=TF_OPTIMIZER_DICT[parameter_optimization.get(TUNER_KEY_OPTIMIZER)](
            parameter_optimization.get(TUNER_KEY_LEARNING_RATE)
        ),
        loss=TF_LOSS_FUNCTION_DICT[parameter_optimization.get(TUNER_KEY_LOSS)](),
        metrics=TF_VALIDATION_METRICS_DICTS[
            parameter_optimization.get(TUNER_KEY_VALIDATION_METRIC)
        ](name=parameter_optimization.get(TUNER_KEY_VALIDATION_METRIC)),
    )

    return model

Works correctly but for the one with a bidrectional layer:

def tf_bidirectional_lstm_multivariate(
    parameter_optimization: kt.HyperParameters,
) -> tf.keras.Model:
    """
    This function creates a bidirectional LSTM model to adjust hyper-parameters with kerastuner.
    Hyper-parameter settings can be adjusted in model_config: first_neuron, second_neuron,
    layer_activation_functions, loss_function, learning_rate
    Args:
        parameter_optimization: is used within keras tuner for storing hyper-parameters
    Returns:
        A compiled bidirectional LSTM neural net keras Model
    """
    model = tf.keras.Sequential()

    model.add(
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                input_shape=(
                    parameter_optimization.get(TUNER_KEY_LSTM_SEQUENCE_LENGTH),
                    parameter_optimization.get(TUNER_KEY_INPUT_SHAPE),
                ),
                units=parameter_optimization.get(TUNER_KEY_FIRST_NEURON),
                activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION),
            )
        )
    )

    model.add(
        tf.keras.layers.RepeatVector((parameter_optimization.get("output_shape")))
    )
    model.add(
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                units=parameter_optimization.get(TUNER_KEY_SECOND_NEURON),
                activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION_2),
                return_sequences=True,
            )
        )
    )

    model.add(
        tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(
                100,
                activation=parameter_optimization.get(TUNER_KEY_ACTIVATION_FUNCTION),
            )
        )
    )
    model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=1)))

    model.compile(
        optimizer=TF_OPTIMIZER_DICT[parameter_optimization.get(TUNER_KEY_OPTIMIZER)](
            parameter_optimization.get(TUNER_KEY_LEARNING_RATE)
        ),
        loss=TF_LOSS_FUNCTION_DICT[parameter_optimization.get(TUNER_KEY_LOSS)](),
        metrics=TF_VALIDATION_METRICS_DICTS[
            parameter_optimization.get(TUNER_KEY_VALIDATION_METRIC)
        ](name=parameter_optimization.get(TUNER_KEY_VALIDATION_METRIC)),
    )

    return model

I receive the following ValueError when i try to save the model as h5 data after training:

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in save(self, filepath, overwrite, include_optimizer, save_format, signatures, options, save_traces)
   1999     """
   2000     # pylint: enable=line-too-long
-> 2001     save.save_model(self, filepath, overwrite, include_optimizer, save_format,
   2002                     signatures, options, save_traces)
   2003 

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/saving/save.py in save_model(model, filepath, overwrite, include_optimizer, save_format, signatures, options, save_traces)
    151           'to the Tensorflow SavedModel format (by setting save_format="tf") '
    152           'or using `save_weights`.')
--> 153     hdf5_format.save_model_to_hdf5(
    154         model, filepath, overwrite, include_optimizer)
    155   else:

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/saving/hdf5_format.py in save_model_to_hdf5(model, filepath, overwrite, include_optimizer)
     87   # entities like metrics added using `add_metric` and losses added using
     88   # `add_loss.`
---> 89   if len(model.weights) != len(model._undeduplicated_weights):
     90     logging.warning('Found duplicated `Variable`s in Model\'s `weights`. '
     91                     'This is usually caused by `Variable`s being shared by '

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in weights(self)
   2343       A list of variables.
   2344     """
-> 2345     return self._dedup_weights(self._undeduplicated_weights)
   2346 
   2347   @property

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in _undeduplicated_weights(self)
   2348   def _undeduplicated_weights(self):
   2349     """Returns the undeduplicated list of all layer variables/weights."""
-> 2350     self._assert_weights_created()
   2351     weights = []
   2352     for layer in self._layers:

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/sequential.py in _assert_weights_created(self)
    525     # When the graph has not been initialized, use the Model's implementation to
    526     # to check if the weights has been created.
--> 527     super(functional.Functional, self)._assert_weights_created()  # pylint: disable=bad-super-call
    528 
    529 

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in _assert_weights_created(self)
   2471       # been invoked yet, this will cover both sequential and subclass model.
   2472       # Also make sure to exclude Model class itself which has build() defined.
-> 2473       raise ValueError('Weights for model %s have not yet been created. '
   2474                        'Weights are created when the Model is first called on '
   2475                        'inputs or `build()` is called with an `input_shape`.' %

ValueError: Weights for model sequential have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.

Nevertheless working with the trained model is not a problem and still works, just the save_model() step fails.

I am running tensorflow-gpu 2.4.1 and kerastuner 1.0.4

I hope anyone is able to tell me what I am doing wrong by using bidirectional layers? Thanks a lot for your support

Upvotes: 0

Views: 183

Answers (1)

Niklas Riewald
Niklas Riewald

Reputation: 56

You have to provide an input_shape to the first layer. In your case this is tf.keras.layers.Bidirectional() and not tf.keras.layers.LSTM.

Two alternatives are:

  • First call your model with actual data
  • Call model.build(input_shape) before calling model.compile()

Upvotes: 1

Related Questions