
Reputation: 6197

TPU: NameError: name '_minimize' is not defined when defining Keras custom train_step

I have a model that runs just fine on a GPU but raises an error on a TPU.

I am trying to define my own custom model in TensorFlow Keras; the code is below:

# Keras model subclass that overrides `train_step` with a custom training step.
class CustomModel(tf.keras.Model):
    # Runs one training step on `data`: forward pass under a gradient tape,
    # loss computation, then hands the tape, optimizer and loss to `_minimize`
    # and updates the compiled metrics.
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x = data
        # The target is a fixed constant 1.0 rather than a label taken from `data`.
        y = tf.constant([1.0], dtype=tf.float32)

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # NOTE: `_minimize` is referenced as a bare name; nothing in this snippet
        # defines or imports it, which is consistent with the NameError reported
        # for this line. The remaining arguments and the closing parenthesis of
        # the call are missing from this listing.
        _minimize(self.distribute_strategy, tape, self.optimizer, loss,

        # NOTE(review): `sample_weight` is not assigned anywhere in this snippet --
        # confirm where it is meant to come from.
        self.compiled_metrics.update_state(y, y_pred, sample_weight)

but when I try to train, I run into

NameError: name '_minimize' is not defined, even though it is defined in the same source code as the model class I inherit from.

Here is the full error message

NameError                                 Traceback (most recent call last)
<ipython-input-44-2b800165a5d8> in <module>()
     13         validation_data=val_dataset,
     14         validation_steps=val_steps,
---> 15         validation_freq=1)

10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/ in _method_wrapper(self, *args, **kwargs)
     64   def _method_wrapper(self, *args, **kwargs):
     65     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
---> 66       return method(self, *args, **kwargs)
     68     # Running inside `run_distribute_coordinator` already.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/ in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
    846                 batch_size=batch_size):
    847               callbacks.on_train_batch_begin(step)
--> 848               tmp_logs = train_function(iterator)
    849               # Catch OutOfRangeError for Datasets of unknown size.
    850               # This blocks until the batch has finished executing.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in __call__(self, *args, **kwds)
    578         xla_context.Exit()
    579     else:
--> 580       result = self._call(*args, **kwds)
    582     if tracing_count == self._get_tracing_count():

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in _call(self, *args, **kwds)
    625       # This is the first call of __call__, so we have to initialize.
    626       initializers = []
--> 627       self._initialize(args, kwds, add_initializers_to=initializers)
    628     finally:
    629       # At this point we know that the initialization is complete (or less

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in _initialize(self, args, kwds, add_initializers_to)
    504     self._concrete_stateful_fn = (
    505         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 506             *args, **kwds))
    508     def invalid_creator_scope(*unused_args, **unused_kwds):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2444       args, kwargs = None, None
   2445     with self._lock:
-> 2446       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2447     return graph_function

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in _maybe_define_function(self, args, kwargs)
   2776       self._function_cache.missed.add(call_context_key)
-> 2777       graph_function = self._create_graph_function(args, kwargs)
   2778       self._function_cache.primary[cache_key] = graph_function
   2779       return graph_function, args, kwargs

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2665             arg_names=arg_names,
   2666             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2667             capture_by_value=self._capture_by_value),
   2668         self._function_attributes,
   2669         # Tell the ConcreteFunction to clean up its graph once it goes out of

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    979         _, original_func = tf_decorator.unwrap(python_func)
--> 981       func_outputs = python_func(*func_args, **func_kwargs)
    983       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in wrapped_fn(*args, **kwds)
    439         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    440         # the function a weak reference to itself to avoid a reference cycle.
--> 441         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    442     weak_wrapped_fn = weakref.ref(wrapped_fn)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

NameError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/ train_function  *
        outputs =
    <ipython-input-4-56fa61b8449a>:14 train_step  *
        _minimize(self.distribute_strategy, tape, self.optimizer, loss,

    NameError: name '_minimize' is not defined

I know that

  gradients = tape.gradient(loss, trainable_variables)
  self.optimizer.apply_gradients(zip(gradients, trainable_variables))

is equivalent to _minimize, but I can't use it in my case: I am training on a TPU, and there this code raises an error for some reason (see the issue "AttributeError: Tensor.name is meaningless when eager execution is enabled." when training on TPU at "self.optimizer.apply_gradients").

I tried a workaround in which I also define _minimize inside the class itself when overriding it:

# Keras model subclass that overrides `train_step` and carries its own copy of
# the `_minimize` helper as a method.
class CustomModel(tf.keras.Model):
    # NOTE(review): no body is shown for `__init__` in this listing.
    def __init__(self):
    # Runs one training step on `data`: forward pass under a gradient tape,
    # loss computation, a call to `self._minimize`, a metrics update, and then
    # returns the results of `self.metrics`.
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x = data
        # The target is a fixed constant 1.0 rather than a label taken from `data`.
        y = tf.constant([1.0], dtype=tf.float32)

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Invoked as a bound method, so Python passes the instance as an extra
        # leading positional argument. The rest of this call (remaining
        # arguments and closing parenthesis) is missing from the listing.
        self._minimize(self.distribute_strategy, tape, self.optimizer, loss,

        # NOTE(review): `sample_weight` is not assigned anywhere in this snippet --
        # confirm where it is meant to come from.
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
        # NOTE(review): this is a set comprehension over metric results, not a
        # name -> value mapping -- confirm this is the intended return value.
        return { m.result() for m in self.metrics}

    # Computes gradients of `loss` with respect to `trainable_variables` from
    # `tape`, post-processes them, and applies them through `optimizer`.
    #
    # NOTE: declared without a `self` parameter although it is called above as
    # `self._minimize(...)`. The instance therefore lands in the `strategy`
    # slot and each call carries one positional argument more than this
    # signature accepts, which matches the reported
    # "takes 5 positional arguments but 6 were given" TypeError.
    def _minimize(strategy, tape, optimizer, loss, trainable_variables):
        # Scale the loss inside the tape context when a LossScaleOptimizer is
        # in use.
        with tape:
            if isinstance(optimizer, lso.LossScaleOptimizer):
                loss = optimizer.get_scaled_loss(loss)

        gradients = tape.gradient(loss, trainable_variables)
        # Each gradient is passed through `ClipIfNotNone` and then
        # `ClipIfNotNone2`; neither helper is defined in this snippet.
        gradients = [(ClipIfNotNone(grad)) for grad in gradients]
        gradients = [(ClipIfNotNone2(grad)) for grad in gradients]
        # Whether to aggregate gradients outside of optimizer. This requires support
        # of the optimizer and doesn't work with ParameterServerStrategy and
        # CentralStorageStrategy.
        # NOTE: the second argument of the `isinstance` check and the closing
        # parentheses of this expression are missing from the listing.
        aggregate_grads_outside_optimizer = (
            optimizer._HAS_AGGREGATE_GRAD and  # pylint: disable=protected-access
            not isinstance(strategy.extended,

        if aggregate_grads_outside_optimizer:
            # We aggregate gradients before unscaling them, in case a subclass of
            # LossScaleOptimizer all-reduces in fp16. All-reducing in fp16 can only be
            # done on scaled gradients, not unscaled gradients, for numeric stability.
            # NOTE: the continuation of this `zip(...)` call is missing from the
            # listing.
            gradients = optimizer._aggregate_gradients(zip(gradients,  # pylint: disable=protected-access
        # Undo the loss scaling on the gradients when a LossScaleOptimizer is
        # in use.
        if isinstance(optimizer, lso.LossScaleOptimizer):
            gradients = optimizer.get_unscaled_gradients(gradients)
        gradients = optimizer._clip_gradients(gradients)  # pylint: disable=protected-access
        # Only apply an update when there is at least one trainable variable.
        if trainable_variables:
            # NOTE: lines are missing from this branch in the listing -- the
            # opening of the first `apply_gradients` call, its trailing
            # argument(s), and presumably an `else:` before the second call
            # (TODO confirm against the original post).
            if aggregate_grads_outside_optimizer:
                    zip(gradients, trainable_variables),
                optimizer.apply_gradients(zip(gradients, trainable_variables))

But then I get

TypeError: tf___minimize() takes 5 positional arguments but 6 were given

Upvotes: 0

Views: 698

Answers (1)


Reputation: 2804

Try this:

# NOTE(review): this suggested snippet appears to be cut off -- `__init__` has no
# body and no other method is shown. Presumably the suggestion concerned how
# `_minimize` is declared inside the class (e.g. taking `self` as its first
# parameter) -- TODO confirm against the original answer.
class CustomModel(tf.keras.Model):
    def __init__(self):
    # your code

Upvotes: 0

Related Questions