Reputation: 1
I'm trying to implement SegFormer, a semantic segmentation model based on Transformers, in TensorFlow. I'm following the official PyTorch implementation and porting it to tf.keras (TensorFlow 2.5).
When I try to build a simplified version with only one stage, I get the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
The thing is, I do not know where this error comes from. I've already implemented ConvMLP, which has a very similar architecture, and got no errors there. The loop at
src/model/backbone/mit.py:438 call * inputs = blk(inputs)
which seems to be the root of the error, comes from my ConvMLP implementation, where it works without a problem.
Here is the full traceback.
Traceback (most recent call last):
  File "src/model/backbone/mit.py", line 520, in <module>
    model = get_mix_vision_transformer(
  File "src/model/backbone/mit.py", line 489, in get_mix_vision_transformer
    fmap_out = StageBlock(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 969, in __call__
    return self._functional_construction_call(inputs, args, kwargs,
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 1107, in _functional_construction_call
    outputs = self._keras_tensor_symbolic_call(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 840, in _keras_tensor_symbolic_call
    return self._infer_output_signature(inputs, args, kwargs, input_masks)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 880, in _infer_output_signature
    outputs = call_fn(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 695, in wrapper
    raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:

    src/model/backbone/mit.py:438 call  *
        inputs = blk(inputs)
    src/model/backbone/mit.py:372 call  *
        fmap = inputs + self.stochastic_drop(self.attn(self.norm1(inputs)))
    src/model/backbone/mit.py:293 call  *
        keys, values = fmap
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:520 __iter__
        self._disallow_iteration()
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:516 _disallow_iteration
        self._disallow_in_graph_mode("iterating over `tf.Tensor`")
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:494 _disallow_in_graph_mode
        raise errors.OperatorNotAllowedInGraphError(

    OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
Below is the full code for reproducibility (Python 3.8, TensorFlow 2.5, Ubuntu 20.04).
from typing import Any, Dict, List
import numpy as np
import tensorflow as tf
from loguru import logger
from tensorflow.keras.layers import (
Conv2D,
Dense,
DepthwiseConv2D,
Dropout,
Input,
LayerNormalization,
Permute,
Reshape,
)
from tensorflow.keras.models import Model, Sequential
# Referred from: github.com:rwightman/pytorch-image-models.
# https://keras.io/examples/vision/cct/#stochastic-depth-for-regularization
class StochasticDepth(tf.keras.layers.Layer):
def __init__(
self,
drop_prop,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.drop_prob = drop_prop
def call(self, inputs, training=None) -> tf.Tensor:
if training:
keep_prob = tf.cast(1 - self.drop_prob, dtype=inputs.dtype)
shape = (tf.shape(inputs)[0],) + (1,) * (len(tf.shape(inputs)) - 1)
random_tensor = keep_prob + tf.random.uniform(
shape, 0, 1, dtype=inputs.dtype
)
random_tensor = tf.floor(random_tensor)
return (inputs / keep_prob) * random_tensor
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update({"drop_prob": self.drop_prob})
return config
class Identity(tf.keras.layers.Layer):
def __init__(self) -> None:
super().__init__(name="IdentityTF")
def call(self, inputs) -> tf.Tensor:
return inputs
class OverlapPatchEmbed(tf.keras.layers.Layer):
def __init__(
self,
patch_size: int = 7,
strides: int = 4,
emb_dim: int = 768,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.patch_size = patch_size
self.strides = strides
self.emb_dim = emb_dim
self.l2_regul = l2_regul
self.norm = LayerNormalization()
def build(self, input_shape) -> None:
_, height, width, channels = input_shape
self.H = height // self.patch_size
self.W = width // self.patch_size
self.proj = Conv2D(
self.emb_dim,
kernel_size=self.patch_size,
strides=self.strides,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.reshape = Reshape(target_shape=(self.H * self.W, -1))
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.proj(inputs)
fmap = self.reshape(fmap)
return self.norm(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"patch_size": self.patch_size,
"strides": self.strides,
"emb_dim": self.emb_dim,
"l2_regul": self.l2_regul,
}
)
return config
class Mlp(tf.keras.layers.Layer):
def __init__(
self,
fc1_units: int,
fc2_units: int,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc1_units = fc1_units
self.fc2_units = fc2_units
self.l2_regul = l2_regul
self.gelu = tf.keras.activations.gelu
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.fc1 = Dense(
units=self.fc1_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.fc2 = Dense(
units=self.fc2_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.depth_conv = DepthwiseConv2D(
depth_multiplier=1,
kernel_size=3,
strides=1,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.fc1(inputs)
fmap = self.square_reshape(fmap)
fmap = self.depth_conv(fmap)
fmap = self.wide_reshape(fmap)
fmap = self.gelu(fmap)
return self.fc2(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc1_units": self.fc1_units,
"fc2_units": self.fc2_units,
"l2_regularization": self.l2_regul,
}
)
return config
class Attention(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
assert (
fc_units % num_heads == 0
), f"dim {fc_units} should be divided by num_heads {num_heads}."
self.fc_units = fc_units
self.num_heads = num_heads
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.l2_regul = l2_regul
self.head_dims = fc_units / num_heads
self.scale = 1 / tf.sqrt(self.head_dims)
self.softmax = tf.keras.activations.softmax
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
reduction_height = height // self.attn_reduction_ratio
reduction_width = width // self.attn_reduction_ratio
self.heads_reshape = Reshape(target_shape=(units, self.num_heads, -1))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.wide_reduction_reshape = Reshape(
target_shape=(reduction_height * reduction_width, -1)
)
self.kv_reshape = Reshape(
target_shape=(-1, 2, self.num_heads, int(self.head_dims))
)
self.query = Dense(
units=self.fc_units,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.key_value = Dense(
units=self.fc_units * 2,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.proj = Dense(
units=self.fc_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.attn_drop = Dropout(rate=self.attn_drop_prob)
self.proj_drop = Dropout(rate=self.proj_drop_prob)
self.permute = Permute((2, 1, 3))
if self.attn_reduction_ratio > 1:
self.attn_conv = Conv2D(
self.fc_units,
kernel_size=self.attn_reduction_ratio,
strides=self.attn_reduction_ratio,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.norm = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
queries = self.query(inputs)
queries = self.heads_reshape(queries)
queries = self.permute(queries)
fmap = inputs
if self.attn_reduction_ratio > 1:
fmap = self.square_reshape(fmap)
fmap = self.attn_conv(fmap)
fmap = self.wide_reduction_reshape(fmap)
fmap = self.norm(fmap)
fmap = self.key_value(fmap)
fmap = self.kv_reshape(fmap)
fmap = tf.transpose(fmap, perm=[2, 0, 3, 1, 4])
keys, values = fmap
attn = tf.matmul(queries, keys, transpose_b=True) * self.scale
attn = self.softmax(attn)
attn = self.attn_drop(attn)
x = tf.matmul(attn, values)
x = tf.transpose(x, perm=[0, 2, 1, 3])
x = self.wide_reshape(x)
x = self.proj(x)
return self.proj_drop(x)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"l2_regul": self.l2_regul,
}
)
return config
class FFNAttentionBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
def build(self, input_shape) -> None:
self.attn = Attention(
fc_units=self.fc_units,
num_heads=self.num_heads,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
)
self.stochastic_drop = (
StochasticDepth(drop_prop=self.stochastic_depth_rate)
if self.stochastic_depth_rate > 0
else Identity()
)
self.mlp = Mlp(
fc1_units=self.fc_units * self.mlp_ratio,
fc2_units=self.fc_units,
)
self.norm1 = LayerNormalization()
self.norm2 = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
fmap = inputs + self.stochastic_drop(self.attn(self.norm1(inputs)))
fmap = fmap + self.stochastic_drop(self.mlp(self.norm2(fmap)))
return fmap
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
class StageBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
depth: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
self.depth = depth
def build(self, input_shape) -> None:
self.blocks = [
FFNAttentionBlock(
fc_units=self.fc_units,
num_heads=self.num_heads,
mlp_ratio=self.mlp_ratio,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
stochastic_depth_rate=self.stochastic_depth_rate,
)
for _ in range(self.depth)
]
def call(self, inputs, training=None) -> tf.Tensor:
for blk in self.blocks:
inputs = blk(inputs)
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"depth": self.depth,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
def get_mix_vision_transformer(
img_shape: List[int],
patch_size: List[int],
strides: List[int],
emb_dims: List[int],
num_heads: List[int],
mlp_ratios: List[int],
proj_drop_prob: float,
attn_drop_prob: float,
stochastic_depth_rate: float,
attn_reduction_ratios: List[int],
depths: List[int],
) -> tf.keras.Model:
"""Instantiate a MiT model.
Returns:
A `tf.keras` model.
"""
dpr = [
rates for rates in np.linspace(0, stochastic_depth_rate, np.sum(depths))
]
img_input = Input(img_shape)
fmap = OverlapPatchEmbed(
patch_size=patch_size[0], strides=strides[0], emb_dim=emb_dims[0]
)(img_input)
fmap_out = StageBlock(
fc_units=emb_dims[0],
depth=depths[0],
num_heads=num_heads[0],
mlp_ratio=mlp_ratios[0],
attn_drop_prob=attn_drop_prob,
proj_drop_prob=proj_drop_prob,
attn_reduction_ratio=attn_reduction_ratios[0],
stochastic_depth_rate=dpr[0],
name="stage_1",
)(fmap)
return Model(img_input, fmap_out)
if __name__ == "__main__":
fmap = np.random.rand(1, 224, 224, 3)
patch_size = [7, 3, 3, 3]
strides = [4, 2, 2, 2]
emb_dims = [64, 128, 256, 512]
num_heads = [1, 2, 4, 8]
mlp_ratios = [4, 4, 4, 4]
proj_drop_prob = 0
attn_drop_prob = 0
stochastic_depth_rate = 0
attn_reduction_ratios = [8, 4, 2, 1]
depths = [3, 4, 6, 3]
# out = StageBlock(fc_units=16, depth=4)(fmap)
model = get_mix_vision_transformer(
img_shape=[224, 224, 3],
patch_size=patch_size,
strides=strides,
emb_dims=emb_dims,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
proj_drop_prob=proj_drop_prob,
attn_drop_prob=attn_drop_prob,
stochastic_depth_rate=stochastic_depth_rate,
attn_reduction_ratios=attn_reduction_ratios,
depths=depths,
)
out = model(fmap)
print(f"{out.shape.as_list()}")
model.summary()
Upvotes: 0
Views: 6060
Reputation: 31800
This answer was posted as an edit to the question "iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function" by the OP MathieuK under CC BY-SA 4.0.
After testing each module separately, I found that the error comes from the line:
keys, values = fmap
which works fine in eager mode, but no longer works once the model is built in graph mode. I had to modify it this way:
keys, values = tf.split(fmap, num_or_size_splits=2)
keys = tf.squeeze(keys, axis=0)
values = tf.squeeze(values, axis=0)
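For reference, a more compact variant (a sketch of the same idea, only tested on this model) is to unstack along the leading axis of size 2 that the transpose produces:

# assumption: fmap has shape (2, batch, num_heads, tokens, head_dims) after tf.transpose
keys, values = tf.unstack(fmap, num=2, axis=0)

Either way, the point is to replace the Python-level tuple unpacking, which makes AutoGraph iterate over a symbolic tf.Tensor, with TensorFlow ops that split the tensor explicitly.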
Upvotes: 3