dtr43

Reputation: 155

Printing the size of the input and output of all the layers of a pretrained model

I want to print the sizes of the inputs and outputs of all the layers of a pretrained model. I use this pretrained model as self.feature in my class.

The printout of this pretrained model is as follows:

TimeSformer(
  (model): VisionTransformer(
    (dropout): Dropout(p=0.0, inplace=False)
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (time_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): DropPath()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      ...
      (11): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): DropPath()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
    (head): Linear(in_features=768, out_features=400, bias=True)
  )
)

This is the code of my class and the method I use for printing the sizes of the layers:

class Class(nn.Module):
    def __init__(self, pretrained=False):
        super(Class, self).__init__()

        self.feature = TimeSformer(img_size=224, num_classes=400, num_frames=8,
                                   attention_type='divided_space_time',
                                   pretrained_model='path/to/the/weight.pyth')

    def forward(self, x):
        for layer in self.feature:
            x = layer(x)
            print(x.size())
        return x

I'm using the approach above for printing, but I am facing this error:

TypeError: 'TimeSformer' object is not iterable

How can I print the sizes of all the layers?

Update:

Using the following code instead produces the error mentioned in the comments:

def forward(self, x, out_consp=False):
    layers = list(self.featureExtractor.children())
    for layer in layers:
        x = layer(x)
        print(x.size())
    return x

Upvotes: 0

Views: 911

Answers (1)

Hatem

Reputation: 521

You can use forward hooks to print the shape of the input and the output of each layer. (A plain nn.Module such as TimeSformer is not iterable, and calling each child in sequence only works for strictly sequential models, so hooks are the general way to observe every layer.) You can use this code to do what you want:

import torch

def hook_function(module, input, output):
    # Called after every forward pass of the module it is attached to.
    print(f'{module.name} :')
    print(module)
    if isinstance(input[0], tuple):
        print('input shapes:')
        for elem in input[0]:
            print(elem.shape)
    else:
        print(f'input shape: {input[0].shape}')
    if isinstance(output, tuple):
        print('output shapes:')
        for elem in output:
            print(elem.shape)
    else:
        print(f'output shape: {output.shape}')
    print('')

def set_names(net):
    # Store a fully qualified name on every submodule so the hook can print it.
    def recurs(net, parent_name=None):
        for name, mod in net.named_children():
            if parent_name is not None:
                name = '_'.join([parent_name, name])
            recurs(mod, name)
            setattr(mod, 'name', name)

    recurs(net)

def print_shapes(network, dummy_input_shape, device='cuda', eval=True):
    network = network.to(device)
    if eval:
        network.eval()
    else:
        network.train()
        # layers such as BatchNorm need more than one sample in train mode
        assert dummy_input_shape[0] > 1
    dummy = torch.randn(dummy_input_shape, device=device)
    set_names(network)
    handles = []
    def attach_hooks(net):
        # Attach hooks to leaf modules only (modules with no children).
        leaf_layers = 0
        for mod in net.children():
            leaf_layers += 1
            attach_hooks(mod)
        if leaf_layers == 0:
            handles.append(net.register_forward_hook(hook_function))
    attach_hooks(network)
    network(dummy)
    # Remove the hooks so later forward passes run silently.
    for handle in handles:
        handle.remove()
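
For reference, this is a minimal sketch of the forward-hook mechanism the helpers above rely on, using a hypothetical toy model rather than the TimeSformer: the hook receives the module, a tuple of the inputs passed to forward(), and the output, and the handle returned by register_forward_hook detaches it again.

import torch
import torch.nn as nn

# Toy model for illustration only (not the TimeSformer).
toy = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5))

def shape_hook(module, input, output):
    # `input` is a tuple of the positional arguments given to forward()
    print(type(module).__name__, tuple(input[0].shape), '->', tuple(output.shape))

handles = [m.register_forward_hook(shape_hook) for m in toy.children()]
toy(torch.randn(4, 10))
# Linear (4, 10) -> (4, 20)
# ReLU (4, 20) -> (4, 20)
# Linear (4, 20) -> (4, 5)
for h in handles:
    h.remove()  # detach the hooks when done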

Example:

network = TimeSformer(img_size=224, num_classes=400, num_frames=8,
                      attention_type='divided_space_time',
                      pretrained_model='path/to/the/weight.pyth')
# The behaviour of a forward function can differ between training and eval
print_shapes(network, (1, 3, 224, 224), 'cpu', eval=True)
print_shapes(network, (2, 3, 224, 224), 'cpu', eval=False)

A snippet of the output is shown below. Note that norm1 is defined before the temporal_norm1 layer in the Block module, but it is reported later because the forward pass executes it later:

model_blocks_11_temporal_fc :
Linear(in_features=768, out_features=768, bias=True)
input shape: torch.Size([2, 1568, 768])
output shape: torch.Size([2, 1568, 768])

model_blocks_11_norm1 :
LayerNorm((768,), eps=1e-06, elementwise_affine=True)
input shape: torch.Size([16, 197, 768])
output shape: torch.Size([16, 197, 768])
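
To see why hooks can report layers out of definition order, here is a minimal sketch with a hypothetical toy module whose forward() calls its submodules in the reverse of the order in which they were defined; the hooks fire in execution order:

import torch
import torch.nn as nn

class OutOfOrder(nn.Module):
    def __init__(self):
        super().__init__()
        self.first = nn.Linear(8, 8)   # defined first
        self.second = nn.Linear(8, 8)  # defined second

    def forward(self, x):
        # `second` runs before `first`, so its hook fires first
        return self.first(self.second(x))

net = OutOfOrder()
for name, mod in net.named_children():
    mod.register_forward_hook(lambda m, i, o, n=name: print(n, 'fired'))
net(torch.randn(1, 8))
# prints: second fired
#         first fired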

Upvotes: 1
