meraxes

Reputation: 541

Pytorch: Getting the correct dimensions for final layer

PyTorch newbie here! I am trying to fine-tune a VGG16 model to predict 3 different classes. Part of my work involves converting the FC layers to CONV layers. However, the values of my predictions don't fall between 0 and 2 (the 3 class labels).

Can someone point me to a good resource on how to compute the correct dimensions for the final layer?

Here are the original FC layers of VGG16:

(classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
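
(For reference, in_features=25088 is 512 × 7 × 7, the flattened output of VGG16's last pooling layer for a 224×224 input.)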

My code for converting the FC layers to CONV layers:

def convert_fc_to_conv(self, fc_layers):
    # Replace first FC layer with CONV layer
    fc = fc_layers[0].state_dict()
    in_ch = 512
    out_ch = fc["weight"].size(0)
    first_conv = nn.Conv2d(512, out_ch, kernel_size=(1, 1), stride=(1, 1))

    conv_list = [first_conv]
    for idx, layer in enumerate(fc_layers[1:]):
        if isinstance(layer, nn.Linear):
            fc = layer.state_dict()
            in_ch = fc["weight"].size(1)
            out_ch = fc["weight"].size(0)
            if idx == len(fc_layers)-4:
                in_ch = 3
            conv = nn.Conv2d(out_ch, in_ch, kernel_size=(1, 1), stride=(1, 1))
            conv_list += [conv]
        else:
            conv_list += [layer]
        gc.collect()

    avg_pool = nn.AvgPool2d(kernel_size=2, stride=1, ceil_mode=False)
    conv_list += [avg_pool, nn.Softmax()]
    top_layers = nn.Sequential(*conv_list)
    return top_layers
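
To sanity-check the dimensions, one option is to push a dummy tensor through the converted layers and print the resulting shape (a minimal sketch; the model variable and the 224×224 input size are assumptions):

x = torch.zeros(1, 3, 224, 224)  # dummy batch of one RGB image
x = model.features(x)            # -> [1, 512, 7, 7] for VGG16
x = model.classifier(x)          # -> [1, 3, 6, 6] with the layers above
print(x.size())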

Final model architecture:

Model(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Conv2d(512, 4096, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Conv2d(4096, 3, kernel_size=(1, 1), stride=(1, 1))
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): AvgPool2d(kernel_size=2, stride=1, padding=0)
    (7): Softmax()
  )
)

Summary of the model (note that the final output shape is [-1, 3, 6, 6], i.e. the spatial dimensions are never collapsed to 1×1):

            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256, 56, 56]               0
           Conv2d-15          [-1, 256, 56, 56]         590,080
             ReLU-16          [-1, 256, 56, 56]               0
        MaxPool2d-17          [-1, 256, 28, 28]               0
           Conv2d-18          [-1, 512, 28, 28]       1,180,160
             ReLU-19          [-1, 512, 28, 28]               0
           Conv2d-20          [-1, 512, 28, 28]       2,359,808
             ReLU-21          [-1, 512, 28, 28]               0
           Conv2d-22          [-1, 512, 28, 28]       2,359,808
             ReLU-23          [-1, 512, 28, 28]               0
        MaxPool2d-24          [-1, 512, 14, 14]               0
           Conv2d-25          [-1, 512, 14, 14]       2,359,808
             ReLU-26          [-1, 512, 14, 14]               0
           Conv2d-27          [-1, 512, 14, 14]       2,359,808
             ReLU-28          [-1, 512, 14, 14]               0
           Conv2d-29          [-1, 512, 14, 14]       2,359,808
             ReLU-30          [-1, 512, 14, 14]               0
        MaxPool2d-31            [-1, 512, 7, 7]               0
           Conv2d-32           [-1, 4096, 7, 7]       2,101,248
             ReLU-33           [-1, 4096, 7, 7]               0
          Dropout-34           [-1, 4096, 7, 7]               0
           Conv2d-35              [-1, 3, 7, 7]          12,291
             ReLU-36              [-1, 3, 7, 7]               0
          Dropout-37              [-1, 3, 7, 7]               0
        AvgPool2d-38              [-1, 3, 6, 6]               0
          Softmax-39              [-1, 3, 6, 6]               0

Upvotes: 3

Views: 4005

Answers (1)

srmsoumya

Reputation: 366

I wrote a function that takes a PyTorch model as input and converts its classification layers to convolution layers. It works for VGG and AlexNet for now, but you can extend it to other models as well.

import torch
import torch.nn as nn
from torchvision.models import alexnet, vgg16

def convolutionize(model, num_classes, input_size=(3, 224, 224)):
    '''Converts the classification layers of VGG & Alexnet to convolutions

    Input:
        model: torch.models
        num_classes: number of output classes
        input_size: size of input tensor to the model

    Returns:
        model: converted model with convolutions
    '''
    features = model.features
    classifier = model.classifier

    # create a dummy input tensor and add a dim for batch-size
    x = torch.zeros(input_size).unsqueeze_(dim=0)

    # change the last layer output to the num_classes
    classifier[-1] = nn.Linear(in_features=classifier[-1].in_features,
                               out_features=num_classes)

    # pass the dummy input tensor through the features layer to compute the output size
    for layer in features:
        x = layer(x)

    conv_classifier = []
    for layer in classifier:
        if isinstance(layer, nn.Linear):
            # create a convolution equivalent of linear layer
            conv_layer = nn.Conv2d(in_channels=x.size(1),
                                   out_channels=layer.weight.size(0),
                                   kernel_size=(x.size(2), x.size(3)))

            # transfer the weights
            conv_layer.weight.data.view(-1).copy_(layer.weight.data.view(-1))
            conv_layer.bias.data.view(-1).copy_(layer.bias.data.view(-1))
            layer = conv_layer

        x = layer(x)
        conv_classifier.append(layer)

    # replace the model.classifier with newly created convolution layers
    model.classifier = nn.Sequential(*conv_classifier)

    return model

def visualize(model, input_size=(3, 224, 224)):
    '''Visualize the input size through the layers of the model'''
    x = torch.zeros(input_size).unsqueeze_(dim=0)
    print(x.size())
    for layer in list(model.features) + list(model.classifier):
        x = layer(x)
        print(x.size())

This is how the input size changes as it is passed through the model. Note that the first converted convolution uses a 7×7 kernel (the spatial size of the feature map entering the classifier), which is what collapses the output to 1×1; the remaining Linear layers become 1×1 convolutions:

_vgg = vgg16()
vgg = convolutionize(_vgg, 100)
print('\n\nVGG')
visualize(vgg)

...

VGG
torch.Size([1, 3, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 7, 7])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 100, 1, 1])
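
To turn this [1, 100, 1, 1] output into a class index in the range 0 to num_classes - 1, take the argmax over the channel dimension. A minimal sketch (the dummy input is an assumption, and the layers are applied one by one, as in visualize, because torchvision's VGG.forward flattens the tensor before the classifier):

x = torch.zeros(1, 3, 224, 224)  # dummy input image
for layer in list(vgg.features) + list(vgg.classifier):
    x = layer(x)                 # final shape: [1, 100, 1, 1]
pred = x.flatten(start_dim=1).argmax(dim=1)  # one class index per image
print(pred)

With num_classes=3 as in the question, the predicted indices would fall in the range 0 to 2.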

Upvotes: 1
