Reputation: 541
PyTorch newbie here! I am trying to fine-tune a VGG16 model to predict 3 different classes. Part of my work involves converting the FC layers to CONV layers. However, the values of my predictions don't fall between 0 and 2 (the 3 class labels).
Can someone point me to a good resource on how to compute the correct dimensions for the final layer?
Here are the original FC layers of VGG16:
(classifier): Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace)
  (2): Dropout(p=0.5)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace)
  (5): Dropout(p=0.5)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)
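For reference, the in_features of the first Linear layer come from flattening the 512×7×7 feature map that VGG16's convolutional stage produces for a 224×224 input (25088 = 512 × 7 × 7), so its convolutional equivalent needs a 7×7 kernel rather than 1×1. A minimal standalone sketch of that equivalence (names here are illustrative):

import torch
import torch.nn as nn

# Linear(25088, 4096) on a flattened 512x7x7 map is equivalent to
# Conv2d(512, 4096, kernel_size=7) on the unflattened map,
# since 25088 = 512 * 7 * 7.
fmap = torch.randn(1, 512, 7, 7)   # VGG16 feature map for a 224x224 input
fc = nn.Linear(512 * 7 * 7, 4096)
conv = nn.Conv2d(512, 4096, kernel_size=7)
conv.weight.data.copy_(fc.weight.data.view(4096, 512, 7, 7))
conv.bias.data.copy_(fc.bias.data)
print(torch.allclose(fc(fmap.flatten(1)), conv(fmap).flatten(1), atol=1e-4))  # True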
My code for converting FC layers to CONV:
def convert_fc_to_conv(self, fc_layers):
    # Replace first FC layer with CONV layer
    fc = fc_layers[0].state_dict()
    in_ch = 512
    out_ch = fc["weight"].size(0)
    first_conv = nn.Conv2d(512, out_ch, kernel_size=(1, 1), stride=(1, 1))
    conv_list = [first_conv]
    for idx, layer in enumerate(fc_layers[1:]):
        if isinstance(layer, nn.Linear):
            fc = layer.state_dict()
            in_ch = fc["weight"].size(1)
            out_ch = fc["weight"].size(0)
            if idx == len(fc_layers) - 4:
                in_ch = 3
            conv = nn.Conv2d(out_ch, in_ch, kernel_size=(1, 1), stride=(1, 1))
            conv_list += [conv]
        else:
            conv_list += [layer]
    gc.collect()
    avg_pool = nn.AvgPool2d(kernel_size=2, stride=1, ceil_mode=False)
    conv_list += [avg_pool, nn.Softmax()]
    top_layers = nn.Sequential(*conv_list)
    return top_layers
Final model architecture:
Model(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Conv2d(512, 4096, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Conv2d(4096, 3, kernel_size=(1, 1), stride=(1, 1))
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): AvgPool2d(kernel_size=2, stride=1, padding=0)
    (7): Softmax()
  )
)
Summary of the model:
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
Conv2d-32 [-1, 4096, 7, 7] 2,101,248
ReLU-33 [-1, 4096, 7, 7] 0
Dropout-34 [-1, 4096, 7, 7] 0
Conv2d-35 [-1, 3, 7, 7] 12,291
ReLU-36 [-1, 3, 7, 7] 0
Dropout-37 [-1, 3, 7, 7] 0
AvgPool2d-38 [-1, 3, 6, 6] 0
Softmax-39 [-1, 3, 6, 6] 0
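As the summary shows, the network ends in a Softmax over a [-1, 3, 6, 6] map, so each output value is a per-location probability in [0, 1] rather than a class label; the raw values will never be 0, 1, or 2. Class indices come from an argmax over the channel dimension. A minimal sketch, assuming a batch tensor named images (the names here are hypothetical):

output = model(images)                          # (N, 3, 6, 6) per-location probabilities
pred = output.mean(dim=(2, 3)).argmax(dim=1)    # (N,) class indices in {0, 1, 2}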
Upvotes: 3
Views: 4005
Reputation: 366
I wrote a function that takes a PyTorch model as input and converts its classification layers to convolution layers. It works for VGG and AlexNet for now, but you can extend it to other models as well.
import torch
import torch.nn as nn
from torchvision.models import alexnet, vgg16

def convolutionize(model, num_classes, input_size=(3, 224, 224)):
    '''Converts the classification layers of VGG & AlexNet to convolutions

    Input:
        model: torch.models
        num_classes: number of output classes
        input_size: size of input tensor to the model

    Returns:
        model: converted model with convolutions
    '''
    features = model.features
    classifier = model.classifier

    # create a dummy input tensor and add a dim for batch-size
    x = torch.zeros(input_size).unsqueeze_(dim=0)

    # change the last layer output to the num_classes
    classifier[-1] = nn.Linear(in_features=classifier[-1].in_features,
                               out_features=num_classes)

    # pass the dummy input tensor through the features layer to compute the output size
    for layer in features:
        x = layer(x)

    conv_classifier = []
    for layer in classifier:
        if isinstance(layer, nn.Linear):
            # create a convolution equivalent of linear layer
            conv_layer = nn.Conv2d(in_channels=x.size(1),
                                   out_channels=layer.weight.size(0),
                                   kernel_size=(x.size(2), x.size(3)))
            # transfer the weights
            conv_layer.weight.data.view(-1).copy_(layer.weight.data.view(-1))
            conv_layer.bias.data.view(-1).copy_(layer.bias.data.view(-1))
            layer = conv_layer
        x = layer(x)
        conv_classifier.append(layer)

    # replace the model.classifier with newly created convolution layers
    model.classifier = nn.Sequential(*conv_classifier)
    return model
def visualize(model, input_size=(3, 224, 224)):
    '''Visualize the input size through the layers of the model'''
    x = torch.zeros(input_size).unsqueeze_(dim=0)
    print(x.size())
    for layer in list(model.features) + list(model.classifier):
        x = layer(x)
        print(x.size())
This is how the input size changes as it is passed through the model:
_vgg = vgg16()
vgg = convolutionize(_vgg, 100)
print('\n\nVGG')
visualize(vgg)
...
VGG
torch.Size([1, 3, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 7, 7])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 100, 1, 1])
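A nice side effect of the conversion is that the classifier no longer fixes the input size: larger images produce a spatial map of class scores instead of a single vector. A quick sketch (the 448×448 input is an arbitrary example; note that VGG's built-in forward() still flattens before the classifier, so the two blocks are run directly):

big = torch.zeros(1, 3, 448, 448)          # hypothetical larger input
out = vgg.classifier(vgg.features(big))    # torch.Size([1, 100, 8, 8]) score map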
Upvotes: 1