Benedikt

Reputation: 25

Missing/unexpected keys in ResNet50 with PyTorch

I get the following errors and have no idea why:

Missing key(s) in state_dict: "layer2.0.layer.0.inplace.0.weight", "layer2.0.layer.0.inplace.0.bias",...

Unexpected key(s) in state_dict: "layer2.0.layer.0.0.weight", "layer2.0.layer.0.0.bias",...

The channel sizes I set seem to be what I wanted; however, I don't see where the mistake is. For reference, the way I load the weights is shown after the model code below.

import torch.nn as nn

# 1x1 convolution
def conv1x1(in_channels: object, out_channels: object, stride: object, padding: object) -> object:

    model = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
    return model


# 3x3 convolution
def conv3x3(in_channels: object, out_channels: object, stride: object, padding: object) -> object:
    model = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
    return model


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels, downsample=False):
        super(ResidualBlock, self).__init__()
        self.downsample = downsample

        if self.downsample:
            self.layer = nn.Sequential(
                nn.ReLU(conv1x1(in_channels, middle_channels,1,0)),
                nn.ReLU(conv3x3(middle_channels, middle_channels,1,0)),
                nn.ReLU(conv1x1(middle_channels,out_channels,1,0))
            )
            self.downsize = conv1x1(in_channels, out_channels, 2, 0)

        else:
            self.layer = nn.Sequential(
                nn.ReLU(conv1x1(in_channels,middle_channels,2,0)),
                nn.ReLU(conv3x3(middle_channels,middle_channels,2,0)),
                nn.ReLU(conv1x1(middle_channels,out_channels,2,0))
            )
            self.make_equal_channel = conv1x1(in_channels, out_channels, 1, 0)

    def forward(self, x):
        if self.downsample:
            out = self.layer(x)
            x = self.downsize(x)
            return out + x
        else:
            out = self.layer(x)
            if x.size() is not out.size():
                x = self.make_equal_channel(x)
            return out + x


class ResNet50_layer4(nn.Module):
    def __init__(self, num_classes= 10 ): # Hint : How many classes in Cifar-10 dataset?
        super(ResNet50_layer4, self).__init__()
        self.layer1 = nn.Sequential(
            #in_channels, out_channels, kernel_size, stride, padding
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
                # Hint : Through this conv-layer, the input image size is halved.
                #        Consider stride, kernel size, padding and input & output channel sizes.
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        )
        self.layer2 = nn.Sequential(
            #in_channels, middle_channels, out_channels, downsample=False
            ResidualBlock(in_channels=64, middle_channels=64, out_channels=256, downsample=False),
            ResidualBlock(in_channels=256, middle_channels=64, out_channels=256, downsample=False),
            ResidualBlock(in_channels=256, middle_channels=64, out_channels=256, downsample=True)
        )
        self.layer3 = nn.Sequential(
            ResidualBlock(in_channels=256, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=True)
        )
        self.layer4 = nn.Sequential(
            ResidualBlock(in_channels=512, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False)

        )

        self.fc = nn.Linear(1024, 10) 
        self.avgpool = nn.AvgPool2d(7, stride=1)

        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data)
            elif isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight.data)

    def forward(self, x):

        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)

        return out
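
The errors above are raised at this point, when I load previously saved weights (the checkpoint path here is just a placeholder):

import torch

model = ResNet50_layer4()
state_dict = torch.load('resnet50_cifar10.pth')  # placeholder path; saved from an earlier version of the model
model.load_state_dict(state_dict)  # raises the Missing/Unexpected key(s) error (strict=True by default)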

Upvotes: 0

Views: 658

Answers (1)

Wasi Ahmad

Reputation: 37691

You have changed your model, and as a result the keys have changed, which is why you get the mismatch error. Specifically, you have wrapped the conv blocks in nn.ReLU() inside the sequential wrappers of ResidualBlock.

In your ResidualBlock, you have:

self.layer = nn.Sequential(
    nn.ReLU(conv1x1(in_channels, middle_channels, 2, 0)),
    nn.ReLU(conv3x3(middle_channels, middle_channels, 2, 0)),
    nn.ReLU(conv1x1(middle_channels, out_channels, 2, 0))
)

However, in your conv1x1 and conv3x3, you already have nn.ReLU(inplace=True) as the last layer in nn.Sequential. Hence, having another nn.ReLU() in

nn.ReLU(conv1x1(in_channels, middle_channels, 2, 0))

seems unnecessary. If you remove the nn.ReLU(), then the keys will match.
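
Incidentally, this also explains the inplace segment in your missing keys (e.g. layer2.0.layer.0.inplace.0.weight): nn.ReLU's only constructor argument is inplace, so a module passed to it positionally gets registered as a submodule named inplace. A minimal check (the conv sizes here are arbitrary):

import torch.nn as nn

block = nn.ReLU(nn.Conv2d(3, 3, kernel_size=1))  # the conv ends up as the `inplace` argument
print([name for name, _ in block.named_parameters()])
# ['inplace.weight', 'inplace.bias']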

I would revise ResidualBlock as follows.

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels, downsample=False):
        super(ResidualBlock, self).__init__()
        self.downsample = downsample

        if self.downsample:
            self.layer = nn.Sequential(
                conv1x1(in_channels, middle_channels, 1, 0),
                conv3x3(middle_channels, middle_channels, 1, 0),
                conv1x1(middle_channels, out_channels, 1, 0)
            )
            self.downsize = conv1x1(in_channels, out_channels, 2, 0)

        else:
            self.layer = nn.Sequential(
                conv1x1(in_channels, middle_channels, 2, 0),
                conv3x3(middle_channels, middle_channels, 2, 0),
                conv1x1(middle_channels, out_channels, 2, 0)
            )
            self.make_equal_channel = conv1x1(in_channels, out_channels, 1, 0)

    def forward(self, x):
        # forward pass taken from the question; the size comparison uses !=
        # rather than `is not` (identity comparison between two torch.Size
        # objects is always True here, which is not what was intended)
        if self.downsample:
            out = self.layer(x)
            x = self.downsize(x)
            return out + x
        else:
            out = self.layer(x)
            if x.size() != out.size():
                x = self.make_equal_channel(x)
            return out + x

Now, let's test.

model = ResNet50_layer4()
for k, v in model.named_parameters():
    print(k)

Output:

layer1.0.weight
layer1.0.bias
layer1.1.weight
layer1.1.bias
layer2.0.layer.0.0.weight
layer2.0.layer.0.0.bias
layer2.0.layer.0.1.weight
layer2.0.layer.0.1.bias
...
...

If you still want the additional nn.ReLU(), you can train your modified model and save its weights; loading those weights back will then work, because the checkpoint keys will match the model's keys.
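
For example, a round trip like the following works, because the model that saves the weights and the model that loads them share the same architecture (the file name is just a placeholder):

import torch

model = ResNet50_layer4()
torch.save(model.state_dict(), 'checkpoint.pth')  # placeholder file name

model2 = ResNet50_layer4()
model2.load_state_dict(torch.load('checkpoint.pth'))  # succeeds: the keys are identical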

Upvotes: 1
