I get the following errors and have no idea why:
Missing key(s) in state_dict: "layer2.0.layer.0.inplace.0.weight", "layer2.0.layer.0.inplace.0.bias",...
Unexpected key(s) in state_dict: "layer2.0.layer.0.0.weight", "layer2.0.layer.0.0.bias",...
The channel sizes I set seem to be what I wanted; however, I don't see where the mistake is.
import torch.nn as nn

# 1x1 convolution
def conv1x1(in_channels: object, out_channels: object, stride: object, padding: object) -> object:
    model = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
    return model

# 3x3 convolution
def conv3x3(in_channels: object, out_channels: object, stride: object, padding: object) -> object:
    model = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
    return model

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels, downsample=False):
        super(ResidualBlock, self).__init__()
        self.downsample = downsample
        if self.downsample:
            self.layer = nn.Sequential(
                nn.ReLU(conv1x1(in_channels, middle_channels, 1, 0)),
                nn.ReLU(conv3x3(middle_channels, middle_channels, 1, 0)),
                nn.ReLU(conv1x1(middle_channels, out_channels, 1, 0))
            )
            self.downsize = conv1x1(in_channels, out_channels, 2, 0)
        else:
            self.layer = nn.Sequential(
                nn.ReLU(conv1x1(in_channels, middle_channels, 2, 0)),
                nn.ReLU(conv3x3(middle_channels, middle_channels, 2, 0)),
                nn.ReLU(conv1x1(middle_channels, out_channels, 2, 0))
            )
            self.make_equal_channel = conv1x1(in_channels, out_channels, 1, 0)

    def forward(self, x):
        if self.downsample:
            out = self.layer(x)
            x = self.downsize(x)
            return out + x
        else:
            out = self.layer(x)
            if x.size() is not out.size():
                x = self.make_equal_channel(x)
            return out + x

class ResNet50_layer4(nn.Module):
    def __init__(self, num_classes=10):  # Hint: How many classes in the Cifar-10 dataset?
        super(ResNet50_layer4, self).__init__()
        self.layer1 = nn.Sequential(
            # in_channels, out_channels, kernel_size, stride, padding
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            # Hint: Through this conv-layer, the input image size is halved.
            # Consider stride, kernel size, padding and input & output channel sizes.
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        )
        self.layer2 = nn.Sequential(
            # in_channels, middle_channels, out_channels, downsample=False
            ResidualBlock(in_channels=64, middle_channels=64, out_channels=256, downsample=False),
            ResidualBlock(in_channels=256, middle_channels=64, out_channels=256, downsample=False),
            ResidualBlock(in_channels=256, middle_channels=64, out_channels=256, downsample=True)
        )
        self.layer3 = nn.Sequential(
            ResidualBlock(in_channels=256, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=False),
            ResidualBlock(in_channels=512, middle_channels=128, out_channels=512, downsample=True)
        )
        self.layer4 = nn.Sequential(
            ResidualBlock(in_channels=512, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False),
            ResidualBlock(in_channels=1024, middle_channels=256, out_channels=1024, downsample=False)
        )
        self.fc = nn.Linear(1024, 10)
        self.avgpool = nn.AvgPool2d(7, stride=1)

        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data)
            elif isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight.data)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out
You have changed your model, and as a result the keys have changed, so you are getting a mismatch error. I think you have added nn.ReLU() around the sequential wrappers in ResidualBlock. In your ResidualBlock, you have:
self.layer = nn.Sequential(
    nn.ReLU(conv1x1(in_channels, middle_channels, 2, 0)),
    nn.ReLU(conv3x3(middle_channels, middle_channels, 2, 0)),
    nn.ReLU(conv1x1(middle_channels, out_channels, 2, 0))
)
However, your conv1x1 and conv3x3 already have nn.ReLU(inplace=True) as the last layer of their nn.Sequential, so wrapping them in another nn.ReLU() is unnecessary. Worse, the first positional argument of nn.ReLU() is inplace, so the Sequential you pass in gets registered as a child module named inplace; that is why your model now expects keys like layer2.0.layer.0.inplace.0.weight while the checkpoint only contains layer2.0.layer.0.0.weight. If you remove the nn.ReLU() wrappers, the keys will match.
I have revised ResidualBlock as follows:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels, downsample=False):
        super(ResidualBlock, self).__init__()
        self.downsample = downsample
        if self.downsample:
            self.layer = nn.Sequential(
                conv1x1(in_channels, middle_channels, 1, 0),
                conv3x3(middle_channels, middle_channels, 1, 0),
                conv1x1(middle_channels, out_channels, 1, 0)
            )
            self.downsize = conv1x1(in_channels, out_channels, 2, 0)
        else:
            self.layer = nn.Sequential(
                conv1x1(in_channels, middle_channels, 2, 0),
                conv3x3(middle_channels, middle_channels, 2, 0),
                conv1x1(middle_channels, out_channels, 2, 0)
            )
            self.make_equal_channel = conv1x1(in_channels, out_channels, 1, 0)

    def forward(self, x):
        '''Your forward method description'''
Now, let's test:
model = ResNet50_layer4()
for k, v in model.named_parameters():
    print(k)
Output:
layer1.0.weight
layer1.0.bias
layer1.1.weight
layer1.1.bias
layer2.0.layer.0.0.weight
layer2.0.layer.0.0.bias
layer2.0.layer.0.1.weight
layer2.0.layer.0.1.bias
...
...
If you still want to use the additional nn.ReLU(), you can train the new model and save its weights; loading those weights back will then work, since the saved and loaded architectures match.
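For example, a minimal save/load round trip (the checkpoint path is just a placeholder):
import torch

model = ResNet50_layer4()
# ... train the model here ...
torch.save(model.state_dict(), "checkpoint.pth")

# Rebuild the same architecture and load the weights back.
model2 = ResNet50_layer4()
model2.load_state_dict(torch.load("checkpoint.pth"))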