Reputation: 117
I have the following implementation of a residual block and a ResNet, inspired by d2l:
import torch as t
import torch.nn as nn
from torch.nn import functional as F
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, strides=1, kernel_size=3, padding=1):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, padding=padding,
                               stride=strides)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, padding=padding)
        if use_1x1conv:
            self.conv3 = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.bn2 = nn.BatchNorm1d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        print("Before: ", X.size(), Y.size())
        if self.conv3:
            X = self.conv3(X)
        Y += X
        print("After: ", X.size(), Y.size())
        return F.relu(Y)
and the following implementation of ResNet:
import torch as t
import torch.nn as nn
from skeleton.layers.residual_block import ResidualBlock
class ResNet(nn.Module):
    def __init__(self, triples):
        super(ResNet, self).__init__()
        modules = []
        modules.append(self.starting_block(128))
        for _, triple in enumerate(triples):
            in_channels, num_residuals, out_channels = triple[0], triple[1], triple[2]
            block = self.block(in_channels, num_residuals, out_channels)
            modules.append(block)
        modules.append(nn.Sequential(nn.ReLU(), nn.AdaptiveAvgPool1d(3)))
        modules.append(nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(256), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.LazyLinear(128)
        ))
        self.net = nn.Sequential(*modules)

    def starting_block(self, input_channels):
        return nn.Sequential(
            nn.Conv1d(input_channels, 128, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=2, padding=1))

    def block(self, in_channels, num_residuals, out_channels):
        blk = []
        for _ in range(num_residuals):
            blk.append(ResidualBlock(in_channels, out_channels, use_1x1conv=True))
        return nn.Sequential(*blk)

    def forward(self, x):
        return self.net(x)
And I initialize the model like this:
self.resnet = ResNet(((num_embedding, 2, num_embedding*2), (num_embedding*2, 2, num_embedding*4)))
where num_embedding is 128.
However, when training, the model raises an error inside the forward function of the ResidualBlock, at the line Y = F.relu(self.bn1(self.conv1(X))).
The error is: RuntimeError: Given groups=1, weight of size [256, 128, 3], expected input[128, 256, 60] to have 128 channels, but got 256 channels instead.
However, when I initialize the layers with LazyConv1d (passing only out_channels as a parameter), the dimension error does not occur and training runs fine. I'm not sure what the issue is, and whether it can be fixed without using LazyConv1d.
I would really appreciate any help.
Upvotes: 2
Views: 103
Reputation: 11218
Please check the dimensions of your input tensors against what each layer expects. The error tells you exactly where the mismatch is: conv1 has a weight of size [256, 128, 3], i.e. it was built as nn.Conv1d(128, 256, kernel_size=3), so it expects 128 input channels, but the tensor it receives, [128, 256, 60], is a batch of 128 samples with 256 channels.
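For reference, nn.Conv1d expects input of shape (batch, channels, length), and its weight has shape (out_channels, in_channels, kernel_size). A quick sketch reproducing the shapes from your error message (assuming the leading 128 in the input is your batch size):

import torch as t
import torch.nn as nn

conv = nn.Conv1d(128, 256, kernel_size=3, padding=1)
print(conv.weight.shape)   # torch.Size([256, 128, 3]) -- the weight in the error

x = t.randn(128, 256, 60)  # (batch=128, channels=256, length=60) -- the failing input
# conv(x)                  # would raise the same RuntimeError: 128 in-channels expected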
It seems the tensor reaches the second ResidualBlock of the first stage with shape (128, 256, 60) while that block was built to expect 128 channels. The culprit is block(): every ResidualBlock in the loop is constructed with the same in_channels, but after the first block the tensor already has out_channels channels, so the second block (still built as 128 -> 256) receives 256 channels and fails. That is also why LazyConv1d makes the error disappear: it infers its in_channels separately for each block at the first forward pass.
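Without Lazy layers, the fix is to let only the first ResidualBlock in each stage change the channel count. A minimal sketch of block() rewritten that way (same names as your code, mirroring the pattern of d2l's resnet_block helper):

def block(self, in_channels, num_residuals, out_channels):
    blk = []
    for i in range(num_residuals):
        if i == 0:
            # only the first block maps in_channels -> out_channels
            blk.append(ResidualBlock(in_channels, out_channels, use_1x1conv=True))
        else:
            # later blocks keep out_channels, so the skip connection matches
            blk.append(ResidualBlock(out_channels, out_channels))
    return nn.Sequential(*blk)

With your triples ((128, 2, 256), (256, 2, 512)), the stages then build 128->256, 256->256 and 256->512, 512->512, which matches the shapes actually flowing through the network.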
Upvotes: 2