Reputation: 117
I have the following implementation of a residual block and a ResNet, inspired by d2l:
import torch as t
import torch.nn as nn
from torch.nn import functional as F
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, strides=1, kernel_size=3, padding=1):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, padding=padding,
                               stride=strides)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, padding=padding)
        if use_1x1conv:
            self.conv3 = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.bn2 = nn.BatchNorm1d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        print("Before: ", X.size(), Y.size())
        if self.conv3:
            X = self.conv3(X)
        Y += X
        print("After: ", X.size(), Y.size())
        return F.relu(Y)
and the following implementation of ResNet:
import torch as t
import torch.nn as nn
from skeleton.layers.residual_block import ResidualBlock
class ResNet(nn.Module):
    def __init__(self, triples):
        super(ResNet, self).__init__()
        modules = []
        modules.append(self.starting_block(128))
        for _, triple in enumerate(triples):
            in_channels, num_residuals, out_channels = triple[0], triple[1], triple[2]
            block = self.block(in_channels, num_residuals, out_channels)
            modules.append(block)
        modules.append(nn.Sequential(nn.ReLU(), nn.AdaptiveAvgPool1d(3)))
        modules.append(nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(256), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.LazyLinear(128)
        ))
        self.net = nn.Sequential(*modules)

    def starting_block(self, input_channels):
        return nn.Sequential(
            nn.Conv1d(input_channels, 128, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=2, padding=1))

    def block(self, in_channels, num_residuals, out_channels):
        blk = []
        for _ in range(num_residuals):
            blk.append(ResidualBlock(in_channels, out_channels, use_1x1conv=True))
        return nn.Sequential(*blk)

    def forward(self, x):
        return self.net(x)
And I initialize the model like this:
self.resnet = ResNet(((num_embedding, 2, num_embedding*2), (num_embedding*2, 2, num_embedding*4)))
where num_embedding is 128.
However, when training, the model raises an error inside the forward function of the ResidualBlock, at the line Y = F.relu(self.bn1(self.conv1(X))).
The error is: RuntimeError: Given groups=1, weight of size [256, 128, 3], expected input[128, 256, 60] to have 128 channels, but got 256 channels instead.
However, when I initialize the layers with LazyConv1d (passing only out_channels as a parameter), the dimension error does not occur and training runs fine. I'm not sure what the issue is, and whether it can be fixed without using LazyConv1d.
I would really appreciate any help.
Upvotes: 2
Views: 103
Reputation: 11218
Please check the dimensions of your input tensors against what each layer expects. The error tells you exactly where the mismatch is: conv1 has a weight of size [256, 128, 3], i.e. it was built as nn.Conv1d(128, 256, kernel_size=3), so it expects 128 input channels, but the tensor it receives, [128, 256, 60], is a batch of 128 samples with 256 channels.
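For reference, nn.Conv1d expects input of shape (batch, channels, length), and its weight has shape (out_channels, in_channels, kernel_size). A quick sketch reproducing the shapes from your error message (assuming the leading 128 in the input is your batch size):

import torch as t
import torch.nn as nn

conv = nn.Conv1d(128, 256, kernel_size=3, padding=1)
print(conv.weight.shape)   # torch.Size([256, 128, 3]) -- the weight in the error

x = t.randn(128, 256, 60)  # (batch=128, channels=256, length=60) -- the failing input
# conv(x)                  # would raise the same RuntimeError: 128 in-channels expected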
It seems the tensor reaches the second ResidualBlock of the first stage with shape (128, 256, 60) while that block was built to expect 128 channels. The culprit is block(): every ResidualBlock in the loop is constructed with the same in_channels, but after the first block the tensor already has out_channels channels, so the second block (still built as 128 -> 256) receives 256 channels and fails. That is also why LazyConv1d makes the error disappear: it infers its in_channels separately for each block at the first forward pass.
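Without Lazy layers, the fix is to let only the first ResidualBlock in each stage change the channel count. A minimal sketch of block() rewritten that way (same names as your code, mirroring the pattern of d2l's resnet_block helper):

def block(self, in_channels, num_residuals, out_channels):
    blk = []
    for i in range(num_residuals):
        if i == 0:
            # only the first block maps in_channels -> out_channels
            blk.append(ResidualBlock(in_channels, out_channels, use_1x1conv=True))
        else:
            # later blocks keep out_channels, so the skip connection matches
            blk.append(ResidualBlock(out_channels, out_channels))
    return nn.Sequential(*blk)

With your triples ((128, 2, 256), (256, 2, 512)), the stages then build 128->256, 256->256 and 256->512, 512->512, which matches the shapes actually flowing through the network.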
Upvotes: 2