Reputation: 73
I am trying to learn PyTorch and I am starting with the Fashion-MNIST dataset. I created a model and it was giving horrible results. I found out that if I rewrite the model without using nn.Sequential, it actually works. I have no idea what the actual difference between these two is, or why the one with nn.Sequential is not working properly.
This version achieves around 10% accuracy:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Down(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.down = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

    def forward(self, x):
        return self.down(x)

class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.net = nn.Sequential(
            Down(1, 128),
            Down(128, 256))

    def forward(self, x):
        x = self.net(x)
        # print(x.size())
        x = torch.flatten(x, start_dim=1)
        x = nn.Linear(12544, 10)(x)
        return F.log_softmax(x, dim=1)
And this model achieves around 90% accuracy:
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.lin1 = nn.Linear(12544, 10)
        self.lin2 = nn.Linear(64, 10)
        self.norm1 = nn.BatchNorm2d(128)
        self.norm2 = nn.BatchNorm2d(256)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.norm2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.lin1(x)
        return F.log_softmax(x, dim=1)
Thank you for any advice.
Upvotes: 1
Views: 564
Reputation: 537
You're creating a brand-new linear layer every time you pass an input through your network. A layer built inside forward starts from fresh random weights on each call, and because it isn't registered as a submodule, its parameters are never seen by the optimizer, so the classifier head never learns anything. Around 10% accuracy is exactly chance level on the 10 Fashion-MNIST classes. Declaring the nn.Linear in __init__ will fix your problem.
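For example, here is a minimal sketch of your nn.Sequential version with the classifier head moved into __init__ (it reuses your Down block and the 12544 flattened size from your question):

import torch
import torch.nn as nn
import torch.nn.functional as F

class MyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            Down(1, 128),
            Down(128, 256))
        # Declared once here, so the layer's weights are registered
        # as model parameters and updated by the optimizer.
        self.fc = nn.Linear(12544, 10)

    def forward(self, x):
        x = self.net(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

This is exactly what your working version does with self.lin1, which is why it trains fine.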
Upvotes: 1