Reputation: 53
I am trying to implement image classification with a Bayesian CNN using dropout.
I have defined two classes:
When I run the program I notice that the train/test accuracy stays stable and does not increase, and I don't see what the problem is.
I don't know whether it is because of the convolution and pooling layer parameters or something else. Any ideas, please?
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5, padding=2)
        self.fc1 = nn.Linear(16 * 8 * 8, 1024)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 192 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Lenet with MCDO
class Net_MCDO(nn.Module):
    def __init__(self):
        super(Net_MCDO, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 192, 5, padding=2)
        self.fc1 = nn.Linear(16 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        x = self.pool(self.dropout(self.conv1(x)))
        x = self.pool(self.dropout(self.conv2(x)))
        x = x.view(-1, 192 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(self.dropout(x)))
        x = F.softmax(self.fc3(self.dropout(x)), dim=1)
        return x
net=Net()
mcdo=Net_MCDO()
CE = nn.CrossEntropyLoss()
learning_rate=0.001
optimizer=optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
epoch_num = 30
train_accuracies=np.zeros(epoch_num)
test_accuracies=np.zeros(epoch_num)
for epoch in range(epoch_num):
    average_loss = 0.0
    total = 0
    success = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = Variable(inputs), Variable(labels)
        optimizer.zero_grad()
        outputs = mcdo(inputs)
        loss = CE(outputs, labels)
        loss.backward()
        optimizer.step()
        average_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        success += (predicted == labels.data).sum()
    train_accuracy = 100.0 * success / total

    success = 0
    total = 0
    for (inputs, labels) in testloader:
        inputs, labels = Variable(inputs), Variable(labels)
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        success += (predicted == labels.data).sum()
    test_accuracy = 100.0 * success / total

    print(u"epoch{}, average_loss{}, train_accuracy{}, test_accuracy{}".format(
        epoch,
        average_loss / n_batches,
        train_accuracy,
        100 * success / total
    ))

    # save
    train_accuracies[epoch] = train_accuracy
    test_accuracies[epoch] = 100.0 * success / total
plt.plot(np.arange(1, epoch_num+1), train_accuracies)
plt.plot(np.arange(1, epoch_num+1), test_accuracies)
plt.show()
Upvotes: 1
Views: 95
Reputation: 1974
PyTorch merges the softmax (as a log-softmax) into nn.CrossEntropyLoss for numerical stability and better training, so you should remove the softmax layer from your model (check the documentation here: https://pytorch.org/docs/stable/nn.html#crossentropyloss). Keeping the softmax layer in your model will lead to slower training and possibly worse metrics, because you are squashing the gradient twice, so the weight updates become much less significant.
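As a quick standalone check (not from the original post), you can verify that nn.CrossEntropyLoss already contains the log-softmax, i.e. it gives the same value as applying log_softmax yourself and then the NLL loss:

import torch
import torch.nn.functional as F

# toy batch: 4 samples, 10 classes, raw logits (no softmax applied)
logits = torch.randn(4, 10)
labels = torch.randint(0, 10, (4,))

ce = F.cross_entropy(logits, labels)                    # expects raw logits
nll = F.nll_loss(F.log_softmax(logits, dim=1), labels)  # manual log-softmax + NLL

print(torch.allclose(ce, nll))  # True: the (log-)softmax is already inside the loss

So the model should just return raw logits and let the loss handle the normalization.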
Change your code to:
class Net_MCDO(nn.Module):
    def __init__(self):
        super(Net_MCDO, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 192, 5, padding=2)
        self.fc1 = nn.Linear(16 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        x = self.pool(F.relu(self.dropout(self.conv1(x))))  # recommended to add the ReLU
        x = self.pool(F.relu(self.dropout(self.conv2(x))))  # recommended to add the ReLU
        x = x.view(-1, 192 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(self.dropout(x)))
        x = self.fc3(self.dropout(x))  # no activation function needed for the last layer
        return x
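If you still want actual probabilities at prediction time (for example to inspect the model's confidence), here is a small sketch of applying the softmax outside the model, reusing the mcdo and inputs names and the imports from the question:

mcdo.eval()  # turn dropout off for a deterministic prediction
with torch.no_grad():
    logits = mcdo(inputs)
    probs = F.softmax(logits, dim=1)     # probabilities, only for reporting
    _, predicted = torch.max(logits, 1)  # the argmax is the same on logits or probabilities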
Furthermore, I would recommend using an activation function such as ReLU() after every conv or linear layer. Otherwise you are just performing a stack of linear operations that a single layer could learn on its own.
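As a standalone illustration of that point (the 120/84/10 sizes are just borrowed from the fc2/fc3 above), two stacked Linear layers with nothing nonlinear in between collapse into a single linear map:

import torch
import torch.nn as nn

torch.manual_seed(0)
fc2 = nn.Linear(120, 84, bias=False)
fc3 = nn.Linear(84, 10, bias=False)

# one layer whose weight is the product of the two weight matrices
merged = nn.Linear(120, 10, bias=False)
with torch.no_grad():
    merged.weight.copy_(fc3.weight @ fc2.weight)

x = torch.randn(5, 120)
print(torch.allclose(fc3(fc2(x)), merged(x), atol=1e-6))  # True: no extra expressive power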
I hope that helps =)
Upvotes: 0
Reputation: 3496
Yes, you are right: don't use dropout (nor batch norm in training mode) when testing. But you don't have to create two different models for that; you can switch a single model between train mode and eval mode. Just create one model, for example net:
# when training
outputs = net.train()(inputs)
# when testing:
outputs = net.eval()(inputs)
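For intuition, here is a tiny standalone check of what those two modes change for a dropout layer:

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()
print(drop(x))  # roughly half the entries zeroed, the survivors scaled by 1/(1-p) = 2

drop.eval()
print(drop(x))  # identity: dropout is a no-op in eval mode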
But you shouldn't really use dropout with conv layers anyway, just on the dense layers at the end; that could be the reason why it is not improving. Also, your architecture is quite small. How big are your images? If they are larger than 32x32 you can try adding one more layer. You can also start with a learning rate of about 0.001 and then divide it by two every time the accuracy doesn't improve for a few epochs (a quick sketch of that is below). Hope this will help you :)
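One possible way to do that halving is ReduceLROnPlateau; this is only a sketch, assuming the optimizer setup, epoch_num and test_accuracy names from the question's script:

from torch.optim.lr_scheduler import ReduceLROnPlateau

optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# halve the learning rate when the monitored metric has not improved for `patience` epochs
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

for epoch in range(epoch_num):
    # ... training and evaluation as in the question's loop ...
    scheduler.step(test_accuracy)  # pass the metric you want to monitor (here the test accuracy)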
Edit: I just saw that you are missing the ReLU activations in the second model (the one with dropout); that should cause problems.
Upvotes: 1