Aditya Kendre

Reputation: 146

PyTorch CNN: Loss is unchanging

I have tried researching why my loss is unchanging, but all of the answers I found were specific to other people's code. I just started learning about CNNs, and most of this CNN comes from an example that I modified to fit my dataset. I am trying to classify types of ECGs (normal, atrial fibrillation, other, noisy). When I train the CNN the loss stays the same; I think this is because the network does not learn and only outputs zeros. So far I have tried changing the learning rate and the loss function, and it has made no difference.
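For reference, a minimal way to check whether the outputs have collapsed (using the same variable names as the code below, and run after at least one training step) would be something like:

with torch.no_grad():
    sample = train_X[:5].view(-1, 1, 3000).to(device)
    print(net(sample))  # identical rows for every input => the network has collapsed to one prediction

for name, p in net.named_parameters():
    if p.grad is not None:
        print(name, float(p.grad.abs().mean()))  # values near zero everywhere => nothing is being learned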

I am doing this on Google Colab, so feel free to edit the code; just don't forget to change the hardware accelerator to GPU under the Runtime tab.

Code:

import os
import cv2
import numpy as np
from tqdm import tqdm
from scipy.io import loadmat
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

if torch.cuda.is_available():
  device = torch.device("cuda:0")
  print("Running on GPU -", device )
else:
  device = torch.device("cpu")
  print("Running on CPU -", device )


REBUILD_DATA = True # processing data

class ECG_DATA():
  ECG_LENGTH = 3000
  LABEL_SIZE = 485
  DATA = "physionet.org/files/challenge-2017/1.0.0/training/"
  NORMAL = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-normal" 
  AF = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-af"
  OTHER = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-other"
  NOISY = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-noisy"
  LABELS = {NORMAL: 0, AF: 1, OTHER:2, NOISY: 3}
  trainingData = []
  dataCount = {NORMAL: 0, AF: 0, OTHER: 0, NOISY: 0}

  def make_training_data(self):
    for records in self.LABELS:
      with open(records) as label:
        for ecgFile in tqdm(label):
          ecg = loadmat(self.DATA+ecgFile[:-1]+".mat")["val"][0].tolist()

          if records == self.NOISY:
            #self.zero_padding(ecg)
            for x in range(self.ECG_LENGTH, len(ecg), self.ECG_LENGTH):
              if self.dataCount[records] <= self.LABEL_SIZE and x <= len(ecg):
                self.trainingData.append([np.array(ecg[x-self.ECG_LENGTH:x]), np.eye(len(self.LABELS))[self.LABELS[records]]])
                self.dataCount[records] += 1

          elif self.dataCount[records] <= self.LABEL_SIZE and self.ECG_LENGTH <= len(ecg):
            self.trainingData.append([np.array(ecg[:self.ECG_LENGTH]), np.eye(len(self.LABELS))[self.LABELS[records]]])
            self.dataCount[records] += 1    
    
    print(self.dataCount)
    np.random.shuffle(self.trainingData)
    np.save("training_Data.npy", self.trainingData)
  
  def zero_padding(self, ecg):
    ecg += [0] * (self.ECG_LENGTH - (len(ecg) % self.ECG_LENGTH))

class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv1d(1, 32, 5) # 1 input channel, 32 output channels, kernel size 5
        self.conv2 = nn.Conv1d(32, 64, 5) # 32 input channels (conv1's output), 64 output channels, kernel size 5
        self.conv3 = nn.Conv1d(64, 128, 5)

        x = torch.randn(1,3000).view(-1,1,3000)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
        self.fc2 = nn.Linear(512, 4) # 512 in, 4 out for the 4 ECG classes

    def convs(self, x):
        x = F.max_pool1d(F.relu(self.conv1(x)), 1) # adjust shape of pooling? a kernel size of 1 leaves the length unchanged
        x = F.max_pool1d(F.relu(self.conv2(x)), 1)
        x = F.max_pool1d(F.relu(self.conv3(x)), 1)

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape; this flattens x before the fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x) # output layer; no activation here, softmax is applied on the return below
        return F.softmax(x, dim=1)


net = Net().to(device)
print(net)

if REBUILD_DATA:
  ECG = ECG_DATA()
  ECG.make_training_data()

training_data = np.load("training_Data.npy", allow_pickle=True)
print(len(training_data))

optimizer = optim.Adam(net.parameters(), lr = 0.01)
loss_function = nn.MSELoss().to(device)

X = torch.Tensor([i[0] for i in training_data])
y = torch.Tensor([i[1] for i in training_data])
 
VAL_PCT = 0.1
val_size = int(len(X)*VAL_PCT)
print(val_size)

train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X), len(test_X))

BATCH_SIZE = 100
EPOCHS = 1
plot = []

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0 to len(train_X), stepping BATCH_SIZE at a time
        #print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,3000).to(device)
        batch_y = train_y[i:i+BATCH_SIZE].to(device)

        net.zero_grad()

        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update
    
    plot.append([epoch, float(loss)])
    print(f"\nEpoch: {epoch}. Loss: {loss}")

plot = list(map(list, zip(*plot)))
plt.plot(plot[0], plot[1])

Upvotes: 0

Views: 500

Answers (1)

conv3d

Reputation: 2896

At the end of your network there is a softmax layer, but in training you use MSELoss. This tells me your model is outputting class probabilities, but you are then computing the loss as if the output were a continuous regression target. I'm not sure exactly how that is working for you, but I suspect this is the reason for the flat loss.
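To see how flat that combination can be, here is a tiny numeric illustration (not taken from your run): if the softmax output sits near uniform over the 4 classes, MSE against a one-hot target barely moves:

import torch
import torch.nn as nn

uniform = torch.full((1, 4), 0.25)            # softmax output stuck near uniform
one_hot = torch.tensor([[1., 0., 0., 0.]])    # one-hot target, as in the question
print(nn.MSELoss()(uniform, one_hot))         # tensor(0.1875) -- small and nearly constant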

As mentioned in the comments, since your task is classification you can switch to CrossEntropyLoss and keep your softmax layer intact.
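As a rough sketch of that change against the training loop in the question (reusing net, train_X, train_y, and device from your code; the learning rate here is just a placeholder), and noting that CrossEntropyLoss expects integer class indices rather than one-hot vectors:

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

batch_X = train_X[:100].view(-1, 1, 3000).to(device)
batch_y = train_y[:100].argmax(dim=1).to(device)  # one-hot -> class indices

net.zero_grad()
outputs = net(batch_X)                  # shape (batch, 4)
loss = loss_function(outputs, batch_y)
loss.backward()
optimizer.step()

CrossEntropyLoss applies log-softmax to its input internally, so it is also common to drop the final F.softmax from forward() and return raw logits instead.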

Upvotes: 5
