Reputation: 146
I have tried researching my unchanging-loss problem, but all the answers I found were specific to other people's code. I have just started learning about CNNs, and the majority of this CNN comes from an example, modified to fit the needs of my dataset. I am trying to classify types of ECGs (normal, atrial fibrillation, other, noisy). When I try to train the CNN the loss stays the same; I think this is because the network is not learning and only outputs zeros. So far I have tried changing the learning rate and the loss function, and it has made no difference.
I am doing this on Google Colab, so feel free to edit the code, and don't forget to change the hardware accelerator to GPU under the Runtime tab.
Code:
import os
import cv2
import numpy as np
from tqdm import tqdm
from scipy.io import loadmat

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Running on GPU -", device)
else:
    device = torch.device("cpu")
    print("Running on CPU -", device)

REBUILD_DATA = True  # set to True to (re)process the raw data


class ECG_DATA():
    ECG_LENGTH = 3000
    LABEL_SIZE = 485
    DATA = "physionet.org/files/challenge-2017/1.0.0/training/"
    NORMAL = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-normal"
    AF = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-af"
    OTHER = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-other"
    NOISY = "physionet.org/files/challenge-2017/1.0.0/training/RECORDS-noisy"
    LABELS = {NORMAL: 0, AF: 1, OTHER: 2, NOISY: 3}
    trainingData = []
    dataCount = {NORMAL: 0, AF: 0, OTHER: 0, NOISY: 0}

    def make_training_data(self):
        for records in self.LABELS:
            with open(records) as label:
                for ecgFile in tqdm(label):
                    ecg = loadmat(self.DATA + ecgFile[:-1] + ".mat")["val"][0].tolist()
                    if records == self.NOISY:
                        #self.zero_padding(ecg)
                        for x in range(self.ECG_LENGTH, len(ecg), self.ECG_LENGTH):
                            if self.dataCount[records] <= self.LABEL_SIZE and x <= len(ecg):
                                self.trainingData.append([np.array(ecg[x-self.ECG_LENGTH:x]),
                                                          np.eye(len(self.LABELS))[self.LABELS[records]]])
                                self.dataCount[records] += 1
                    elif self.dataCount[records] <= self.LABEL_SIZE and self.ECG_LENGTH <= len(ecg):
                        self.trainingData.append([np.array(ecg[:self.ECG_LENGTH]),
                                                  np.eye(len(self.LABELS))[self.LABELS[records]]])
                        self.dataCount[records] += 1

        print(self.dataCount)
        np.random.shuffle(self.trainingData)
        np.save("training_Data.npy", self.trainingData)

    def zero_padding(self, ecg):
        ecg += [0] * (self.ECG_LENGTH - (len(ecg) % self.ECG_LENGTH))


class Net(nn.Module):
    def __init__(self):
        super().__init__()  # run the init of the parent class (nn.Module)
        self.conv1 = nn.Conv1d(1, 32, 5)   # 1 input channel (the ECG signal), 32 output channels, kernel size 5
        self.conv2 = nn.Conv1d(32, 64, 5)  # 32 in because conv1 outputs 32 channels; 64 out, kernel size 5
        self.conv3 = nn.Conv1d(64, 128, 5)

        x = torch.randn(1, 3000).view(-1, 1, 3000)
        self._to_linear = None
        self.convs(x)  # dummy pass to work out the flattened size

        self.fc1 = nn.Linear(self._to_linear, 512)  # flattening
        self.fc2 = nn.Linear(512, 4)  # 512 in, 4 out for the 4 ECG classes

    def convs(self, x):
        x = F.max_pool1d(F.relu(self.conv1(x)), 1)  # adjust shape of pooling?
        x = F.max_pool1d(F.relu(self.conv2(x)), 1)
        x = F.max_pool1d(F.relu(self.conv3(x)), 1)

        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape; this flattens x before the fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # output layer, no activation here
        return F.softmax(x, dim=1)


net = Net().to(device)
print(net)

if REBUILD_DATA:
    ECG = ECG_DATA()
    ECG.make_training_data()

training_data = np.load("training_Data.npy", allow_pickle=True)
print(len(training_data))

optimizer = optim.Adam(net.parameters(), lr=0.01)
loss_function = nn.MSELoss().to(device)

X = torch.Tensor([i[0] for i in training_data])
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1
val_size = int(len(X) * VAL_PCT)
print(val_size)

train_X = X[:-val_size]
train_y = y[:-val_size]
test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X), len(test_X))

BATCH_SIZE = 100
EPOCHS = 1
plot = []

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):  # step through train_X in batches of BATCH_SIZE
        #print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 3000).to(device)
        batch_y = train_y[i:i+BATCH_SIZE].to(device)

        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()  # does the update

    plot.append([epoch, float(loss)])
    print(f"\nEpoch: {epoch}. Loss: {loss}")

plot = list(map(list, zip(*plot)))
plt.plot(plot[0], plot[1])
Upvotes: 0
Views: 500
Reputation: 2896
At the end of your network there is a softmax layer, but in training you use MSELoss. That means your model outputs class probabilities, yet you compute the loss as if the targets were continuous values. I'm not sure exactly how well that can work, but I would suspect this mismatch is the reason your loss barely moves.
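To make the mismatch concrete, here is a small, self-contained comparison (the numbers are made up; only the 4-class setup matches your problem). With MSE on softmax probabilities, the gradient that reaches the logits is tiny, so the weights barely move:

import torch
import torch.nn.functional as F

# A confident but wrong prediction for a 4-class problem.
logits = torch.tensor([[4.0, -2.0, -1.0, -1.0]], requires_grad=True)
target_onehot = torch.tensor([[0.0, 1.0, 0.0, 0.0]])  # true class is index 1

# MSE on softmax probabilities: gradient on the logits is tiny, so the loss looks stuck.
F.mse_loss(F.softmax(logits, dim=1), target_onehot).backward()
print(logits.grad)   # roughly 1e-2 or smaller per element

# Cross-entropy on the raw logits: gradient is softmax(logits) - one_hot, a much stronger signal.
logits2 = logits.detach().clone().requires_grad_(True)
F.cross_entropy(logits2, torch.tensor([1])).backward()
print(logits2.grad)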
As mentioned in the comments, since your task is classification you should use CrossEntropyLoss. Note that PyTorch's CrossEntropyLoss expects raw logits and integer class labels: it applies log-softmax internally, so you would drop the final F.softmax from forward() and pass class indices (0-3) instead of one-hot vectors.
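A minimal sketch of the changed training step, assuming you keep the rest of your pipeline (the nn.Sequential model below is only a stand-in for your Net so the snippet runs on its own):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Flatten(), nn.Linear(3000, 4))  # stand-in for Net; your forward() would end with self.fc2(x), no softmax
loss_function = nn.CrossEntropyLoss()                   # applies log-softmax + NLL internally
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

batch_X = torch.randn(8, 1, 3000)     # fake batch shaped like yours
batch_y = torch.randint(0, 4, (8,))   # integer class labels 0-3, not one-hot vectors

optimizer.zero_grad()
logits = net(batch_X)                 # shape (8, 4), raw scores
loss = loss_function(logits, batch_y)
loss.backward()
optimizer.step()

For your data pipeline that means building y from the class index (e.g. the argmax of the one-hot row) and casting it to a LongTensor; if you still want probabilities at evaluation time, apply F.softmax to the logits there, outside the loss.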
Upvotes: 5