Reputation: 1712
My dataset has 14 features and a target taking values in {0, 1}.
I have trained this binary classifier:
class SimpleBinaryClassifier(nn.Module):
    def __init__(self, input_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape, 64)
        self.fc2 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(p=0.1)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x
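For reference, a quick shape check on random data (a minimal sketch) shows one raw logit per row:

x = torch.randn(8, 14)                         # batch of 8 rows, 14 features
model = SimpleBinaryClassifier(input_shape=14)
print(model(x).shape)                          # torch.Size([8, 1])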
with the following criterion and training loop:
criterion = nn.BCEWithLogitsLoss()
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum / y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc
model.train()
for e in range(1, EPOCHS + 1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    print(f'Epoch {e:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
This model, when called like sigmoid(model(input_tensor)),
outputs a single number in [0, 1]. The pipeline I'm working with expects the model to output a pair of probabilities [p_class1, p_class2].
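For example, with the current single-logit model that shape can only be assembled by post-processing (a sketch):

p = torch.sigmoid(model(input_tensor))   # (N, 1), probability of the positive class
probs = torch.cat([1 - p, p], dim=1)     # (N, 2): [p_class1, p_class2]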
How can I adapt the model and the training loop?
If I set the output size of the last layer to 2, I run into problems with the criterion in the training loop.
class SimpleBinaryClassifier2(nn.Module):
    def __init__(self, input_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape, 64)
        self.fc2 = nn.Linear(64, 32)
        self.dropout = nn.Dropout(p=0.1)
        self.fc3 = nn.Linear(32, 2)  # now it's 2

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x
I use CrossEntropyLoss:
model = SimpleBinaryClassifier2(input_shape=14)
model.to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()
and replace y_pred_tag = torch.round(torch.sigmoid(y_pred))
with an argmax over the softmax:
def binary_acc2(y_pred, y_test):
    # softmax needs an explicit dim; argmax picks the predicted class per row
    y_pred_tag = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum / y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc
Then the training loop raises an error:
model.train()
for e in range(1, EPOCHS + 1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        acc = binary_acc2(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    print(f'Epoch {e:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
The error is the following:
RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'
I already looked at another post where the cause of this error was that the target was a plain float rather than a tensor, but in my case the datasets are built from tensors:
train_dataset = GenericDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
test_dataset = GenericDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
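A minimal standalone repro (a sketch; the exact error message depends on the device and PyTorch version):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 2)                       # (N, C) raw scores
float_targets = torch.tensor([0., 1., 1., 0.])   # float targets, shape (N,)
# criterion(logits, float_targets)               # raises the dtype RuntimeError
loss = criterion(logits, float_targets.long())   # works: class indices must be int64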
Upvotes: 1
Views: 1326
Reputation: 1016
According to the nn.CrossEntropyLoss
documentation, the target is expected to contain class indices of type long (int64), not float, while in your train_dataset
you explicitly convert the targets to float.
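A minimal sketch of the fix, either at dataset construction or inside the loop (binary_acc2 being your accuracy helper):

# Option 1: build the datasets with int64 class indices
train_dataset = GenericDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
test_dataset = GenericDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test))

# Option 2: cast the targets inside the training loop
loss = criterion(y_pred, y_batch.long())
acc = binary_acc2(y_pred, y_batch.long())

At inference time, torch.softmax(model(x), dim=1) then gives the [p_class1, p_class2] pair your pipeline expects.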
Upvotes: 2