Reputation: 359
I use PyTorch Lightning to train a small NN (transfer learning) on the hymenoptera photos (inspired by the tutorial here).
In the test_step method, it prints the real classes (classes) and the predictions (preds). After training, I do the same thing (a verification step), but I get different results.
import torch
from torch import nn
from torch.optim import Adam, SGD
import pytorch_lightning as pl
from torchvision import models
from torch.optim import lr_scheduler
from pytorch_lightning.metrics.functional import accuracy
from pytorch_lightning.loggers import TensorBoardLogger
from hymenoptereDataModule import HymenopteraDataModule
class LitHymenoptera(pl.LightningModule):

    def __init__(self, batch_size=4):
        super().__init__()
        torch.manual_seed(42)
        self.batch_size = batch_size
        self.dataModule = HymenopteraDataModule()
        self.dataModule.setup()
        self.criterion = nn.CrossEntropyLoss()
        self.logger = TensorBoardLogger('tb_logs', name=f'Model')
        # Define the model
        self.model = models.resnet18(pretrained=True)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, 2)

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x)
        # Compute loss
        loss = self.criterion(logits, y)
        # Training metrics
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        num_correct = torch.eq(preds.view(-1), y.view(-1)).sum()
        return {'loss': loss,
                'acc': acc,
                'num_correct': num_correct}

    def training_epoch_end(self, outputs):
        self.exp_lr_scheduler.step()

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        # Validation metrics
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        num_correct = torch.eq(preds.view(-1), y.view(-1)).sum()
        return {'loss': loss,
                'acc': acc,
                'num_correct': num_correct}

    def test_step(self, batch, batch_idx):
        inputs, classes = batch
        logits = self(inputs)
        preds = torch.argmax(logits, dim=1)
        print('###############################')
        print('classes1 = ', classes)
        print('preds1 = ', preds)
        print(logits)

    def configure_optimizers(self):
        optimizer = SGD(self.parameters(), lr=0.001, momentum=0.9)
        # Decay LR by a factor of 0.1 every 7 epochs
        self.exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
        return optimizer
model = LitHymenoptera()
trainer = pl.Trainer(gpus=1, max_epochs=5, progress_bar_refresh_rate=100)
trainer.fit(model, model.dataModule)
trainer.test(model)
# Now, another test
for inputs, classes in model.dataModule.val_dataloader():
    print('###############################')
    logits = model(inputs.cuda())
    preds = torch.argmax(logits, dim=1)
    print('classes2 = ', classes)
    print('preds2 = ', preds)
    print(logits)
Here's the first output from the test_step method:
classes1 = tensor([0, 0, 0, 0], device='cuda:0')
preds1 = tensor([1, 0, 0, 0], device='cuda:0')
tensor([[0.1626, 0.2195],
        [1.1437, 0.5745],
        [0.9351, 0.4271],
        [0.7365, 0.5342]], device='cuda:0')
and now the first output from the verification step:
classes2 = tensor([0, 0, 0, 0])
preds2 = tensor([1, 0, 1, 0], device='cuda:0')
tensor([[-0.0168,  0.0800],
        [ 0.6817,  0.2949],
        [-0.2205,  0.1009],
        [ 0.6126,  0.4924]], device='cuda:0', grad_fn=<AddmmBackward>)
The classes tensors are identical (and I checked the images; they are the same), but the preds are different. Where does this come from?
Upvotes: 1
Views: 1983
Reputation: 359
I realized that I forgot to add:
model.freeze()
before using the model the second time. Now both results are the same.
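For context: freeze() puts the LightningModule into eval mode and sets requires_grad=False on every parameter. Without it, the model is still in training mode, so the ResNet's BatchNorm layers normalize with per-batch statistics instead of their running statistics, which is why the logits differ from those produced by trainer.test() (which runs in eval mode). The grad_fn=<AddmmBackward> in the second output above is the telltale sign that autograd was still active. Here is a minimal sketch of the corrected verification loop, reusing the model and dataModule objects from the question:

# Sketch of the corrected verification loop; `model` is the trained
# LitHymenoptera instance from the question.
model.freeze()  # eval mode + requires_grad=False on all parameters

for inputs, classes in model.dataModule.val_dataloader():
    logits = model(inputs.cuda())        # no grad_fn now: gradients are off
    preds = torch.argmax(logits, dim=1)  # predicted class per image
    print('classes2 = ', classes)
    print('preds2 = ', preds)
    print(logits)

In plain PyTorch, the equivalent would be calling model.eval() and wrapping the loop body in a torch.no_grad() context; freeze() is the Lightning convenience method that combines both effects.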
Upvotes: 1