Reputation: 35
Could anybody break down this code and explain it to me? The parts I need help with are marked with the "#This part" comment. I would greatly appreciate any help, thanks.
def validation_epoch_end(self, outputs):
    batch_losses = [x["val_loss"] for x in outputs]  #This part
    epoch_loss = torch.stack(batch_losses).mean()
    batch_accs = [x["val_acc"] for x in outputs]  #This part
    epoch_acc = torch.stack(batch_accs).mean()
    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

def epoch_end(self, epoch, result):
    print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(
        epoch, result['val_loss'], result['val_acc']))  #This part
Upvotes: 3
Views: 15966
Reputation: 577
In PyTorch Lightning 2.0, validation_epoch_end has been replaced by on_validation_epoch_end. Here is the merged discussion on GitHub (link).
To check all the model hooks available in PyTorch Lightning, you can visit this documentation.
There you can see that on_validation_epoch_end no longer receives an outputs argument. Thus, in order to compute the validation loss or any metric after each epoch, you should (1) create a list in the constructor of the class, (2) save the outputs in the validation_step model hook, and (3) clear the outputs from memory after each epoch in on_validation_epoch_end.
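A rough sketch of just these three steps, applied to the question's original goal of averaging val_loss over the epoch, could look like this (the Linear layer and its dimensions are placeholders, so treat it as the pattern rather than a drop-in module):
import torch
import torch.nn as nn
import pytorch_lightning as pl

class SketchModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(10, 3)        # placeholder network
        self.val_step_losses = []          # (1) list created in the constructor

    def forward(self, x):
        return self.net(x)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = nn.functional.cross_entropy(self(x), y)
        self.val_step_losses.append(loss)  # (2) save the output of each batch
        return loss

    def on_validation_epoch_end(self):
        epoch_loss = torch.stack(self.val_step_losses).mean()
        self.log("val_loss_epoch", epoch_loss, prog_bar=True)
        self.val_step_losses.clear()       # (3) free the memory after each epoch

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)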
Here is a fuller, self-contained working example where I measure the macro F1 score and cross-entropy loss on a 3-class problem, with an MLP model that has a flexible configuration of hidden layers and number of neurons:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
# from lightning.pytorch.callbacks import ModelCheckpoint  ## IMPORTANT - do not mix lightning.pytorch and pytorch_lightning imports; pick one (pytorch_lightning is used here)
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger
# DATA LOADER
class DataFrameDataset(Dataset):
    def __init__(self, dataframe, labels):
        self.data = torch.tensor(dataframe.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]
# MODEL AND MODEL HOOKS
class MLP(pl.LightningModule):
    def __init__(self, input_dim, hidden_layers, output_dim):
        super().__init__()
        self.layers = nn.ModuleList()
        in_dim = input_dim

        # --> HERE STEP 1 <--
        # ATTRIBUTES TO SAVE BATCH OUTPUTS
        self.training_step_outputs = []  # save outputs of each batch to compute the metric over the whole epoch
        self.training_step_targets = []  # save targets of each batch to compute the metric over the whole epoch
        self.val_step_outputs = []       # save outputs of each batch to compute the metric over the whole epoch
        self.val_step_targets = []       # save targets of each batch to compute the metric over the whole epoch

        for hidden_dim in hidden_layers:
            self.layers.append(nn.Linear(in_dim, hidden_dim))
            in_dim = hidden_dim
        self.layers.append(nn.Linear(in_dim, output_dim))

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = nn.functional.relu(layer(x))
        x = self.layers[-1](x)
        return x
    def training_step(self, batch, batch_idx):
        # Train loss
        x, y = batch
        y_hat = self(x)
        train_loss = criterion(y_hat, y)  # criterion is defined at module level, below the class
        # Train loss per batch in epoch
        self.log('train_loss', train_loss, on_step=False, on_epoch=True, prog_bar=True)

        # GET AND SAVE OUTPUTS AND TARGETS PER BATCH
        y_pred = y_hat.argmax(dim=1).cpu().numpy()
        y_true = y.cpu().numpy()

        # --> HERE STEP 2 <--
        self.training_step_outputs.extend(y_pred)
        self.training_step_targets.extend(y_true)

        return train_loss

    def on_train_epoch_end(self):
        ## F1 macro over the whole epoch, using the outputs and targets saved per batch
        train_all_outputs = self.training_step_outputs
        train_all_targets = self.training_step_targets
        f1_macro_epoch = f1_score(train_all_targets, train_all_outputs, average='macro')
        self.log("training_f1_epoch", f1_macro_epoch, on_step=False, on_epoch=True, prog_bar=True)

        # free up the memory
        # --> HERE STEP 3 <--
        self.training_step_outputs.clear()
        self.training_step_targets.clear()
    def validation_step(self, batch, batch_idx):
        # Val loss
        x, y = batch
        y_hat = self(x)
        val_loss = criterion(y_hat, y)
        # Val loss per batch in epoch
        self.log('val_loss', val_loss, on_step=False, on_epoch=True, prog_bar=True)

        # GET AND SAVE OUTPUTS AND TARGETS PER BATCH
        y_pred = y_hat.argmax(dim=1).cpu().numpy()
        y_true = y.cpu().numpy()

        # --> HERE STEP 2 <--
        self.val_step_outputs.extend(y_pred)
        self.val_step_targets.extend(y_true)

        return val_loss

    def on_validation_epoch_end(self):
        ## F1 macro over the whole epoch, using the outputs and targets saved per batch
        val_all_outputs = self.val_step_outputs
        val_all_targets = self.val_step_targets
        val_f1_macro_epoch = f1_score(val_all_targets, val_all_outputs, average='macro')
        self.log("val_f1_epoch", val_f1_macro_epoch, on_step=False, on_epoch=True, prog_bar=True)

        # free up the memory
        # --> HERE STEP 3 <--
        self.val_step_outputs.clear()
        self.val_step_targets.clear()
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001, betas=(0.9, 0.999))
        return optimizer
Now you can create and train a model (e.g. two hidden layers with 100 and 50 neurons):
# MODEL CREATION
# Assuming df is your dataframe
df = pd.read_csv("insert_path")
# Split your data into features and labels
features = df.drop(columns=['LABEL'])
labels = df['LABEL']
# Split your data into training and validation sets
features_train, features_val, labels_train, labels_val = train_test_split(features, labels, test_size=0.3, random_state=42)
# Create your custom datasets
train_dataset = DataFrameDataset(features_train, labels_train)
val_dataset = DataFrameDataset(features_val, labels_val)
# Create your Data Loaders
train_loader = DataLoader(train_dataset, batch_size=200, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=200, shuffle=False)
# Define the model (Flexible configuration)
model_MLP = MLP(input_dim=features_train.shape[1], hidden_layers=[100, 50], output_dim=3)
# TRAIN
# Define the Lightning trainer with logger callback
logger = CSVLogger(
    save_dir="outputs/",
    name="my_exp_name",
    flush_logs_every_n_steps=500,
)
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    filename="MLP-{epoch:02d}",
    dirpath="outputs/",
)
# Criterion (module-level; it is referenced inside training_step and validation_step above)
class_weights = torch.Tensor([0.35, 0.35, 0.3])
criterion = nn.CrossEntropyLoss(weight=class_weights, reduction='mean')
epochs = 20
device = "gpu" if torch.cuda.is_available() else "cpu"
trainer = pl.Trainer(
    logger=logger,
    max_epochs=epochs,
    callbacks=[checkpoint_callback],
    accelerator=device,
)
# Train the model
trainer.fit(model_MLP, train_loader, val_loader)
Upvotes: 12
Reputation: 1392
Based on the structure, I assume you are using pytorch_lightning.
validation_epoch_end() collects the outputs from validation_step(), so it is a list of dicts whose length equals the number of batches in your validation dataloader. Thus, the first two #This part lines are just unwrapping the results from your validation set.
epoch_end() then catches the result {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()} returned by validation_epoch_end().
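To make that flow concrete, here is a small self-contained sketch that reuses the two methods from the question on a hand-built outputs list with made-up numbers; in practice the Trainer assembles this list from the values returned by validation_step:
import torch

class Demo:
    def validation_epoch_end(self, outputs):
        batch_losses = [x["val_loss"] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x["val_acc"] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['val_loss'], result['val_acc']))

outputs = [  # one dict per validation batch, with made-up values
    {"val_loss": torch.tensor(0.70), "val_acc": torch.tensor(0.55)},
    {"val_loss": torch.tensor(0.50), "val_acc": torch.tensor(0.65)},
]
model = Demo()
result = model.validation_epoch_end(outputs)  # {'val_loss': ~0.6, 'val_acc': ~0.6}
model.epoch_end(0, result)                    # prints: Epoch [0], val_loss: 0.6000, val_acc: 0.6000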
Upvotes: 2
Reputation: 40638
In your provided snippet, outputs is a list of dict elements which seem to contain at least the keys "val_loss" and "val_acc". It would be fair to assume they correspond to the validation loss and validation accuracy, respectively.
The two lines annotated with the #This part comment are list comprehensions going over the elements of the outputs list. The first one gathers the values of the key "val_loss" for each element in outputs. The second one does the same, this time gathering the values of the "val_acc" key.
A minimal example would be:
## before
outputs = [{'val_loss': tensor(a), # element 0
'val_acc': tensor(b)},
{'val_loss': tensor(c), # element 1
'val_acc': tensor(d)}]
## after
batch_losses = [tensor(a), tensor(c)]
batch_accs = [tensor(b), tensor(d)]
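The next two lines of validation_epoch_end then stack each list and take the mean, giving one scalar per metric for the epoch; a quick runnable check with made-up numbers:
import torch

batch_losses = [torch.tensor(0.7), torch.tensor(0.5)]
epoch_loss = torch.stack(batch_losses).mean()  # tensor(0.6000)
epoch_loss.item()                              # ~0.6, a plain Python float ready for printing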
Upvotes: 0