PyTorch RuntimeError: Expected target size [8, 1182], got [8, 256]

Question

I have a PyTorch model composed of a DistilBERT and a BiLSTM with the following structure. Its purpose is to perform token classification over a large number of categories (num_labels=1182) by feeding the output of the transformer into the BiLSTM.

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForTokenClassification

import utilities as utils
from global_constants import MAX_DOC_LENGTH

class CustomTorchModel(nn.Module):
    """Token classifier: transformer encoder -> BiLSTM -> per-token linear layer.

    The encoder's final hidden states are fed to a single-layer bidirectional
    LSTM, whose outputs are projected to one score per label for every token.
    """

    def __init__(self, args_model_name_or_path):
        """Build the encoder, the BiLSTM and the classification head.

        Args:
            args_model_name_or_path: Identifier or path forwarded to
                ``AutoModelForTokenClassification.from_pretrained``.
        """
        # nn.Module bookkeeping must exist before any sub-module is assigned,
        # otherwise the attribute assignments below raise.
        super().__init__()

        id_to_label, label_to_id = utils.unshelve_label_converters()
        label_qty = len(list(label_to_id))
        self.distilbert_layer = AutoModelForTokenClassification.from_pretrained(
            args_model_name_or_path,
            id2label=id_to_label,
            label2id=label_to_id,
            num_labels=label_qty
        )

        hidden_dim = self.distilbert_layer.config.dim
        # The LSTM consumes one encoder feature vector per token, so its
        # input size is the encoder hidden dimension, not the sequence length.
        self.bilstm_layer = nn.LSTM(input_size=hidden_dim,
                                    hidden_size=hidden_dim,
                                    num_layers=1,
                                    batch_first=True,
                                    bidirectional=True)
        # A bidirectional LSTM concatenates both directions: 2 * hidden_size.
        self.classification_layer = nn.Linear(2 * hidden_dim, label_qty)

    def forward(self, inputs):
        """Return per-token label probabilities.

        Args:
            inputs: Indexable pair ``(input_ids, attention_mask)``.

        Returns:
            Tensor with the label axis last, normalised with softmax over
            that axis (e.g. ``[8, 256, 1182]`` in the run shown in the
            question).
        """
        input_ids, attention_mask = inputs[0], inputs[1]
        print("input_ids size: " + str(input_ids.size()))
        print("attention_mask size: " + str(attention_mask.size()))

        # A token-classification head returns logits, not `last_hidden_state`;
        # request the hidden states explicitly and take the final layer.
        distilbert_output = self.distilbert_layer(input_ids=input_ids,
                                                  attention_mask=attention_mask,
                                                  output_hidden_states=True)
        last_hidden_state = distilbert_output.hidden_states[-1]
        print("distilbert_output.last_hidden_state size: " + str(last_hidden_state.size()))

        # The final (hidden, cell) states are not needed for token-level output.
        bilstm_output, _ = self.bilstm_layer(last_hidden_state)
        print("BiLSTM output size: " + str(bilstm_output.size()))

        output = self.classification_layer(bilstm_output)
        print("output size: " + str(output.size()))

        # Without an explicit `dim`, softmax on a 3-D tensor normalises over
        # the batch axis; the label axis is the last one.
        # NOTE(review): nn.CrossEntropyLoss applies log-softmax itself and
        # expects raw logits - if this output feeds that loss, consider
        # returning `output` directly instead.
        return F.softmax(output, dim=-1)

Output showing the shapes after each layer. Notes: 256 is the value of MAX_DOC_LENGTH, 768 is self.distilbert_layer.config.dim and 1182 is num_labels.

input_ids size: torch.Size([8, 256])
attention_mask size: torch.Size([8, 256])
distilbert_output.last_hidden_state size: torch.Size([8, 256, 768])
BiLSTM output size: torch.Size([8, 256, 1536])
output size: torch.Size([8, 256, 1182])

This custom model is used in a fairly standard Ignite script, which I use to train the model. Since there are multiple categories and this is not binary classification, the loss function should be nn.CrossEntropyLoss:

# Multi-class loss, averaged over all scored elements.
criterion = nn.CrossEntropyLoss(reduction='mean')
# Move the model to its device *before* constructing the optimizer, so the
# optimizer is built from parameters that already live on that device.
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=1e-5)
# Learning-rate schedule driven by gamma=0.90 (stepped by the caller).
lr_scheduler = ExponentialLR(optimizer, gamma=0.90)
trainer = create_supervised_trainer1(model, optimizer, criterion, device=device)

This is the definition of the methods used above:

def _prepare_batch(batch, device=None, non_blocking=False):
    """Split a batch mapping into model inputs and targets on ``device``.

    Args:
        batch: Mapping providing the "input_ids", "attention_mask" and
            "labels" entries.
        device: Target device forwarded to ``convert_tensor``.
        non_blocking: Forwarded to ``convert_tensor``.

    Returns:
        A ``(inputs, labels)`` tuple, where ``inputs`` is the converted
        ``[input_ids, attention_mask]`` list and ``labels`` the converted
        "labels" entry.
    """
    def _to_device(item):
        # Single place for the device/non_blocking arguments.
        return convert_tensor(item, device=device, non_blocking=non_blocking)

    features = [batch["input_ids"], batch["attention_mask"]]
    targets = batch["labels"]
    return _to_device(features), _to_device(targets)

def create_supervised_trainer1(model, optimizer, loss_fn, metrics=None, device=None):
    """Build an Engine that runs one supervised optimisation step per batch.

    Args:
        model: Module called as ``model([input_ids, attention_mask])``.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a tensor.
        metrics: Optional mapping ``name -> metric`` attached to the engine.
            Defaults to no metrics.
        device: Device the batch tensors are moved to.

    Returns:
        The configured ``Engine``. Each iteration's output is
        ``(loss_value, predictions, targets)``.
    """
    # Avoid a shared mutable default; `None` means "no metrics".
    if metrics is None:
        metrics = {}

    def _update(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = _prepare_batch(batch, device=device)
        y_pred = model(x)
        # Swap axes 1 and 2 so the class axis is at position 1, where the
        # loss expects it for sequence targets.
        transposed_y_pred = torch.transpose(y_pred, 1, 2)
        target = y.long()
        loss = loss_fn(transposed_y_pred, target)
        loss.backward()
        optimizer.step()

        # Detach so the stored engine output does not keep this iteration's
        # autograd graph alive.
        return loss.item(), transposed_y_pred.detach(), target

    def _metrics_transform(output):
        # Metrics consume (predictions, targets); drop the scalar loss.
        return output[1], output[2]

    engine = Engine(_update)

    for name, metric in metrics.items():
        # Overrides the metric's transform so it reads from the 3-tuple above.
        metric._output_transform = _metrics_transform
        metric.attach(engine, name)

    return engine

I know I am missing something, but I have not been able to figure out what. The execution produces an error related to the shapes (the "y" of the DataLoaders has shape [8, 256], while the target shape expected from the network output is [8, 1182]). This happens even though I rearranged the tensors into the order required by CrossEntropyLoss:

Current run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Engine run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Traceback (most recent call last):
  File "/home/users/user/august/src/main/ignite_script.py", line 456, in run
    trainer.run(train_dataloader, max_epochs=epochs)
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 892, in run
    return self._internal_run()
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 935, in _internal_run
    return next(self._internal_run_generator)
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 993, in _internal_run_as_gen
    self._handle_exception(e)
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
    raise e
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 959, in _internal_run_as_gen
    epoch_time_taken += yield from self._run_once_on_dataset_as_gen()
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1087, in _run_once_on_dataset_as_gen
    self._handle_exception(e)
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
    raise e
  File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1068, in _run_once_on_dataset_as_gen
    self.state.output = self._process_function(self, self.state.batch)
  File "/home/users/user/august/src/main/ignite_script.py", line 321, in _update
    loss = loss_fn(y_pred, y.float())
  File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 1163, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/functional.py", line 2996, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [8, 1182], got [8, 256]

PyTorch RuntimeError: Expected target size [8, 1182], got [8, 256]

Answers (1)

Related Questions