lcoandrade

Reputation: 181

(HuggingFace Transformers) NLP with RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

I'm trying to make a Sarcasm detector with Lightning in this Kaggle notebook.

I'm using HuggingFace Transformers to achieve this.

When I start the training, I get this error:

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

This is my LightningModule:

class SarcasmTagger(pl.LightningModule):

    def __init__(
        self,
        model_name: str,
        n_classes: int,
        n_training_steps=None,
        n_warmup_steps=None
    ):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name, return_dict=True)
        #self.bert = BertForSequenceClassification.from_pretrained(model_name, return_dict=True)
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        #print(outputs)
        logits = self.classifier(outputs.pooler_output)
        return logits

    def shared_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        label = batch["label"].view(-1, 1)
        logits = self(input_ids=input_ids, attention_mask=attention_mask)
        loss = nn.functional.cross_entropy(logits, label)
        return logits, loss, label

    def training_step(self, batch, batch_idx):
        logits, loss, label = self.shared_step(batch, batch_idx)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": logits, "label": label}

    def validation_step(self, batch, batch_idx):
        logits, loss, label = self.shared_step(batch, batch_idx)
        self.log("val_loss", loss, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        logits, loss, label = self.shared_step(batch, batch_idx)
        self.log("test_loss", loss, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=2e-5)

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps
        )

        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step')
        )

As far as I'm aware, this error is related to backpropagation, but I'm not calling detach anywhere in my code that could cause this issue.

What could be happening here?

Upvotes: 0

Views: 1063

Answers (1)

lcoandrade

Reputation: 181

I found the solution!!!

First I tried to change the package versions according to what I've read here. Therefore, I changed my install packages part to:

!pip install torch==2.0.0+cu117
!pip install pytorch-lightning==1.9.4
!pip install accelerate==0.21.0
!pip install tokenizers==0.13.3
!pip install transformers==4.26.1

But the error was still popping up, so I thought it could be related to the optimizer I was using. My optimizer was this one:

def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=2e-5)

        scheduler = get_linear_schedule_with_warmup(
          optimizer,
          num_warmup_steps=self.n_warmup_steps,
          num_training_steps=self.n_training_steps
        )

        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step')
        )

When I changed my method to use a simple Adam optimizer:

def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=2e-5)
        return [optimizer]

It worked!

So, the problem is in transformers' AdamW combined with a scheduler. Reverting the install packages to just:

!pip install -q transformers

Makes the training work.

Since transformers.AdamW is deprecated, I think it is a good idea to change the code to use torch.optim.Adam or torch.optim.AdamW instead.
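For instance, keeping the warmup scheduler but swapping in torch's AdamW would look roughly like this. This is just a sketch based on the code above (it assumes the same n_warmup_steps / n_training_steps attributes), and I haven't re-tested this exact variant:

from torch.optim import AdamW  # torch's AdamW, not transformers.AdamW
from transformers import get_linear_schedule_with_warmup

def configure_optimizers(self):
    # Same setup as before, but with the torch optimizer
    optimizer = AdamW(self.parameters(), lr=2e-5)

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=self.n_warmup_steps,
        num_training_steps=self.n_training_steps
    )

    return dict(
        optimizer=optimizer,
        lr_scheduler=dict(
            scheduler=scheduler,
            interval='step')
    )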

So, summarizing: the newer version of transformers might have introduced a bug in its AdamW that makes the tensors lose their grad function, probably something like a detach somewhere in the code.

But, anyway, this looks like a bug in transformers.AdamW.
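Just to illustrate what a stray detach does (this is a standalone toy snippet, not code from the notebook): calling backward() on a tensor that has been detached from the graph raises exactly this error:

import torch

x = torch.randn(3, requires_grad=True)
loss = x.sum().detach()  # detach() drops the grad_fn
loss.backward()          # RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn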

Upvotes: 1
