Reputation: 17
I am facing an issue with BERT while training a model. I am using the bert-base-uncased model for training. The error says a string is passed into the dropout layer rather than a tensor; however, I am passing tensors as inputs to the model. Please find the attached error:
in <module>:49

   46         if accuracy>best_accuracy:
   47             best_accuracy=accuracy
   48             torch.save(model.state_dict(),CFG.MODEL_PATH)
 ❱ 49 train()
   50
   51
   52

in train:41

   38
   39     best_accuracy=0
   40     for epoch in range(CFG.EPOCHS):
 ❱ 41         train_fn(train_loader,model,optimizer,device,scheduler)
   42         outputs,targets= eval_fn(val_loader,model,device)
   43         outputs= np.array(outputs)>=0.5
   44         accuracy= metrics.accuracy_score(targets,outputs)

in train_fn:20

   17         targets= targets.to(device,dtype=torch.float)
   18
   19         optimizer.zero_grad()
 ❱ 20         outputs= model(ids=ids, token_type_ids=token_type_ids, mask=mask)
   21         loss= loss_fn(outputs,targets)
   22         loss.backward()
   23         optimizer.step()

/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl

   1498         if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
 ❱ 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []
   1504         backward_pre_hooks = []

/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:153 in forward

   150     def forward(self, *inputs, **kwargs):
   151         with torch.autograd.profiler.record_function("DataParallel.forward"):
   152             if not self.device_ids:
 ❱ 153                 return self.module(*inputs, **kwargs)
   154
   155             for t in chain(self.module.parameters(), self.module.buffers()):
   156                 if t.device != self.src_device_obj:

/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl

   1498         if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
 ❱ 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []
   1504         backward_pre_hooks = []

in forward:11

    8
    9     def forward(self,ids,mask,token_type_ids):
   10         _,o2= self.bert(ids,attention_mask=mask,token_type_ids=token_type_ids)
 ❱ 11         bo= self.bert_drop(o2)
   12         output= self.out(bo)
   13         return output
   14

/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl

   1498         if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
 ❱ 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []
   1504         backward_pre_hooks = []

/opt/conda/lib/python3.10/site-packages/torch/nn/modules/dropout.py:59 in forward

   56     """
   57
   58     def forward(self, input: Tensor) -> Tensor:
 ❱ 59         return F.dropout(input, self.p, self.training, self.inplace)
   60
   61
   62 class Dropout1d(_DropoutNd):

/opt/conda/lib/python3.10/site-packages/torch/nn/functional.py:1252 in dropout

   1249         return handle_torch_function(dropout, (input,), input, p=p, training=training, i
   1250     if p < 0.0 or p > 1.0:
   1251         raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".
 ❱ 1252     return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, traini
   1253
   1254
   1255 def alpha_dropout(input: Tensor, p: float = 0.5, training: bool = False, inplace: bool =
TypeError: dropout(): argument 'input' (position 1) must be Tensor, not str
Please also find the dataset class, model class, and training functions below.
class BertDataset:
    def __init__(self, review, target):
        self.review = review
        self.target = target
        self.tokenizer = CFG.TOKENIZER
        self.max_len = CFG.MAX_LEN

    def __len__(self):
        return len(self.review)

    def __getitem__(self, idx):
        review = str(self.review[idx])
        review = " ".join(review.split())
        inputs = self.tokenizer.encode_plus(
            review,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
        )
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "mask": torch.tensor(mask, dtype=torch.long),
            "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long),
            "targets": torch.tensor(self.target[idx], dtype=torch.float),
        }
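As an aside on the dataset above (not related to the error itself): pad_to_max_length is deprecated in recent tokenizers releases. A minimal sketch of the equivalent encode_plus call, assuming a tokenizer version that accepts the padding and truncation arguments:

inputs = self.tokenizer.encode_plus(
    review,
    None,
    add_special_tokens=True,
    max_length=self.max_len,
    padding="max_length",  # replaces the deprecated pad_to_max_length=True
    truncation=True,       # explicitly truncate reviews longer than max_len
)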
import torch.nn as nn
import transformers


class BertBaseUncased(nn.Module):
    def __init__(self):
        super(BertBaseUncased, self).__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.bert_drop = nn.Dropout(0.3)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        _, o2 = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        bo = self.bert_drop(o2)
        output = self.out(bo)
        return output
import torch


def loss_fn(outputs, targets):
    return nn.BCEWithLogitsLoss()(outputs, targets.view(-1, 1))


def train_fn(data_loader, model, optimizer, device, scheduler):
    model.train()
    for d in data_loader:
        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        targets = d["targets"]

        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets = targets.to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(ids=ids, token_type_ids=token_type_ids, mask=mask)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        scheduler.step()
def eval_fn(data_loader, model, device):
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for d in data_loader:
            ids = d["ids"]
            token_type_ids = d["token_type_ids"]
            mask = d["mask"]
            targets = d["targets"]

            ids = ids.to(device, dtype=torch.long)
            mask = mask.to(device, dtype=torch.long)
            targets = targets.to(device, dtype=torch.float)
            token_type_ids = token_type_ids.to(device, dtype=torch.long)

            outputs = model(ids=ids, token_type_ids=token_type_ids, mask=mask)

            targets = targets.cpu().detach()
            fin_targets.extend(targets.numpy().tolist())
            outputs = torch.sigmoid(outputs).cpu().detach()
            fin_outputs.extend(outputs.numpy().tolist())
    return fin_outputs, fin_targets
import pandas as pd
import numpy as np
from sklearn import metrics, model_selection
from transformers import AdamW, get_linear_schedule_with_warmup


def train():
    df = pd.read_csv('/kaggle/input/aamlp-text-data/imdb_folds.csv')
    # df.sentiment= df.sentiment.apply(lambda x: 1 if x=="positive" else "negative")
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, random_state=42, shuffle=True, stratify=df.sentiment.values
    )
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = BertDataset(review=df_train.review.values, target=df_train.sentiment.values)
    val_dataset = BertDataset(review=df_valid.review.values, target=df_valid.sentiment.values)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=CFG.TRAIN_BS, num_workers=4)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=CFG.VAL_BS, num_workers=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BertBaseUncased()
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            "weight_decay": 1e-3,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int(len(df_train) / CFG.TRAIN_BS * CFG.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-3)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
    )
    model = nn.DataParallel(model)

    best_accuracy = 0
    for epoch in range(CFG.EPOCHS):
        train_fn(train_loader, model, optimizer, device, scheduler)
        outputs, targets = eval_fn(val_loader, model, device)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        print(f"Accuracy:{accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), CFG.MODEL_PATH)


train()
Upvotes: 0
Views: 143
Reputation: 17
The issue was caused by a change in the format of BERT's outputs between transformers versions: recent versions return a ModelOutput object by default instead of a plain tuple, so the tuple-unpacking `_, o2 = self.bert(...)` iterates over the output's string keys, and `o2` ends up being the string "pooler_output" rather than a tensor. That string is what then reaches the dropout layer.
To solve this, the forward pass needs to read the pooled output from the returned object:
class BertBaseUncased(nn.Module):
    def __init__(self):
        super(BertBaseUncased, self).__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.bert_drop = nn.Dropout(0.3)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        o2 = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # take the pooled [CLS] representation from the ModelOutput object
        output_mod = o2.pooler_output
        bo = self.bert_drop(output_mod)
        output = self.out(bo)
        return output
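Alternatively, if you prefer to keep the original tuple-style unpacking, you can ask the model to return plain tuples again. A minimal sketch, assuming a transformers version that supports the return_dict flag:

class BertBaseUncased(nn.Module):
    def __init__(self):
        super(BertBaseUncased, self).__init__()
        # return_dict=False makes the model return (sequence_output, pooled_output) tuples again
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        self.bert_drop = nn.Dropout(0.3)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        # with return_dict=False the old tuple unpacking works as before
        _, o2 = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        bo = self.bert_drop(o2)
        output = self.out(bo)
        return output

The flag can also be passed per call, i.e. self.bert(..., return_dict=False), with the same effect.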
Upvotes: 0