import torch
import torch.nn as nn
import optuna
from transformers import Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=['q_lin', 'v_lin'],
    lora_dropout=0.1,
    bias='all'
)
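A quick way to double-check that target_modules matches DistilBERT's attention layer names (a sketch, assuming model is the already-loaded DistilBERT base):

    # list the Linear modules whose names contain the LoRA targets
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear) and ('q_lin' in name or 'v_lin' in name):
            print(name)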
class distilbertMultiClass(nn.Module):
    def __init__(self, model, num_labels, dropout_rate):
        super(distilbertMultiClass, self).__init__()
        self.model = model
        self.classifier1 = nn.Linear(model.config.hidden_size, model.config.hidden_size)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier2 = nn.Linear(model.config.hidden_size, num_labels)
        self.config = model.config

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids, attention_mask)
        # take the hidden state of the first ([CLS]) token as the pooled representation
        pooled_output = outputs[0][:, 0]
        logits = self.classifier1(pooled_output)
        logits = self.tanh(logits)
        logits = self.dropout(logits)
        logits = self.classifier2(logits)
        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, self.classifier2.out_features), labels.view(-1))
        return (loss, logits) if loss is not None else logits
d_model = distilbertMultiClass(model, num_labels, 0.3)
d_model = get_peft_model(d_model, lora_config)
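As a sanity check that LoRA actually wrapped the custom head, PEFT's print_trainable_parameters can be called on the wrapped model:

    # should report only the LoRA (and bias) parameters as trainable
    d_model.print_trainable_parameters()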
def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 2e-5, log=True)
    num_train_epochs = trial.suggest_int('num_train_epochs', 1, 5)
    warmup_steps = trial.suggest_int('warmup_steps', 300, 1000)
    # weight_decay is a float; suggest_int floors these bounds to 0 (as seen in the failed trial)
    weight_decay = trial.suggest_float('weight_decay', 0.01, 0.1)
    training_arguments = TrainingArguments(
        output_dir='resume_evaluation_model',
        learning_rate=learning_rate,
        warmup_steps=warmup_steps,
        eval_strategy='epoch',
        num_train_epochs=num_train_epochs,
        weight_decay=weight_decay,
        overwrite_output_dir=True,
        optim='adamw_torch',
        fp16=True
    )
    trainer = Trainer(
        model=d_model,
        args=training_arguments,
        train_dataset=resume_datasetdict['train'],
        eval_dataset=resume_datasetdict['test'],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    trainer.train()
    eval_result = trainer.evaluate()
    # metric_to_optimize = eval_result['eval_accuracy']
    # if metric_to_optimize is None:
    #     return {
    #         'training Loss': None,
    #         'Validation Loss': None
    #     }
    return eval_result
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
d_model.to(device)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=3)
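If a trial ever completes, the result would be read through the standard Optuna accessors:

    # best objective value and the hyperparameters that produced it
    print(study.best_value)
    print(study.best_params)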
The above is my code. Trial 0 failed with parameters {'learning_rate': 1.0240245394100422e-05, 'num_train_epochs': 3, 'warmup_steps': 770, 'weight_decay': 0} because of the following error:

The value {'eval_runtime': 2.3964, 'eval_samples_per_second': 207.395, 'eval_steps_per_second': 26.29, 'epoch': 3.0} could not be cast to float.

Everything works fine if I don't use the LoRA config, i.e. if I remove this line:

d_model = get_peft_model(d_model, lora_config)

Am I missing some config? How do I resolve this?
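For what it's worth, the commented-out block in objective was my attempt to return a single metric instead of the whole dict, along these lines (assuming compute_metrics adds 'eval_accuracy'):

    # return one float so Optuna has something to maximize
    return eval_result['eval_accuracy']

but notice that the eval dict in the failing trial contains no 'eval_accuracy' (or even 'eval_loss') once the model is wrapped with get_peft_model.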