Lijin Durairaj
Lijin Durairaj

Reputation: 5240

While trying to implement QLORA using trainer class, getting casting error

lora_config=LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=['q_lin','v_lin'],
    lora_dropout=0.1,
    bias='all'
)

class distilbertMultiClass(nn.Module):

  def __init__(self,model,num_labels,dropout_rate):
    super(distilbertMultiClass,self).__init__()
    self.model=model
    self.classifier1=nn.Linear(model.config.hidden_size,model.config.hidden_size)
    self.tanh=nn.Tanh()
    self.dropout=nn.Dropout(dropout_rate)
    self.classifier2=nn.Linear(model.config.hidden_size,num_labels)
    self.config=model.config

  def forward(self,input_ids,attention_mask,labels=None):
    outputs=self.model(input_ids,attention_mask)
    pooled_output=outputs[0]
    pooled_output=pooled_output[:,0]
    logits=self.classifier1(pooled_output)
    logits=self.tanh(logits)
    logits=self.dropout(logits)
    logits=self.classifier2(logits)

    loss=None
    if labels is not None:
      loss_fn=nn.CrossEntropyLoss()
      loss=loss_fn(logits.view(-1,self.classifier2.out_features),labels.view(-1))
    return (loss,logits) if loss is not None else logits

d_model=distilbertMultiClass(model,num_labels,0.3)
d_model=get_peft_model(d_model,lora_config)

def objective(trail):
  learning_rate=trail.suggest_float('learning_rate',1e-5,2e-5,log=True)
  num_train_epochs=trail.suggest_int('num_train_epochs',1,5)
  warmup_steps=trail.suggest_int('warmup_steps',300,1000)
  weight_decay=trail.suggest_int('weight_decay',0.01,0.1)

  training_arguments=TrainingArguments(
      output_dir='resume_evaluation_model',
      learning_rate=learning_rate,
      warmup_steps=warmup_steps,
      eval_strategy='epoch',
      num_train_epochs=num_train_epochs,
      weight_decay=weight_decay,
      overwrite_output_dir=True,
      optim='adamw_torch',
      fp16=True
  )

  trainer=Trainer(
      model=d_model,
      args=training_arguments,
       train_dataset=resume_datasetdict['train'],
      eval_dataset=resume_datasetdict['test'],
      tokenizer=tokenizer,
      compute_metrics=compute_metrics
  )

  trainer.train()
  eval_result=trainer.evaluate()
  # metric_to_optimize=eval_result['eval_accuracy']

  # if metric_to_optimize is None:
  #   return {
  #       'training Loss':None,
  #       'Validation Loss':None
  #   }

  return eval_result

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
d_model.to(device)

study=optuna.create_study(direction='maximize')
study.optimize(objective,n_trials=3)

The above is my code

  1. I tried to return eval_result['eval_accuracy'] but i was getting eval_accuracy not found error
  2. when i am returning eval_result, i am getting an error like this

Trial 0 failed with parameters: {'learning_rate': 1.0240245394100422e-05, 'num_train_epochs': 3, 'warmup_steps': 770, 'weight_decay': 0} because of the following error: The value {'eval_runtime': 2.3964, 'eval_samples_per_second': 207.395, 'eval_steps_per_second': 26.29, 'epoch': 3.0} could not be cast to float.

Everything is working fine if i dont use the QLORA config ie. if i remove this line

d_model=get_peft_model(d_model,lora_config)

am i missing some config, how to resolve this

Upvotes: 0

Views: 19

Answers (0)

Related Questions