Reputation: 253
I am looking to convert a Flan-T5 model downloaded from Hugging Face into ONNX format and run inference with it.
My input data is the symptoms of a disease, and the expected output is the disease name.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import onnx
# Set the device to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the model and tokenizer
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl").to(device)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
# Export the model to ONNX format
onnx_path = "flan-t5-xl.onnx"
dummy_input = tokenizer("What's the disease name in this text: Example text", return_tensors="pt", padding=True).to(device)
dummy_input_ids = dummy_input["input_ids"]
dummy_attention_mask = dummy_input["attention_mask"]
dummy_decoder_input_ids = tokenizer("<pad>", return_tensors="pt").input_ids.to(device)
with torch.no_grad():
    torch.onnx.export(
        model,
        (dummy_input_ids, dummy_attention_mask, dummy_decoder_input_ids),
        onnx_path,
        opset_version=11,
        input_names=["input_ids", "attention_mask", "decoder_input_ids"],
        output_names=["output"],
        dynamic_axes={
            "input_ids": {0: "batch_size"},
            "attention_mask": {0: "batch_size"},
            "decoder_input_ids": {0: "batch_size"},
            "output": {0: "batch_size", 1: "sequence_length"},
        },
    )
print(f"Model saved to {onnx_path}")
# Inference using the ONNX model on GPU
import onnxruntime
onnx_model = onnxruntime.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
Creating the inference session fails with:
InvalidGraph: [ONNXRuntimeError] : 10 : INVALID_GRAPH : Load model from flan-t5-xl.onnx failed:This is an invalid model. Type Error: Type 'tensor(int64)' of input parameter (/decoder/block.0/layer.0/SelfAttention/Sub_output_0) of operator (Min) in node (/decoder/block.0/layer.0/SelfAttention/Min) is invalid.
The rest of my inference code is:
input_text = input("Enter Disease/Symptom Detail: ")
inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(device)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
decoder_input_ids = tokenizer("<pad>", return_tensors="pt").input_ids.to(device)
onnx_inputs = {
    "input_ids": input_ids.cpu().numpy(),
    "attention_mask": attention_mask.cpu().numpy(),
    "decoder_input_ids": decoder_input_ids.cpu().numpy(),
}
onnx_output = onnx_model.run(None, onnx_inputs)[0]
decoded_output = tokenizer.decode(onnx_output[0], skip_special_tokens=True)
print('-' * 100)
print(f"Name of Disease based on Entered Text: {decoded_output}")
Upvotes: 1
Views: 552
Reputation: 122092
Use https://huggingface.co/datasets/bakks/flan-t5-onnx instead.
To convert the google/flan-t5 models yourself, see https://huggingface.co/datasets/bakks/flan-t5-onnx/blob/main/exportt5.py:
from pathlib import Path
import transformers as t
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSeq2SeqLM
# print out the version of the transformers library
print("transformers version:", t.__version__)
models = [
#"google/flan-t5-small",
#"google/flan-t5-base",
#"google/flan-t5-large",
"google/flan-t5-xl",
"google/flan-t5-xxl",
]
for model_id in models:
    model_name = model_id.split("/")[1]
    onnx_path = Path("onnx/" + model_name)
    # load vanilla transformers and convert to onnx
    model = ORTModelForSeq2SeqLM.from_pretrained(model_id, from_transformers=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # save onnx checkpoint and tokenizer
    model.save_pretrained(onnx_path)
    tokenizer.save_pretrained(onnx_path)
Then try again:
import onnxruntime
onnx_model = onnxruntime.InferenceSession(
    onnx_path, providers=["CUDAExecutionProvider"]
)
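Since the export goes through optimum, you can also load the exported folder back with ORTModelForSeq2SeqLM instead of building a raw InferenceSession yourself. A minimal sketch, assuming the onnx/flan-t5-xl directory produced by the script above and an example prompt:
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

# Directory produced by the export script above (assumed path)
onnx_dir = "onnx/flan-t5-xl"

# provider selects the ONNX Runtime execution provider, like in the InferenceSession call
model = ORTModelForSeq2SeqLM.from_pretrained(onnx_dir, provider="CUDAExecutionProvider")
tokenizer = AutoTokenizer.from_pretrained(onnx_dir)

inputs = tokenizer("What's the disease name in this text: Example text", return_tensors="pt")
# ORTModelForSeq2SeqLM exposes generate(), so decoding works like the PyTorch model
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))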
Upvotes: 1