Suhail Muhammed

Reputation: 1

ONNX I/O Binding

I need to bind the input and output tensors using I/O Binding with an ONNX Runtime model, but I don't get any output: the output tensor returns a NULL pointer. I have attached the code below.

std::vector<Ort::Value> input_tensors;
std::vector<Ort::Value> output_tensors;
std::vector<const char*> input_node_names_c_str;
std::vector<const char*> output_node_names_c_str;
int64_t input_height = input_node_dims[0].at(2);
int64_t input_width = input_node_dims[0].at(3);

// Pass gpu_graph_id to RunOptions through RunConfigs
Ort::RunOptions run_option;
// gpu_graph_id is optional if the session uses only one cuda graph
run_option.AddConfigEntry("gpu_graph_id", "1");

// Dimension expansion [CHW -> NCHW]
std::vector<int64_t> input_tensor_shape = {1, 3, input_height, input_width};
std::vector<int64_t> output_tensor_shape = {1, 300, 84};
size_t input_tensor_size = vector_product(input_tensor_shape);
size_t output_tensor_size = vector_product(output_tensor_shape);
std::vector<float> input_tensor_values(p_blob, p_blob + input_tensor_size);

Ort::IoBinding io_binding{session};
Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

input_tensors.push_back(Ort::Value::CreateTensor<float>(
        memory_info, input_tensor_values.data(), input_tensor_size,
        input_tensor_shape.data(), input_tensor_shape.size()
));

// Check if input and output node names are empty
for (const auto& inputNodeName : input_node_names) {
    if (std::string(inputNodeName).empty()) {
        std::cerr << "Empty input node name found." << std::endl;
    }
}

// format conversion
for (const auto& inputName : input_node_names) {
    input_node_names_c_str.push_back(inputName.c_str());
}

for (const auto& outputName : output_node_names) {
    output_node_names_c_str.push_back(outputName.c_str());
}

io_binding.BindInput(input_node_names_c_str[0], input_tensors[0]);

Ort::MemoryInfo output_mem_info{"Cuda", OrtDeviceAllocator, 0,
                                OrtMemTypeDefault};

cudaMalloc(&output_data_ptr, output_tensor_size * sizeof(float));
output_tensors.push_back(Ort::Value::CreateTensor<float>(
    output_mem_info, static_cast<float*>(output_data_ptr), output_tensor_size,
    output_tensor_shape.data(), output_tensor_shape.size()));

io_binding.BindOutput(output_node_names_c_str[0], output_tensors[0]);
session.Run(run_option, io_binding);

// Get output results
auto* rawOutput = output_tensors[0].GetTensorData<float>();
cout << rawOutput << endl; // suhail
cudaFree(output_data_ptr); // suhail
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
for (auto i : outputShape) { cout << i << " "; } cout << endl; // suhail
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
cout << count << endl; // suhail
std::vector<float> output(rawOutput, rawOutput + count);

I cross-checked the input tensor data and shape as well as the output tensor, but I am still getting a NULL pointer. How can I solve this issue? Does anyone have experience with I/O Binding? Please give me a tip for solving this.

Upvotes: 0

Views: 409

Answers (2)

rvimieiro

Reputation: 1145

Try something similar to what they suggest on the official page. I guess the main point is letting ONNX Runtime allocate the CUDA memory for you:

/*INPUT*/
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

// Assign memory for input tensor (Note that memory was allocated by the user)
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorSize, mInputDims.data(), mInputDims.size());

// Bind the input tensor to the session for inference
Ort::IoBinding io_binding{ *mSession };         // Create IoBinding object for session
io_binding.BindInput(mInputName, input_tensor); // Bind input tensor to the model

/*OUTPUT*/

std::vector<Ort::Value> outputTensors;

// Define memory information for output tensor, specifying CUDA (GPU)
Ort::MemoryInfo output_mem_info{ "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault };

// Let Onnx allocate GPU memory for the output tensor
Ort::Allocator gpu_allocator(*mSession, output_mem_info);
outputTensors.push_back(Ort::Value::CreateTensor(gpu_allocator, mOutputDims.data(), mOutputDims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT));

// Bind the output tensor to the model
io_binding.BindOutput(mOutputName, outputTensors[0]);

/*INFERENCE*/

// Execute the model with I/O bindings
Ort::RunOptions run_options{ nullptr };
mSession->Run(run_options, io_binding);

// Retrieve the output tensor data
float* floatarr = outputTensors.front().GetTensorMutableData<float>();

// Copy the output data from GPU to CPU memory
cudaError_t err = cudaMemcpy((void*)outputTensorValues.data(), 
                            (void*)floatarr, 
                            outputTensorSize * sizeof(float), 
                            cudaMemcpyDeviceToHost);
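
If you prefer not to create the output tensor yourself at all, Ort::IoBinding also has a BindOutput overload that takes only a MemoryInfo; ONNX Runtime then allocates the output buffer during Run() and you fetch it afterwards with GetOutputValues(). A minimal sketch, reusing the placeholder names (mSession, mOutputName, io_binding) from the snippet above:

// Bind the output by memory location only; no tensor is pre-allocated here.
Ort::MemoryInfo output_mem_info{ "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault };
io_binding.BindOutput(mOutputName, output_mem_info);

// ONNX Runtime allocates the output tensor on the bound device during Run()
mSession->Run(Ort::RunOptions{ nullptr }, io_binding);

// Retrieve the tensors allocated by Run(); the data pointer still refers
// to GPU memory and must be copied to the host before it is dereferenced.
std::vector<Ort::Value> outputs = io_binding.GetOutputValues();
float* device_ptr = outputs.front().GetTensorMutableData<float>();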

Upvotes: 0

mkm

Reputation: 21

Try this:

auto rawOutput = output_tensors.front().GetTensorMutableData<float>();

instead of

auto* rawOutput = output_tensors[0].GetTensorData<float>();
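
Note that whichever accessor you use, the returned pointer still refers to GPU memory, because the output was bound to "Cuda" memory; dereferencing it on the CPU (or constructing a std::vector directly from it) is invalid, and in the question the device buffer is even freed before the data is read. A hedged sketch of reading the result back safely, assuming the output_tensors, output_tensor_size, and output_data_ptr variables from the question:

// The bound output lives in device memory: copy it to the host first,
// and release the CUDA buffer only after the copy has completed.
auto rawOutput = output_tensors.front().GetTensorMutableData<float>();
std::vector<float> output(output_tensor_size);
cudaError_t err = cudaMemcpy(output.data(), rawOutput,
                             output_tensor_size * sizeof(float),
                             cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
    std::cerr << "cudaMemcpy failed: " << cudaGetErrorString(err) << std::endl;
}
cudaFree(output_data_ptr); // safe to free now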

Upvotes: 0
