Reputation: 1
I need to bind the input and output tensors using I/O Binding for an ONNX Runtime model, but I don't get any output: the output tensor returns a NULL pointer. I have attached the code below.
std::vector<Ort::Value> input_tensors;
std::vector<Ort::Value> output_tensors;
std::vector<const char*> input_node_names_c_str;
std::vector<const char*> output_node_names_c_str;
int64_t input_height = input_node_dims[0].at(2);
int64_t input_width = input_node_dims[0].at(3);
// Pass gpu_graph_id to RunOptions through RunConfigs
Ort::RunOptions run_option;
// gpu_graph_id is optional if the session uses only one cuda graph
run_option.AddConfigEntry("gpu_graph_id", "1");
// Dimension expansion [CHW -> NCHW]
std::vector<int64_t> input_tensor_shape = {1, 3, input_height, input_width};
std::vector<int64_t> output_tensor_shape = {1, 300, 84};
size_t input_tensor_size = vector_product(input_tensor_shape);
size_t output_tensor_size = vector_product(output_tensor_shape);
std::vector<float> input_tensor_values(p_blob, p_blob + input_tensor_size);
Ort::IoBinding io_binding{session};
Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
input_tensors.push_back(Ort::Value::CreateTensor<float>(
    memory_info, input_tensor_values.data(), input_tensor_size,
    input_tensor_shape.data(), input_tensor_shape.size()
));
// Check for empty input node names
for (const auto& inputNodeName : input_node_names) {
    if (std::string(inputNodeName).empty()) {
        std::cerr << "Empty input node name found." << std::endl;
    }
}
// Convert node names to C-style strings
for (const auto& inputName : input_node_names) {
    input_node_names_c_str.push_back(inputName.c_str());
}
for (const auto& outputName : output_node_names) {
    output_node_names_c_str.push_back(outputName.c_str());
}
io_binding.BindInput(input_node_names_c_str[0], input_tensors[0]);
Ort::MemoryInfo output_mem_info{"Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault};
void* output_data_ptr = nullptr; // raw device buffer for the output tensor
cudaMalloc(&output_data_ptr, output_tensor_size * sizeof(float));
output_tensors.push_back(Ort::Value::CreateTensor<float>(
    output_mem_info, static_cast<float*>(output_data_ptr), output_tensor_size,
    output_tensor_shape.data(), output_tensor_shape.size()
));
io_binding.BindOutput(output_node_names_c_str[0], output_tensors[0]);
session.Run(run_option, io_binding);
// Get output results
auto* rawOutput = output_tensors[0].GetTensorData<float>();
std::cout << rawOutput << std::endl; // debug: prints the raw pointer
cudaFree(output_data_ptr);
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
for (auto i : outputShape) { std::cout << i << " "; }
std::cout << std::endl; // debug: prints the output shape
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::cout << count << std::endl; // debug: prints the element count
std::vector<float> output(rawOutput, rawOutput + count);
I cross-checked the input tensor data and shape as well as the output tensor, but I am still getting a NULL pointer. How can I solve this issue? Does anyone have experience with I/O Binding? Please give me a tip for solving this.
Upvotes: 0
Views: 409
Reputation: 1145
Try something similar to what they suggest on the official page. I guess the main point is letting ONNX Runtime allocate the CUDA memory for you:
/*INPUT*/
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
// Assign memory for input tensor (Note that memory was allocated by the user)
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorSize, mInputDims.data(), mInputDims.size());
// Bind the input tensor to the session for inference
Ort::IoBinding io_binding{ *mSession }; // Create IoBinding object for session
io_binding.BindInput(mInputName, input_tensor); // Bind input tensor to the model
/*OUTPUT*/
std::vector<Ort::Value> outputTensors;
// Define memory information for output tensor, specifying CUDA (GPU)
Ort::MemoryInfo output_mem_info{ "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault };
// Let Onnx allocate GPU memory for the output tensor
Ort::Allocator gpu_allocator(*mSession, output_mem_info);
outputTensors.push_back(Ort::Value::CreateTensor(gpu_allocator, mOutputDims.data(), mOutputDims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT));
// Bind the output tensor to the model
io_binding.BindOutput(mOutputName, outputTensors[0]);
/*INFERENCE*/
// Execute the model with I/O bindings
Ort::RunOptions run_options{ nullptr };
mSession->Run(run_options, io_binding);
// Retrieve the output tensor data
float* floatarr = outputTensors.front().GetTensorMutableData<float>();
// Copy the output data from GPU to CPU memory
cudaError_t err = cudaMemcpy((void*)outputTensorValues.data(),
                             (void*)floatarr,
                             outputTensorSize * sizeof(float),
                             cudaMemcpyDeviceToHost);
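One nice property of this approach: the Ort::Value created from the gpu_allocator owns its device buffer and frees it when the value is destroyed, so there is no manual cudaMalloc/cudaFree to get wrong; just make sure the device-to-host copy happens before the tensor goes out of scope. If the output shape is not known up front, Ort::IoBinding can also bind just a MemoryInfo and let ONNX Runtime allocate the output during Run() (a minimal sketch, reusing mSession, mOutputName, and run_options from above):
// Bind only the device memory description; ONNX Runtime allocates the
// output buffer itself once the real shape is known inside Run().
Ort::MemoryInfo cuda_mem_info{ "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault };
io_binding.BindOutput(mOutputName, cuda_mem_info);
mSession->Run(run_options, io_binding);
// Fetch the values ONNX Runtime allocated; the data still lives on the GPU
// and must be copied to the host (e.g. with cudaMemcpy) before being read.
std::vector<Ort::Value> outputs = io_binding.GetOutputValues();
std::vector<int64_t> out_shape = outputs.front().GetTensorTypeAndShapeInfo().GetShape();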
Upvotes: 0
Reputation: 21
Try this:
auto rawOutput = output_tensors.front().GetTensorMutableData<float>();
instead of
auto* rawOutput = output_tensors[0].GetTensorData<float>();
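For what it's worth, GetTensorData<float>() and GetTensorMutableData<float>() return the same underlying pointer (const vs. non-const). Because the question binds the output to "Cuda" memory, that pointer is a device address either way, so it must be copied to host memory before it can be dereferenced on the CPU, and before the cudaFree call (a minimal sketch using the question's variables):
// Copy the device-resident output into host memory before reading it.
std::vector<float> output(output_tensor_size);
cudaMemcpy(output.data(), rawOutput, output_tensor_size * sizeof(float), cudaMemcpyDeviceToHost);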
Upvotes: 0