Toru

Reputation: 33

How to convert cv::Mat to NCHW format?

According to the User Guide, the input/output of TensorRT needs to be in NCHW format.
What is NCHW format?
How can I convert a cv::Mat to NCHW format?

I run inference with TensorRT using the code below.
There are no errors, but the output result is not correct.

int batchSize = 1;
int size_of_single_input = 256 * 256 * 3 * sizeof(float);
int size_of_single_output = 100 * 1 * 1 * sizeof(float); 

IBuilder* builder = createInferBuilder(gLogger);

INetworkDefinition* network = builder->createNetwork();

ICaffeParser* parser = createCaffeParser();
auto blob_name_to_tensor = parser->parse("deploy.prototxt",
                                         "sample.caffemodel",
                                         *network,
                                         DataType::kFLOAT);

network->markOutput(*blob_name_to_tensor->find("prob"));

builder->setMaxBatchSize(1);
builder->setMaxWorkspaceSize(1 << 30); 
ICudaEngine* engine = builder->buildCudaEngine(*network);

IExecutionContext *context = engine->createExecutionContext();

int inputIndex = engine->getBindingIndex(INPUT_LAYER_NAME);
int outputIndex = engine->getBindingIndex(OUTPUT_LAYER_NAME);

cv::Mat input;
input = imread("./sample.jpg");
cvtColor(input, input, CV_BGR2RGB);
cv::resize(input, input, cv::Size(256, 256));

float output[OUTPUTSIZE];

void** buffers = (void**)malloc(engine->getNbBindings() * sizeof(void*));
cudaMalloc(&buffers[inputIndex], batchSize * size_of_single_input);
cudaMalloc(&buffers[outputIndex], batchSize * size_of_single_output);

cudaStream_t stream;
cudaStreamCreate(&stream);

cudaMemcpyAsync(buffers[inputIndex], input.data,
                batchSize * size_of_single_input,
                cudaMemcpyHostToDevice, stream);

context->enqueue(batchSize, buffers, stream, nullptr);


cudaMemcpyAsync(output, buffers[outputIndex], 
                batchSize * size_of_single_output, 
                cudaMemcpyDeviceToHost, stream);

cudaStreamSynchronize(stream);

Upvotes: 3

Views: 10158

Answers (6)

fytao

Reputation: 331

Use cv::dnn::blobFromImage to build an NCHW blob from an image:

Mat input_img = imread("/path/to/image");
Mat input_blob_nchw = cv::dnn::blobFromImage(input_img);

In case you need an input in NHWC format instead, you can use transposeND from opencv2/core.hpp (OpenCV >= 4.6):

std::vector<int> order = {0, 2, 3, 1};
Mat input_blob_nhwc;
transposeND(input_blob_nchw, order, input_blob_nhwc);
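
blobFromImage can also resize, scale, and swap BGR to RGB in the same call. A minimal sketch, assuming a 256x256 network input and 1/255 scaling (these parameters are illustrative, not part of the original answer):

// Resize, scale to [0,1], and swap BGR->RGB in one call
Mat blob = cv::dnn::blobFromImage(input_img, 1.0 / 255.0, cv::Size(256, 256),
                                  cv::Scalar(), /*swapRB=*/true, /*crop=*/false);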

Upvotes: 3

Roy Shilkrot

Reputation: 3558

Here's a short and straightforward solution for NHWC to NCHW conversion on the CPU:

static void hwc_to_chw(cv::InputArray src, cv::OutputArray dst) {
  std::vector<cv::Mat> channels;
  cv::split(src, channels);

  // Stretch one-channel images to vector
  for (auto &img : channels) {
    img = img.reshape(1, 1);
  }

  // Concatenate three vectors to one
  cv::hconcat(channels, dst);
}
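
A usage sketch (my addition, assuming a float input is wanted): convert the image to CV_32F first, then flatten it.

cv::Mat img = cv::imread("sample.jpg");
img.convertTo(img, CV_32FC3, 1.0f / 255.0f);
cv::Mat chw;
hwc_to_chw(img, chw);
// chw is now a 1 x (3*H*W) row vector with the B, G, and R planes
// concatenated; chw.ptr<float>(0) can be copied to the device as the
// NCHW input for batch size 1.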

Upvotes: 2

ma.mehralian

Reputation: 1285

This is another simple OpenCV-based answer:

cv::resize(img, img, input_size);
img.convertTo(img, CV_32FC3, 1.f / 255.f);
// One float per element: total() pixels x channels
std::vector<float> chw_buffer(img.total() * img.channels());
std::vector<cv::Mat> chw;
for (int n = 0; n < img.channels(); ++n)
    chw.emplace_back(input_size, CV_32FC1, chw_buffer.data() + n * input_size.area());
// cv::split writes each plane directly into the pre-allocated buffer,
// so no extra copy is needed.
cv::split(img, chw);
cudaMemcpyAsync(gpu_buffers[inputIndex], chw_buffer.data(),
                chw_buffer.size() * sizeof(float), cudaMemcpyHostToDevice, stream);

Upvotes: 0

Yingwei Ji

Reputation: 1

// Suppose all data types are int and the source Mat is 256 x 256 with 3 channels.

cv::Mat NHWC(256, 256, CV_32SC3);        // interleaved source
cv::Mat NCHW(3, 256 * 256, CV_32SC1);    // planar destination

std::vector<cv::Mat> channels;
cv::split(NHWC, channels);

// Mat::data is uchar*, so the offsets must be in bytes.
memcpy(NCHW.data,                               channels[0].data, 256 * 256 * sizeof(int));
memcpy(NCHW.data + 256 * 256 * sizeof(int),     channels[1].data, 256 * 256 * sizeof(int));
memcpy(NCHW.data + 2 * 256 * 256 * sizeof(int), channels[2].data, 256 * 256 * sizeof(int));

Upvotes: -1

aunsid

Reputation: 397

This code snippet does the conversion as explained by Ashwin:

bool SampleUffSSD::processInput(const samplesCommon::BufferManager& buffers)
{
    const int batchSize = mParams.batchSize;

    // Available images
    std::vector<std::string> imageList = {"test.jpeg"};
    mPPMs.resize(batchSize);
    assert(mPPMs.size() <= imageList.size());
    for (int i = 0; i < batchSize; ++i)
    {
        readImage(locateFile(imageList[i], mParams.dataDirs), image);
    }

    // Host memory for input buffer
    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));

    // Fill the buffer in NCHW order, converting BGR to RGB and
    // scaling each pixel to [-1, 1].
    const int volChl = inputH * inputW;   // elements per channel plane
    const int volImg = 3 * volChl;        // elements per image
    for (int i = 0; i < batchSize; ++i)
    {
        for (int j = 0; j < inputH; ++j)
        {
            for (int k = 0; k < inputW; ++k)
            {
                cv::Vec3b bgr = image.at<cv::Vec3b>(j, k);
                hostDataBuffer[i * volImg + 0 * volChl + j * inputW + k] = (2.0 / 255.0) * float(bgr[2]) - 1.0;
                hostDataBuffer[i * volImg + 1 * volChl + j * inputW + k] = (2.0 / 255.0) * float(bgr[1]) - 1.0;
                hostDataBuffer[i * volImg + 2 * volChl + j * inputW + k] = (2.0 / 255.0) * float(bgr[0]) - 1.0;
            }
        }
    }
    return true;
}

source: https://forums.developer.nvidia.com/t/custom-trained-ssd-inception-model-in-tensorrt-c-version/143048/14

Upvotes: 0

Ashwin Nanjappa

Reputation: 78528

NCHW: For a 3-channel image, say BGR, all pixels of the B channel are stored first, then the G channel, and finally the R channel.

NHWC: For each pixel, its 3 colors are stored together in BGR order.
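
As a quick illustration (my notation, not part of the original answer), the flat buffer offsets for a C x H x W image with batch size 1 are:

// NCHW offset of element (c, y, x): c * H * W + y * W + x
// NHWC offset of element (y, x, c): (y * W + x) * C + c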

TensorRT requires your image data to be in NCHW order. But OpenCV reads it in NHWC order. You can write a simple function to read the data from NHWC to a buffer where you store them in NCHW order. Copy this buffer to device memory and pass to TensorRT.
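
A minimal sketch of such a function, assuming an 8-bit BGR cv::Mat and a pre-allocated float destination buffer (the names and the plain uchar-to-float copy are mine, not from the answer; add any scaling or mean subtraction your network expects):

void nhwcToNchw(const cv::Mat& img, float* dst)
{
    const int H = img.rows, W = img.cols, C = img.channels();
    for (int c = 0; c < C; ++c)
        for (int y = 0; y < H; ++y)
            for (int x = 0; x < W; ++x)
                dst[c * H * W + y * W + x] =
                    static_cast<float>(img.at<cv::Vec3b>(y, x)[c]);
}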

You can find an example of this operation in the samples/sampleFasterRCNN/sampleFasterRCNN.cpp file in your TensorRT installation. It reads a PPM file, which is also stored in NHWC order, and then converts it to NCHW order while subtracting the mean values, all in a single step. You can modify that to suit your purpose.

Upvotes: 4
