Reputation: 1
I'm trying to implement a region-growing algorithm using OpenCL. For that, I will open an image using OpenCV. The problem is how to transfer the image data to OpenCL.
I'm using OpenCV version 2.4.9 and OpenCL from the AMD APP SDK 2.9-1 in Visual Studio.
Could someone tell me what I should do after opening the image with OpenCV?
Upvotes: 0
Views: 980
Reputation: 119
Generally, there are two ways to transfer images (or any other data) from the host program to the device in OpenCL applications: (1) using buffers, and (2) using image2d objects.
Both of them use the cl_mem
type. Because using a buffer is simpler than using image2d (especially for gray-scale images), I will explain how to transfer an image from the host program to the device using buffers.
After reading the input image into an OpenCV Mat
object, convert it to a gray-scale image. Then we call clCreateBuffer
, which returns a cl_mem
buffer. We can simply pass data
(a member of the Mat
object) to clCreateBuffer
to initialize the kernel's input buffer with the input image data. Then we pass the created buffer to the kernel using the clSetKernelArg
function. Finally, when the kernel finishes its job, we can read the results back with clEnqueueReadBuffer
.
Read the comments to understand this code and don't hesitate to ask questions.
Host code:
// Make Contex, Kerenl and other requirements for OpenCL before this section....
Mat image = imread("logo.bmp", CV_LOAD_IMAGE_COLOR); // reading input image by opencv to Mat type
Mat input_;
cvtColor(image, input_, CV_BGR2GRAY); // convert input image to gray scale
cl_mem inputSignalBuffer = clCreateBuffer(
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, input_.rows *input_.cols *input_.elemSize(),
static_cast<void *>(
input_.data), // inputSignalBuffers will be initialized by input_.data which contains input image data
&errNum);
cl_mem outputSignalBuffer =
clCreateBuffer( // make and preparing an empty output buffer to use after opencl kernel call back
context, CL_MEM_WRITE_ONLY, input_.rows *input_.cols *input_.elemSize(), NULL, &errNum);
checkErr(errNum, "clCreateBuffer(outputSignal)");
errNum = clSetKernelArg(
kernel, 0, sizeof(cl_mem),
&inputSignalBuffer); // passing input buffer and output buffer to kernel in order to be used on device
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &maskBuffer);
errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputSignalBuffer);
errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint), &input_.rows);
errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &input_.cols);
errNum |= clSetKernelArg(kernel, 5, sizeof(cl_uint), &maskWidth);
size_t localWorkSize[2] = {16, 16}; // Using 2 dimensional range with size of work group 16
size_t globalWorkSize[2] = {
input_
.rows, // Note: Global work size (input image rows and cols) should be multiple of size of work group.
input_.cols};
// Queue the kernel up for execution across the array
errNum =
clEnqueueNDRangeKernel( // enqueue kernel with enabling host blocking until finishing kernel execution
queue, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
checkErr(errNum, "clEnqueueNDRangeKernel");
Mat output_ = cv::Mat(input_.rows, input_.cols, CV_8UC1);
errNum = clEnqueueReadBuffer( // reading from ourput parameter of kernel
queue, outputSignalBuffer, CL_TRUE, 0, input_.rows *input_.cols *input_.elemSize(),
output_.data, // initialize OpenCV Mat by output_.data which contains output results of kernel
0, NULL, NULL);
checkErr(errNum, "clEnqueueReadBuffer");
// cut the extra border spaces which has been added in the first part of the code in order to adjust image
// size with Work Group Size;
cv::imwrite("output.bmp", output_); // saving output in image file
Kernel code:
/*
 * Mean-style 2-D convolution of an 8-bit single-channel image.
 *
 * Each work-item handles one pixel: get_global_id(0) is the row and
 * get_global_id(1) is the column.
 *
 *   input       - source pixels, row-major, inputHeight x inputWidth
 *   mask        - maskWidth x maskWidth weights, row-major
 *   output      - destination pixels, same layout as input
 *   inputHeight - number of image rows
 *   inputWidth  - number of image columns
 *   maskWidth   - side length of the square mask
 *
 * Pixels closer than maskWidth / 2 to the image border are written as 0.
 * The weighted sum is divided by maskWidth * maskWidth and clamped to [0, 255].
 *
 * NOTE(review): the loops visit 2 * (maskWidth / 2) + 1 entries per side, which
 * equals maskWidth only for odd widths - presumably maskWidth must be odd; confirm.
 */
__kernel void convolve(const __global uchar *const input, __constant uint *const mask,
                       __global uchar *const output, const int inputHeight, const int inputWidth,
                       const int maskWidth) {
    const int curr_x = get_global_id(0); // current row
    const int curr_y = get_global_id(1); // current column

    // Work-items outside the image (e.g. when the host pads the global size up
    // to a multiple of the work-group size) must not read or write anything.
    if (curr_x >= inputHeight || curr_y >= inputWidth) {
        return;
    }

    const int d = maskWidth / 2; // mask radius
    uint sum = 0;

    // Accumulate only where the whole mask lies inside the image.
    if (curr_x >= d && curr_y >= d && curr_x < inputHeight - d && curr_y < inputWidth - d) {
        for (int i = -d; i <= d; i++) {
            for (int j = -d; j <= d; j++) {
                // mad24(maskWidth, i + d, j + d) computes the same index and may be faster.
                const int mask_ptr = maskWidth * (i + d) + (j + d);
                sum += input[(curr_x + i) * inputWidth + curr_y + j] * mask[mask_ptr];
            }
        }
    }

    sum /= (uint)(maskWidth * maskWidth); // take the average
    sum = clamp(sum, (uint)0, (uint)255); // clamp == min(max(x, minval), maxval)
    output[curr_x * inputWidth + curr_y] = (uchar)sum;
}
Upvotes: 2