Reputation: 2150
I'm trying to get cuda to execute a kernel on my webcam video.
I would like it to, retrieve data from my webcam, send it to my GPU, process a kernel, then send the resulting image back to be dislpayed.
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <Windows.h>
#include "Bitmap.h"
#include "OpenCVTest.h"
#include "OpenCVTest.h"
#include <opencv2/opencv.hpp>
using namespace cv;
#define Pixel unsigned char
__global__ void TestKernel(unsigned char * img)
{
int index = threadIdx.x + blockIdx.x * blockDim.x;
img[index] = 100;
}
int main(void)
{
VideoCapture cap(0);
Mat input;
Mat frame;
Mat Output;
cap >> frame;
//cap >> Output;
cvtColor(frame, Output, CV_BGR2GRAY);
uchar *d_frame;
size_t size = (int) (640 * 480);
cudaMalloc((void **)&d_frame, size);
namedWindow("Window",1);
for(;;)
{
cap >> input;
cvtColor(input, frame, CV_BGR2GRAY);
cudaMemcpy(d_frame, frame.data, size, cudaMemcpyHostToDevice);
TestKernel<<<640 * 480, 1>>>( d_frame );
cudaMemcpy(Output.data, d_frame, size, cudaMemcpyDeviceToHost);
imshow("Window", Output);
if(waitKey(30) >= 0) break;
}
cudaFree(d_frame);
return 0;
}
I've just written a very basic test kernel to start with. But it appears that the kernel is not being executed as the image that I get displays is simply the gray scale video from my webcam.
EDIT
As Robert sugested I've added some error checking by adding
gpuErrchk( cudaPeekAtLastError() );
After calling the kernel
Where gpuErrchk is
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
Upvotes: 0
Views: 1519
Reputation: 152173
640 * 480 = 307200
Unless you have compiled the code for, and are running on a cc 3.0 or higher GPU, this is not an acceptable choice for the first configuration parameter of the kernel:
TestKernel<<<640 * 480, 1>>>( d_frame );
For pre-cc3.0 devices, the first parameter (i.e. Maximum x-dimension of a grid of thread blocks
) is limited to 65535 for the first 2 dimensions of the dim3 quantity.
If you did proper cuda error checking you would find out that your kernel is not running (and/or other errors.) You can also try running your code with cuda-memcheck
as a quick test.
Upvotes: 3