Reputation: 149
I'm trying to convert an RGB image to greyscale, using the Lena.jpg image. I added some lines of code in the TODO
sections, but unfortunately I am getting a black image in the output.
My kernel:
#define CHANNELS 3

// Converts an interleaved RGB float image into a single-channel grayscale image.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel; the x
// dimension walks columns (width) and the y dimension walks rows (height).
// Threads that land outside the image return without touching memory.
//
// grayImage : output, width*height floats, row-major
// rgbImage  : input, width*height*CHANNELS floats, R,G,B stored consecutively
//             for each pixel
// width     : number of pixels per row
// height    : number of rows
__global__ void colorConvert(float * grayImage,
                             float * rgbImage,
                             int width, int height) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid may overhang the image; surplus threads have nothing to do.
    if (col >= width || row >= height) {
        return;
    }

    // Flat index of this pixel in the grayscale image.
    const int pixel = row * width + col;

    // The RGB image has CHANNELS floats per pixel, so its flat index is
    // simply the grayscale index scaled by CHANNELS.
    const float * src = rgbImage + pixel * CHANNELS;
    const float red   = src[0];
    const float green = src[1];
    const float blue  = src[2];

    // Weighted sum of the three channels.
    grayImage[pixel] = 0.21f*red + 0.71f*green + 0.07f*blue;
}
and here is my main function:
// Host driver: loads an RGB image from argv[1], runs colorConvert on the GPU
// and writes the grayscale result to argv[2].
// NOTE(review): this is the version that yields the black output image; the
// NOTE(review) comments below mark the lines that look wrong. None of the
// CUDA calls in this function has its return status checked, so a failure
// anywhere goes unnoticed.
int main(int argc, char **argv)
{
// Exactly two arguments are expected: input file name and output file name
if(argc!=3) {cout<<"Program takes two image filenames as parameters"<<endl;exit(3);}
float *imgIn, *imgOut;
int nCols, nRows, channels;
// Allocate images and initialize from file
// (the helper fills in nCols, nRows and channels through the out-pointers)
imgIn = read_colored_image_asfloat(argv[1],&nCols, &nRows, &channels);
// The kernel hard-codes CHANNELS == 3, so anything else is rejected here
if(channels!=3){cout<<"Input image is not a colored image"<<endl;exit(4);}
// Allocate host images
//imgIn = (float *)calloc(nCols*nRows, sizeof(float));
// Host output buffer: one float per pixel, zero-initialized by calloc
imgOut = (float *)calloc(nCols*nRows, sizeof(float));
// Allocates device images
float *d_imgIn, *d_imgOut;
//@TODO@ : Complete for device allocations
// size = bytes of ONE float plane (nCols*nRows floats), i.e. the gray image
int size = (nCols*nRows)*sizeof(float);
// allocate memory on device
// NOTE(review): colorConvert reads CHANNELS (3) floats per pixel from its
// rgbImage argument, so `size` bytes covers only a third of what the kernel
// indexes -- looks like this allocation should be size*3.
cudaMalloc((float**) &d_imgIn, size);
cudaMalloc((float**) &d_imgOut, size);
// Copy input data
//@TODO@ : Complete for data copy
// NOTE(review): same issue as the allocation above -- only `size` bytes of
// the RGB input are copied, although the kernel consumes three times that.
cudaMemcpy(d_imgIn, imgIn, size, cudaMemcpyHostToDevice);
// Call the kernel
//@TODO@ : Compute threads block and grid dimensions
// Grid: x covers columns, y covers rows, in 16x16 tiles. The floating-point
// division plus 1 is truncated when converted to dim3's integer fields, so
// the grid always covers the image (the kernel bounds-checks the excess).
dim3 GridDim((nCols/16.0)+1, (nRows/16.0)+1, 1);
dim3 BlockDim(16, 16, 1);
//@TODO@ : Call the CUDA kernel
// NOTE(review): colorConvert's last two parameters are (width, height), but
// (nRows, nCols) is passed here while the grid's x dimension spans nCols --
// the two arguments look swapped.
colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nRows, nCols);
// Copy output data
//@TODO@ : Complete for data copy
cudaMemcpy(imgOut, d_imgOut, size, cudaMemcpyDeviceToHost);
// Write gray image to file
// (last argument is presumably the channel count of the output -- confirm
// against write_gray_image_fromfloat's declaration)
write_gray_image_fromfloat(argv[2], imgOut, nCols, nRows, 1);
// Free memory
//@TODO@ : Free host and device memory
// free host
free(imgIn); free(imgOut);
// free device
cudaFree(d_imgIn);cudaFree(d_imgOut);
return 0;
}
Upvotes: 0
Views: 683
Reputation: 32094
You forgot to multiply the size of the RGB matrix by 3 (the input has three floats per pixel).
It should be: cudaMalloc((float**) &d_imgIn, size*3);
and cudaMemcpy(d_imgIn, imgIn, size*3, cudaMemcpyHostToDevice);
You also swapped nCols and nRows in the kernel call.
It should be: colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nCols, nRows);
The following code should work:
// Stops the program with a readable message when a CUDA runtime call fails.
// `what` names the step being attempted so the failing call can be located.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        cout << "CUDA error during " << what << ": "
             << cudaGetErrorString(status) << endl;
        exit(5);
    }
}

// Host driver: loads an RGB image from argv[1], converts it to grayscale on
// the GPU with colorConvert, and writes the result to argv[2].
//
// Exit codes: 3 = wrong argument count, 4 = input is not a 3-channel image,
// 5 = a CUDA runtime call failed, 6 = host allocation / image load failed.
int main(int argc, char **argv)
{
    // argv[1] and argv[2] are used below, so both must be present.
    if (argc != 3) {
        cout << "Program takes two image filenames as parameters" << endl;
        exit(3);
    }

    //int nCols = 512;int nRows = 384;int channels = 3;
    float *imgIn, *imgOut;
    int nCols, nRows, channels;

    // Allocate images and initialize from file
    imgIn = read_colored_image_asfloat(argv[1], &nCols, &nRows, &channels);
    // Raw-file variant of the load, kept from the original answer:
    //imgIn = (float*)calloc(nCols*nRows*3, sizeof(float));
    //FILE *f = NULL;fopen_s(&f, "rgb32f.raw", "rb");fread(imgIn, sizeof(float), nCols*nRows*3, f);fclose(f);f = NULL;
    // NOTE(review): assumes the loader signals failure with a null pointer --
    // confirm against read_colored_image_asfloat.
    if (imgIn == NULL) {
        cout << "Could not read input image" << endl;
        exit(6);
    }
    // colorConvert hard-codes three channels per pixel.
    if (channels != 3) {
        cout << "Input image is not a colored image" << endl;
        exit(4);
    }

    // Host output buffer: one float per pixel.
    imgOut = (float*)calloc((size_t)nCols * nRows, sizeof(float));
    if (imgOut == NULL) {
        cout << "Could not allocate host output image" << endl;
        exit(6);
    }

    // Allocates device images
    float *d_imgIn, *d_imgOut;
    // Bytes in ONE float plane (the gray image); computed in size_t so large
    // images do not overflow int. The RGB input needs three such planes.
    const size_t size = (size_t)nCols * nRows * sizeof(float);

    // allocate memory on device
    checkCuda(cudaMalloc((float**)&d_imgIn, size*3), "cudaMalloc(d_imgIn)");
    checkCuda(cudaMalloc((float**)&d_imgOut, size), "cudaMalloc(d_imgOut)");

    // Copy input data (all three channels)
    checkCuda(cudaMemcpy(d_imgIn, imgIn, size*3, cudaMemcpyHostToDevice),
              "host-to-device copy");

    // 16x16 thread blocks; ceil-division so the grid covers the image without
    // launching a spare block when a dimension is a multiple of 16.
    dim3 BlockDim(16, 16, 1);
    dim3 GridDim((nCols + BlockDim.x - 1) / BlockDim.x,
                 (nRows + BlockDim.y - 1) / BlockDim.y, 1);

    // Kernel signature is (gray, rgb, width, height): width = nCols, height = nRows.
    colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nCols, nRows);
    // A launch does not return a status; bad launch configurations show up here...
    checkCuda(cudaGetLastError(), "colorConvert launch");
    // ...and faults raised while the kernel runs show up at the next sync.
    checkCuda(cudaDeviceSynchronize(), "colorConvert execution");

    // Copy output data
    checkCuda(cudaMemcpy(imgOut, d_imgOut, size, cudaMemcpyDeviceToHost),
              "device-to-host copy");
    //fopen_s(&f, "gray32f.raw", "wb");fwrite(imgOut, sizeof(float), nCols*nRows, f);fclose(f);f = NULL;

    // Write gray image to file
    write_gray_image_fromfloat(argv[2], imgOut, nCols, nRows, 1);

    // Free memory
    // free host
    free(imgIn);
    free(imgOut);
    // free device
    checkCuda(cudaFree(d_imgIn), "cudaFree(d_imgIn)");
    checkCuda(cudaFree(d_imgOut), "cudaFree(d_imgOut)");
    return 0;
}
Which bug results in a black image?
Result of swapping nRows with nCols:
Result of cudaMemcpy(d_imgIn, imgIn, size, cudaMemcpyHostToDevice); (instead of size*3):
Result of cudaMalloc((float**)&d_imgIn, size); (instead of size*3):
Conclusion:
The undersized cudaMalloc allocation is the main reason for the black result.
Is there any CUDA error indication?
Reading the return value of cudaMemcpy:
cudaError_t err = cudaMemcpy(imgOut, d_imgOut, size, cudaMemcpyDeviceToHost);
returns the error status cudaErrorIllegalAddress.
Conclusion:
Checking the return status is important - it sometimes helps detect bugs in the code.
Upvotes: 2