Jaroslav Petrík
Jaroslav Petrík

Reputation: 29

cudaMemcpy - seems to not work properly

I am trying to copy data from host to device in my GPU greyscale filter program. However, something is wrong: when I do so, nothing happens. I probably have some mistakes in my code, but the compiler doesn't show any errors. I need to copy the host buffer bufferRGB to the GPU (into d_bufferRGB), process it there, and copy the result back from d_new_bufferRGB so that I can save it with the function save_bmp().

EDIT 1: I implemented CUDA error checking in main(). It reports an invalid argument on this line: cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice)

HERE is the code >>>

     #include <stdio.h>
        #include <stdlib.h>
        #include <Windows.h>
        #include <cuda_runtime.h>
        #include <cuda.h>
        #include "device_launch_parameters.h"
        #include <iostream>


        // Wraps a CUDA runtime call and forwards its status, together with the call site, to gpuAssert.
        #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*
 * Reports a failed CUDA runtime call.
 *
 * code  - status returned by the CUDA call
 * file  - source file of the call site (supplied by gpuErrchk)
 * line  - source line of the call site (supplied by gpuErrchk)
 * abort - when true (the default) the process exits with `code` after
 *         the message has been printed
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    // Successful calls need no reporting.
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (abort)
        exit(code);
}
        // Image dimensions in pixels; load_bmp stores them here (main passes &width, &heigth) and save_bmp reads them.
        int width, heigth;
        // Set by load_bmp to bfSize - bfOffBits; main uses it as the byte count of its cudaMemcpy calls.
        long size;
        // Device-side copy of `size` (allocated with cudaMalloc in main).
        long *d_size;
        // Host buffers: bufferRGB is allocated and filled by load_bmp, new_bufferRGB is handed to save_bmp.
        RGBTRIPLE *bufferRGB, *new_bufferRGB;
        // Device buffers passed to the kernel as input and output.
        RGBTRIPLE *d_bufferRGB, *d_new_bufferRGB;


        /*
         * Reads the pixels of the BMP file `file_name`.
         *
         * bufferRGB - receives a malloc'ed array holding the pixels row by row
         * width     - receives biWidth from the info header
         * heigth    - receives biHeight from the info header
         * file_name - path of the BMP file to read
         *
         * Also stores bfSize - bfOffBits in the file-scope variable `size`
         * and prints that value to std::cout.
         */
        void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
        {
            BITMAPFILEHEADER fileHeader;
            BITMAPINFOHEADER infoHeader;

            FILE *in = fopen(file_name, "rb");
            fread(&fileHeader, sizeof(BITMAPFILEHEADER), 1, in);
            fread(&infoHeader, sizeof(BITMAPINFOHEADER), 1, in);

            *width = infoHeader.biWidth;
            *heigth = infoHeader.biHeight;
            size = (fileHeader.bfSize - fileHeader.bfOffBits);
            std::cout << "velkost nacitanych pixelov je " << size <<'\n';

            // Four bytes are reserved per pixel.
            *bufferRGB = (RGBTRIPLE *)malloc(*width* *heigth * 4);

            // Skip whatever lies between the two headers and the pixel data.
            fseek(in, fileHeader.bfOffBits - sizeof(fileHeader) - sizeof(infoHeader), SEEK_CUR);

            // Each stored row is padded to a multiple of four bytes.
            const int rowPadding = (4 - (3 * *width) % 4) % 4;

            RGBTRIPLE *pixel = *bufferRGB;
            for (int row = 0; row < *heigth; ++row)
            {
                for (int col = 0; col < *width; ++col, ++pixel)
                {
                    // The file stores the channels in blue, green, red order.
                    pixel->rgbtBlue = fgetc(in);
                    pixel->rgbtGreen = fgetc(in);
                    pixel->rgbtRed = fgetc(in);
                }
                for (int skipped = 0; skipped < rowPadding; ++skipped)
                    fgetc(in);
            }

            fclose(in);
        }

        /*
         * Writes `bufferRGB` as a BMP file named `new_name`.
         *
         * bufferRGB - pixels, row by row, using the file-scope width/heigth
         * new_name  - path of the file to create
         * old_name  - existing BMP whose two headers are copied into the new file
         */
        void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
        {
            BITMAPFILEHEADER fileHeader;
            BITMAPINFOHEADER infoHeader;

            // Borrow both headers from the source image.
            FILE *stream = fopen(old_name, "rb");
            fread(&fileHeader, sizeof(BITMAPFILEHEADER), 1, stream);
            fread(&infoHeader, sizeof(BITMAPINFOHEADER), 1, stream);
            fclose(stream);

            stream = fopen(new_name, "wb");
            fwrite(&fileHeader, sizeof(BITMAPFILEHEADER), 1, stream);
            fwrite(&infoHeader, sizeof(BITMAPINFOHEADER), 1, stream);
            // Move to the offset at which the header says the pixel data starts.
            fseek(stream, fileHeader.bfOffBits - sizeof(fileHeader) - sizeof(infoHeader), SEEK_CUR);

            // Rows are padded to a multiple of four bytes.
            const int rowPadding = (4 - (3 * width) % 4) % 4;
            const int rowBytes = width * 3 + rowPadding;

            unsigned char *to_save = (unsigned char *)malloc(rowBytes * heigth);
            unsigned char *cursor = to_save;

            for (int row = 0; row < heigth; ++row)
            {
                for (int col = 0; col < width; ++col)
                {
                    const RGBTRIPLE &px = bufferRGB[row * width + col];
                    *cursor++ = px.rgbtBlue;
                    *cursor++ = px.rgbtGreen;
                    *cursor++ = px.rgbtRed;
                }
                for (int pad = 0; pad < rowPadding; ++pad)
                    *cursor++ = 0;
            }

            // Note: this prints the size of a pointer, not of the pixel data.
            std::cout << "velkost na ulozenie je " << sizeof(&to_save) << '\n';
            fwrite(to_save, rowBytes * heigth, 1, stream);

            fclose(stream);
            free(to_save);
        }


        /*
         * Grayscale kernel: one thread per array element, 1D launch.
         *
         * d_bufferRGB     - input pixels
         * d_new_bufferRGB - output pixels; all three channels receive the
         *                   integer average of the input channels
         * d_size          - pointer to the limit the thread index is checked against
         */
        __global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
        {
            const int idx = blockIdx.x*blockDim.x + threadIdx.x;

            // Threads at or beyond the limit have nothing to do.
            if (!(idx < *d_size))
                return;

            const int channelSum = d_bufferRGB[idx].rgbtRed + d_bufferRGB[idx].rgbtGreen + d_bufferRGB[idx].rgbtBlue;
            const BYTE gray = (channelSum / 3);

            d_new_bufferRGB[idx].rgbtRed = gray;
            d_new_bufferRGB[idx].rgbtGreen = gray;
            d_new_bufferRGB[idx].rgbtBlue = gray;
        }

        // Entry point of the question's listing: allocates device memory, loads
        // "test.bmp", runs the grayscale kernel and saves the result.
        // The code is kept exactly as posted; the NOTE(review) comments mark the
        // problems that the accepted answer lists.
        int main()
    {

            // NOTE(review): width and heigth are file-scope ints that are only
            // assigned inside load_bmp, which is called further down, so these two
            // allocations are sized with width*heigth*4 before the image size is known.
            gpuErrchk(cudaMalloc(&d_new_bufferRGB, width*heigth * 4));
            gpuErrchk(cudaMalloc(&d_bufferRGB, width*heigth * 4));
            gpuErrchk(cudaMalloc(&d_size, sizeof(size)));

            load_bmp(&bufferRGB, &width, &heigth, "test.bmp"); // the *bufferRGB pointer is created and filled here

            gpuErrchk(cudaMemcpy(d_size, &size, sizeof(size), cudaMemcpyHostToDevice));
            // This is the call for which the question reports "invalid argument":
            // it copies `size` bytes into d_bufferRGB, which was allocated above.
            gpuErrchk(cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice));

            // Fixed launch configuration of 32 blocks x 512 threads; no error check follows the launch.
            CUDA_filter_grayscale << <32, 512 >> > (d_bufferRGB, d_new_bufferRGB, d_size); // the launch size is not a concern for now

            // NOTE(review): new_bufferRGB is a file-scope pointer that is never
            // allocated anywhere in this listing.
            gpuErrchk(cudaMemcpy(new_bufferRGB, d_new_bufferRGB, size, cudaMemcpyDeviceToHost));

            save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");
    } 

It's been killing my brain for several days, please help me with this.

Upvotes: 0

Views: 1311

Answers (1)

Jaroslav Petrík
Jaroslav Petrík

Reputation: 29

So, with significant help from @Robert Crovella, I have finished my code. I also added some extra features, such as computing the kernel launch size dynamically, as a free gift for internet users. The code is fully functional for Microsoft BMP version 3 files (you can create one in Paint). I tried to upload a sample image, but uploads are limited to 2 MB, which is not enough for a true-color image. When compiling, a null-pointer error is reported, but the program is still built and stored in the project's Debug folder. When you run it with an image in that folder, it works without problems.

The problems with the code above were: 1. new_bufferRGB was uninitialised; 2. the load function did not provide the variables before I used them (cudaMalloc was called before load_bmp); 3. mistakes in the cudaMemcpy calls.

SO, HERE IS THE CODE >>>

#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "device_launch_parameters.h"
#include <iostream>


// Image dimensions in pixels; load_bmp stores them here (main passes &width, &heigth) and save_bmp reads them.
int width, heigth;
// Set by load_bmp to bfSize - bfOffBits; main uses it as the byte count of its cudaMemcpy calls.
long size;
// Device-side copy of `size` (allocated with cudaMalloc in main).
long *d_size;
// Host buffers: bufferRGB is allocated and filled by load_bmp, new_bufferRGB is allocated in main and handed to save_bmp.
RGBTRIPLE *bufferRGB, *new_bufferRGB;
// Device buffers passed to the kernel as input and output.
RGBTRIPLE *d_bufferRGB, *d_new_bufferRGB;

// Wraps a CUDA runtime call and forwards its status, together with the call
// site, to gpuAssert. The do/while(0) wrapper makes the macro behave like a
// single statement, so `if (c) gpuErrchk(x); else ...` compiles as expected.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * code  - status returned by the CUDA call
 * file  - source file of the call site (supplied by gpuErrchk)
 * line  - source line of the call site (supplied by gpuErrchk)
 * abort - currently unused: the exit below is commented out, so execution
 *         continues after the message has been printed
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // NOTE(review): aborting was disabled by the author; re-enable the next
        // line if a failed CUDA call should stop the program.
        //if (abort) exit(code);
    }
}

/*
 * Helper for load_bmp: parses an already opened BMP stream.
 *
 * On success returns NULL and fills the four output parameters; the pixel
 * array is malloc'ed and owned by the caller. On failure returns a static
 * message describing the problem and leaves the outputs untouched.
 */
static const char *read_bmp_pixels(FILE *file, RGBTRIPLE **pixels_out, int *cols_out, int *rows_out, long *bytes_out)
{
    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;

    if (fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) != 1 ||
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) != 1)
        return "file is too short to contain the BMP headers";

    const int cols = bmp_info_header.biWidth;
    const int rows = bmp_info_header.biHeight;

    // The reader below only understands 3-byte pixels stored as positive-sized rows.
    if (bmp_info_header.biBitCount != 24 || cols <= 0 || rows <= 0)
        return "only 24-bit BMP files with positive width and height are supported";

    // Each stored row is padded to a multiple of four bytes.
    const size_t padding = (4 - (3 * (size_t)cols) % 4) % 4;
    const size_t row_bytes = 3 * (size_t)cols + padding;

    // Four bytes per pixel are reserved (more than the three that are used):
    // main copies `size` bytes, which includes the row padding, out of this
    // buffer, and padding never exceeds one byte per pixel.
    RGBTRIPLE *pixels = (RGBTRIPLE *)malloc((size_t)cols * (size_t)rows * 4);
    if (pixels == NULL)
        return "out of memory";

    // bfOffBits is the offset of the pixel data from the start of the file.
    if (fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) != 0)
    {
        free(pixels);
        return "cannot seek to the pixel data";
    }

    for (int y = 0; y < rows; y++)
    {
        // A file row is cols blue/green/red byte triples, which is assumed to
        // match the in-memory layout of RGBTRIPLE (three bytes, same order).
        RGBTRIPLE *row = pixels + (size_t)y * (size_t)cols;
        if (fread(row, sizeof(RGBTRIPLE), (size_t)cols, file) != (size_t)cols)
        {
            free(pixels);
            return "file ends before all pixel rows were read";
        }
        if (padding != 0)
            fseek(file, (long)padding, SEEK_CUR);
    }

    *pixels_out = pixels;
    *cols_out = cols;
    *rows_out = rows;
    // Derived from the dimensions rather than from bfSize so that it can
    // never exceed the buffer allocated above.
    *bytes_out = (long)(row_bytes * (size_t)rows);
    return NULL;
}

/*
 * Loads a 24-bit BMP file.
 *
 * bufferRGB - receives a malloc'ed pixel array (row by row, no padding),
 *             or NULL when loading fails
 * width     - receives the image width in pixels, or 0 on failure
 * heigth    - receives the image height in pixels, or 0 on failure
 * file_name - path of the BMP file to read
 *
 * Also sets the file-scope variable `size` to the number of pixel-data bytes
 * including row padding (0 on failure) and prints it to std::cout.
 * Failures are reported on stderr.
 */
void load_bmp(RGBTRIPLE **bufferRGB, int *width, int *heigth, const char *file_name)
{
    // Start from a well-defined "nothing loaded" state for every failure path.
    *bufferRGB = NULL;
    *width = 0;
    *heigth = 0;
    size = 0;

    FILE *file = fopen(file_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "load_bmp: cannot open %s\n", file_name);
        return;
    }

    RGBTRIPLE *pixels = NULL;
    int cols = 0, rows = 0;
    long bytes = 0;
    const char *error = read_bmp_pixels(file, &pixels, &cols, &rows, &bytes);
    fclose(file);

    if (error != NULL)
    {
        fprintf(stderr, "load_bmp: %s: %s\n", file_name, error);
        return;
    }

    *bufferRGB = pixels;
    *width = cols;
    *heigth = rows;
    size = bytes;
    std::cout << "size of loaded pixels is " << size << '\n';
}

/*
 * Writes `bufferRGB` as a 24-bit BMP file named `new_name`.
 *
 * bufferRGB - pixels, row by row without padding; the dimensions are taken
 *             from the file-scope variables width and heigth
 * new_name  - path of the file to create
 * old_name  - existing BMP whose file and info headers are copied verbatim
 *             into the new file
 *
 * Failures (missing files, short reads, write errors) are reported on stderr
 * and end the function early; nothing is returned to the caller.
 */
void save_bmp(RGBTRIPLE *bufferRGB, const char *new_name, const char *old_name)
{
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "save_bmp: no image data to save\n");
        return;
    }

    BITMAPFILEHEADER bmp_file_header;
    BITMAPINFOHEADER bmp_info_header;

    // Borrow both headers from the source image.
    FILE *file = fopen(old_name, "rb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot open %s\n", old_name);
        return;
    }
    const bool headers_ok =
        fread(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fread(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1;
    fclose(file);
    if (!headers_ok)
    {
        fprintf(stderr, "save_bmp: %s is too short to contain the BMP headers\n", old_name);
        return;
    }

    file = fopen(new_name, "wb");
    if (file == NULL)
    {
        fprintf(stderr, "save_bmp: cannot create %s\n", new_name);
        return;
    }

    // After the two headers, continue at the offset where the header says
    // the pixel data starts.
    bool ok =
        fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, file) == 1 &&
        fwrite(&bmp_info_header, sizeof(BITMAPINFOHEADER), 1, file) == 1 &&
        fseek(file, (long)bmp_file_header.bfOffBits, SEEK_SET) == 0;

    // Each stored row is padded with zero bytes to a multiple of four bytes.
    const size_t padding = (4 - (3 * (size_t)width) % 4) % 4;
    static const unsigned char zero_padding[3] = { 0, 0, 0 };

    for (int y = 0; ok && y < heigth; y++)
    {
        // A row of RGBTRIPLEs is assumed to already have the on-disk layout
        // (three bytes per pixel in blue, green, red order).
        const RGBTRIPLE *row = bufferRGB + (size_t)y * (size_t)width;
        ok = fwrite(row, sizeof(RGBTRIPLE), (size_t)width, file) == (size_t)width &&
             (padding == 0 || fwrite(zero_padding, 1, padding, file) == padding);
    }

    // fclose flushes buffered data, so its result is part of the write status.
    if (fclose(file) != 0)
        ok = false;

    if (!ok)
        fprintf(stderr, "save_bmp: error while writing %s\n", new_name);
}


/*
 * Converts an image to grayscale, one thread per pixel.
 *
 * Launch layout: 1D grid of 1D blocks with at least one thread per pixel;
 * surplus threads exit without touching memory. No shared memory is used.
 *
 * d_bufferRGB     - input pixels (device memory)
 * d_new_bufferRGB - output pixels (device memory); red, green and blue all
 *                   receive the integer average of the input channels
 * d_size          - device pointer to the size of the pixel data in BYTES,
 *                   as copied from the host variable `size`
 */
__global__ void CUDA_filter_grayscale(const RGBTRIPLE *d_bufferRGB, RGBTRIPLE *d_new_bufferRGB, long *d_size)
{
    // *d_size counts bytes while idx counts pixels, so convert before
    // comparing; otherwise tail threads index past the pixel data.
    const long long pixel_count = (long long)(*d_size) / (long long)sizeof(RGBTRIPLE);
    const long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    if (idx >= pixel_count)
        return;

    const BYTE grayscale = ((d_bufferRGB[idx].rgbtRed + d_bufferRGB[idx].rgbtGreen + d_bufferRGB[idx].rgbtBlue) / 3);
    d_new_bufferRGB[idx].rgbtRed = grayscale;
    d_new_bufferRGB[idx].rgbtGreen = grayscale;
    d_new_bufferRGB[idx].rgbtBlue = grayscale;
}

/*
 * Program entry point: loads "test.bmp", converts it to grayscale on the GPU
 * and writes the result to "filter_grayscale_GPU.bmp".
 *
 * Returns 0 when the pipeline ran to the end and 1 when no image could be
 * loaded, the reported pixel-data size does not fit the buffers, or a host
 * allocation failed. CUDA failures are reported through gpuErrchk.
 */
int main()
{
    // Load first: width, heigth and size are only valid after load_bmp has
    // run, and every allocation below is derived from them.
    load_bmp(&bufferRGB, &width, &heigth, "test.bmp");
    if (bufferRGB == NULL || width <= 0 || heigth <= 0)
    {
        fprintf(stderr, "main: no pixels were loaded from test.bmp\n");
        free(bufferRGB);
        return 1;
    }

    const size_t pixelCount = (size_t)width * (size_t)heigth;
    // Same 4-bytes-per-pixel sizing that load_bmp uses for the host buffer.
    const size_t bufferBytes = pixelCount * 4;

    // `size` is used as the length of the copies below, so it has to fit
    // inside both the host and the device buffers.
    if (size <= 0 || (size_t)size > bufferBytes)
    {
        fprintf(stderr, "main: pixel data size %ld does not fit a %dx%d image\n", size, width, heigth);
        free(bufferRGB);
        return 1;
    }

    // Host buffer that receives the processed image from the device.
    new_bufferRGB = (RGBTRIPLE *)malloc(bufferBytes);
    if (new_bufferRGB == NULL)
    {
        fprintf(stderr, "main: out of host memory\n");
        free(bufferRGB);
        return 1;
    }

    // Device buffers.
    gpuErrchk(cudaMalloc(&d_new_bufferRGB, bufferBytes));
    gpuErrchk(cudaMalloc(&d_bufferRGB, bufferBytes));
    gpuErrchk(cudaMalloc(&d_size, sizeof(size)));

    // Upload the size and the pixels.
    gpuErrchk(cudaMemcpy(d_size, &size, sizeof(size), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_bufferRGB, bufferRGB, size, cudaMemcpyHostToDevice));

    // One thread per pixel; the block count is rounded up so that the last,
    // partially filled block is included.
    const unsigned int numbThreads = 256;
    const unsigned int numbBlocks = (unsigned int)((pixelCount + numbThreads - 1) / numbThreads);

    CUDA_filter_grayscale<<<numbBlocks, numbThreads>>>(d_bufferRGB, d_new_bufferRGB, d_size);
    // A launch does not return a status: fetch launch-configuration errors
    // explicitly, then wait so execution errors are attributed to the kernel.
    gpuErrchk(cudaGetLastError());
    gpuErrchk(cudaDeviceSynchronize());

    // Download the result.
    gpuErrchk(cudaMemcpy(new_bufferRGB, d_new_bufferRGB, size, cudaMemcpyDeviceToHost));

    // Save the result.
    save_bmp(new_bufferRGB, "filter_grayscale_GPU.bmp", "test.bmp");

    // Release device and host memory.
    gpuErrchk(cudaFree(d_size));
    gpuErrchk(cudaFree(d_bufferRGB));
    gpuErrchk(cudaFree(d_new_bufferRGB));
    free(new_bufferRGB);
    free(bufferRGB);

    return 0;
}

Upvotes: 1

Related Questions