Parallel programming addition using CUDA not successful

Question

I tried to make a array addition using a code below but ended up not adding the array and not error, this is a gpu (CUDA) based parallel program.

#include 
#include 
#include 
#include 

using namespace std;

__global__ void AddInts(int *a, int *b, int count)
{
    int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < count)
    {
        a[id] += b[id];
    }
}


int main() 
{
    srand(time(NULL));
    int count = 100;
    int *h_a = new int[count];
    int *h_b = new int[count];

    for (int i = 0; i < count; i++)
    {
        h_a[i] = rand() % 1000;
        h_b[i] = rand() % 1000;
    }

    cout << "Prior to addition:" << endl;
    for (int i = 0; i < 5; i++)
        cout << h_a[i] << " " << h_b[i] << endl;

    int *d_a, *d_b;

    if (cudaMalloc(&d_a, sizeof(int) * count) != cudaSuccess)
    {
        cout << "Nope! No";
        return 0;
    }

    if (cudaMalloc(&d_b, sizeof(int) * count) != cudaSuccess)
    {
        cout << "Nope!";
        cudaFree(d_a);
        return 0;
    }

    if (cudaMemcpy(d_a, h_a, sizeof(int) * count, cudaMemcpyHostToDevice) != cudaSuccess)
    {
        cout << "Could not copy!" << endl;
        cudaFree(d_a);
        cudaFree(d_b);
        return 0;
    }

    if (cudaMemcpy(d_b, h_b, sizeof(int) * count, cudaMemcpyHostToDevice) != cudaSuccess)
    {
        cout << "Could not copy!" << endl;
        cudaFree(d_a);
        cudaFree(d_b);
        return 0;
    }

    AddInts <<>> (h_a, h_b, count);

    if (cudaMemcpy(h_a, h_b, sizeof(int) * count, cudaMemcpyDeviceToHost) == cudaSuccess)
    {
        delete[] h_a;
        delete[] h_b;
        cudaFree(d_a);
        cudaFree(d_b);
        cout << "Nope!" << endl;
        return 0;
    }

    for (int i = 0; i < 5; i++)
        cout << "It's " << h_a[i] << endl;

    cudaFree(d_a);
    cudaFree(d_b);


    delete[] h_a;
    delete[] h_b;

    return 0;
}

My results are:

Prior to addition: 188 336 489 593 706 673 330 792 329 588 It's 188
It's 489 It's 706 It's 330 It's 329

D:\Learn\CUDA\Visual_stidio\matrxAdd\x64\Release\matrxAdd.exe (process
8468) exited with code 0. To automatically close the console when
debugging stops, enable Tools->Options->Debugging->Automatically close
the console when debugging stops. Press any key to close this window .
. .

Parallel programming addition using CUDA not successful

Answers (1)

Related Questions