JustBeginning

Reputation: 33

CUDA Initialize Array on Device

I am very new to CUDA and I am trying to initialize an array on the device and copy the result back to the host to print out, to show whether it was correctly initialized. I am doing this because the end goal is a dot product solution in which I multiply two arrays together, store the results in another array, and then sum up the entire thing so that I only need to return one value to the host.

In the code below I am only trying to see if I am initializing the array correctly. I am trying to create an array of size N following the pattern 1,2,3,4,5,6,7,8,1,2,3,...

This is the code that I've written. It compiles without issue, but when I run it the terminal hangs and I have no clue why. Could someone help me out here? I'm so incredibly confused :\

#include <stdio.h>
#include <stdlib.h>
#include <chrono>

#define ARRAY_SIZE 100
#define BLOCK_SIZE 32

__global__ void cu_kernel (int *a_d,int *b_d,int *c_d, int size)
{

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    __shared__ int temp; 

    if(temp != 8){
        a_d[x] = temp;
        temp++;
    } else {
        a_d[x] = temp;
        temp = 1;
    }


}

int main (int argc, char *argv[])
{

//declare pointers for arrays
int *a_d, *b_d, *c_d, *sum_h, *sum_d,a_h[ARRAY_SIZE];

//set space for device variables 
cudaMalloc((void**) &a_d, sizeof(int) * ARRAY_SIZE); 
cudaMalloc((void**) &b_d, sizeof(int) * ARRAY_SIZE);
cudaMalloc((void**) &c_d, sizeof(int) * ARRAY_SIZE);
cudaMalloc((void**) &sum_d, sizeof(int)); 


    // set execution configuration
        dim3 dimblock (BLOCK_SIZE);
        dim3 dimgrid (ARRAY_SIZE/BLOCK_SIZE);

    // actual computation: call the kernel
        cu_kernel <<<dimgrid, dimblock>>> (a_d,b_d,c_d,ARRAY_SIZE);
    
        cudaError_t result;

   // transfer results back to host
        result = cudaMemcpy (a_h, a_d, sizeof(int) * ARRAY_SIZE, cudaMemcpyDeviceToHost);
        if (result != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed.");
            exit(1);
        }

    // print the array
        printf ("Final state of the array:\n");
        for (int i =0; i < ARRAY_SIZE; i++) {
            printf ("%d ", a_h[i]);
        }
        printf ("\n");

}

Upvotes: 1

Views: 379

Answers (1)

Robert Crovella

Reputation: 152173

There are at least 3 issues with your kernel code.

  • you are using the shared memory variable temp without initializing it.
  • you are not resolving the order in which threads access a shared variable, as discussed here.
  • you are imagining (perhaps) a particular order of thread execution, and CUDA provides no guarantees in that area.

The first item seems self-evident; however, naive methods to initialize it in a multi-threaded environment like CUDA are not going to work. First, there is the multi-threaded access pattern again. Furthermore, in a multi-block scenario, shared memory in one block is logically distinct from shared memory in another block.

Rather than wrestle with mechanisms that are unsuited to creating the pattern you desire (informed by notions carried over from a serial processing environment), I would simply do something trivial like this:

__global__ void cu_kernel (int *a_d,int *b_d,int *c_d, int size)
{

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < size) a_d[x] = (x&7) + 1;  
}
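
Note that (x & 7) is just x % 8 for non-negative x, so each thread writes (its global index mod 8) + 1, which is exactly the repeating 1,2,...,8 pattern you described.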

Are there other ways to do it? Certainly.

__global__ void cu_kernel (int *a_d,int *b_d,int *c_d, int size)
{

    int x = blockIdx.x * blockDim.x + threadIdx.x;

    __shared__ int temp; 
    // have one thread per block set temp to the block's starting global index
    if (!threadIdx.x) temp = blockIdx.x*blockDim.x;
    // make sure every thread in the block sees the initialized value before using it
    __syncthreads();
    if (x < size) a_d[x] = ((temp+threadIdx.x) & 7) + 1;

}

You can get as fancy as you like.

These changes will still leave a few values at the end of the array unwritten (likely printing as zero), because ARRAY_SIZE/BLOCK_SIZE truncates 100/32 down to 3 blocks, i.e. only 96 threads for 100 elements; fixing that requires a change to your grid sizing. There are many questions about this already, or you can study a sample code like vectorAdd.
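
For example, one minimal way to cover the whole array (assuming you keep the if (x < size) guard shown in the kernels above) is to round the grid size up when you set the execution configuration:

    // round up: launch enough blocks to cover all ARRAY_SIZE elements; the kernel's
    // if (x < size) guard protects the extra threads in the last block
    dim3 dimblock (BLOCK_SIZE);
    dim3 dimgrid ((ARRAY_SIZE + BLOCK_SIZE - 1) / BLOCK_SIZE);
    cu_kernel <<<dimgrid, dimblock>>> (a_d, b_d, c_d, ARRAY_SIZE);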

Upvotes: 3
