Copying array of structs from host to device cuda

Question

Suppose I have a struct as follows:

/* Record type used throughout the question: three int fields named one, two and three. */
typedef struct values{
/* NOTE(review): as written this member list does not compile -- the type is repeated
   after each comma and the terminating ';' is missing. Presumably
   `int one, two, three;` was intended. */
int one, int two, int three
} values;

Now, suppose I create an array of values on the host and populate it with random data:

// Host-side pointer to the array of structs.
// NOTE(review): `values vals*;` does not parse; presumably `values *vals;` was meant.
values vals*;
// Global pointer living in device memory; the kernel in this question reads the array through it.
__device__ values* d_vals;
// Host entry point: allocates the host array and fills it. Nothing is copied to the device yet.
int main(){
     // Room for A_LARGE_NUMBER structs, allocated with malloc on the host.
     vals = (values*)malloc(sizeof(values) * A_LARGE_NUMBER);
     PopulateWithDate(); //populates vals with random data
}

Now I want to be able to copy the values to the device such that I can access them in my kernel like so:

// Kernel that reads field `one` of the first struct through the global __device__ pointer d_vals.
// The read only works if d_vals itself holds the address of device memory containing the array.
__global__ void myKernel(){
     printf("%d", d_vals[0].one);//I don't really want to print, but whenever I try to access I get an error
}

Whatever I try, I get an "illegal memory access was encountered" error.

Here's my current attempt:

// Asker's attempt: allocate and fill the host array, then try to make it reachable through d_vals.
// None of the CUDA return codes are inspected, so failures in the calls below go unnoticed.
int main(){
     vals = (values*)malloc(sizeof(values) * A_LARGE_NUMBER);
     PopulateWithDate(); //populates vals with random data

     values* d_ptr;
     // d_ptr receives the device address of the d_vals symbol here ...
     cudaGetSymbolAddress((void**)&d_ptr, d_vals);
     // ... but this call immediately overwrites d_ptr with a fresh allocation,
     // so the symbol address obtained above is discarded.
     cudaMalloc((void**)&d_ptr, A_LARGE_NUMBER * sizeof(values));

     // NOTE(review): the destination is the local pointer d_ptr rather than the d_vals symbol,
     // and the source is &vals (the address of the host pointer variable) rather than vals
     // (the array itself). Nothing in this function stores the allocation's address into
     // d_vals, which is consistent with the illegal memory access reported when the kernel
     // dereferences d_vals.
     cudaMemcpyToSymbol(d_ptr, &vals, sizeof(values) * A_LARGE_NUMBER);
     cudaDeviceSynchronize();
     dim3    blocksPerGrid(2, 2);
     dim3    threadsPerBlock(16, 16);

    // NOTE(review): the launch configuration between the angle brackets appears to have been
    // lost; presumably <<<blocksPerGrid, threadsPerBlock>>> was intended -- confirm.
    myKernel<< > >();
}

Robert Crovella · Accepted Answer

For what you have shown so far, using a __device__ pointer variable just creates needless complexity. Just use an ordinary dynamic allocation using cudaMalloc for device storage, and otherwise follow an approach similar to any of the CUDA sample codes such as vectorAdd. Here is an example:

$ cat t1315.cu
#include <stdio.h>
#define A_LARGE_NUMBER 10

// Plain record of three integer fields; the same layout is used by host and device code.
struct values{
  int one;
  int two;
  int three;
};

// Host-side pointer to the array of `values`; main() allocates it with malloc and
// PopulateWithData() fills it.
values *vals;

// Prints field `one` of the first element of the device array, followed by a newline.
//   d_vals: device pointer to at least one `values` element.
// Every launched thread executes the printf, so this is meant for a single-thread launch.
__global__ void myKernel(values *d_vals){
     // The format string must stay on one line; the line break is the "\n" escape.
     printf("%d\n", d_vals[0].one);
}

// Writes the fixed triple {1, 2, 3} into each of the A_LARGE_NUMBER entries of the
// host array `vals`. `vals` must already point at storage of that size.
void PopulateWithData(){
  const values fill = {1, 2, 3};   // one = 1, two = 2, three = 3
  int idx = 0;
  while (idx < A_LARGE_NUMBER){
    vals[idx] = fill;
    ++idx;
  }
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what){
     if (status == cudaSuccess) return true;
     fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
     return false;
}

// Host entry point: builds the array of structs on the host, copies it into a
// cudaMalloc'd device buffer and passes that buffer to the kernel as an argument.
// Returns 0 on success and 1 if the host allocation or any CUDA call fails.
int main(){
     const size_t bytes = sizeof(values) * A_LARGE_NUMBER;

     vals = (values*)malloc(bytes);
     if (vals == NULL){
          fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
          return 1;
     }
     PopulateWithData(); // fills every element of vals with {1, 2, 3}

     values* d_ptr = NULL;
     int rc = 1;
     if (cudaOk(cudaMalloc((void**)&d_ptr, bytes), "cudaMalloc") &&
         cudaOk(cudaMemcpy(d_ptr, vals, bytes, cudaMemcpyHostToDevice), "cudaMemcpy")){
          // A single thread is enough: the kernel only prints element 0.
          dim3    blocksPerGrid(1, 1);
          dim3    threadsPerBlock(1, 1);

          myKernel<<<blocksPerGrid, threadsPerBlock>>>(d_ptr);
          // A launch does not return a status: cudaGetLastError reports a bad configuration,
          // and the synchronize reports faults raised while the kernel ran (it also flushes
          // the device-side printf buffer).
          if (cudaOk(cudaGetLastError(), "kernel launch") &&
              cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")){
               rc = 0;
          }
     }

     cudaFree(d_ptr); // no-op when d_ptr is still NULL
     free(vals);
     return rc;
}
$ nvcc -arch=sm_35 -o t1315 t1315.cu
$ cuda-memcheck ./t1315
========= CUDA-MEMCHECK
1
========= ERROR SUMMARY: 0 errors
$

You had a variety of other basic (non-CUDA) coding errors in what you had shown; I'm not going to try to run through them all.

If you really want to retain your __device__ pointer variable, and use that to point to the device data (array of structs) then you will still need to use cudaMalloc, and the overall process takes additional steps. You can follow the example worked out in the answer here.

Following that example, here's a set of changes to the above code to make it work with a __device__ pointer variable instead of a pointer passed as a kernel parameter:

$ cat t1315.cu
#include <stdio.h>
#define A_LARGE_NUMBER 10

// Plain record of three integer fields; the same layout is used by host and device code.
struct values{
  int one;
  int two;
  int three;
};

// Host-side pointer to the array of `values`; main() allocates it with malloc and
// PopulateWithData() fills it.
values *vals;
// Global pointer stored in device memory. main() writes the address of the cudaMalloc'd
// array into it with cudaMemcpyToSymbol, and myKernel() reads the array through it.
__device__ values *d_vals;

// Prints field `one` of the first element reachable through the global __device__
// pointer d_vals, followed by a newline. d_vals must already hold the address of a
// device array with at least one element.
// Every launched thread executes the printf, so this is meant for a single-thread launch.
__global__ void myKernel(){
     // The format string must stay on one line; the line break is the "\n" escape.
     printf("%d\n", d_vals[0].one);
}

// Writes the fixed triple {1, 2, 3} into each of the A_LARGE_NUMBER entries of the
// host array `vals`. `vals` must already point at storage of that size.
void PopulateWithData(){
  const values fill = {1, 2, 3};   // one = 1, two = 2, three = 3
  int idx = 0;
  while (idx < A_LARGE_NUMBER){
    vals[idx] = fill;
    ++idx;
  }
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what){
     if (status == cudaSuccess) return true;
     fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
     return false;
}

// Host entry point: builds the array of structs on the host, copies it into a
// cudaMalloc'd device buffer, then publishes that buffer's address through the
// __device__ pointer d_vals so the kernel can reach it without a parameter.
// Returns 0 on success and 1 if the host allocation or any CUDA call fails.
int main(){
     const size_t bytes = sizeof(values) * A_LARGE_NUMBER;

     vals = (values*)malloc(bytes);
     if (vals == NULL){
          fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
          return 1;
     }
     PopulateWithData(); // fills every element of vals with {1, 2, 3}

     values* d_ptr = NULL;
     int rc = 1;
     if (cudaOk(cudaMalloc((void**)&d_ptr, bytes), "cudaMalloc") &&
         cudaOk(cudaMemcpy(d_ptr, vals, bytes, cudaMemcpyHostToDevice), "cudaMemcpy") &&
         // Two separate copies: the one above moves the array contents; this one copies
         // only the pointer VALUE (sizeof(values*) bytes) into the d_vals symbol.
         cudaOk(cudaMemcpyToSymbol(d_vals, &d_ptr, sizeof(values*)), "cudaMemcpyToSymbol")){
          // A single thread is enough: the kernel only prints element 0.
          dim3    blocksPerGrid(1, 1);
          dim3    threadsPerBlock(1, 1);

          myKernel<<<blocksPerGrid, threadsPerBlock>>>();
          // A launch does not return a status: cudaGetLastError reports a bad configuration,
          // and the synchronize reports faults raised while the kernel ran (it also flushes
          // the device-side printf buffer).
          if (cudaOk(cudaGetLastError(), "kernel launch") &&
              cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")){
               rc = 0;
          }
     }

     // d_vals still holds the freed address after this point; nothing is launched afterwards.
     cudaFree(d_ptr); // no-op when d_ptr is still NULL
     free(vals);
     return rc;
}
$ nvcc -arch=sm_35 -o t1315 t1315.cu
$ cuda-memcheck ./t1315
========= CUDA-MEMCHECK
1
========= ERROR SUMMARY: 0 errors
$

Copying array of structs from host to device cuda

Answers (1)

Related Questions