Rezaeimh7
Rezaeimh7

Reputation: 1545

cudaMemcpyFromSymbol() does not work correctly

I have a problem reading from GPU device memory. When I copy values to __device__ memory, everything is OK. But when I try to read the result back, the answer is sometimes correct and sometimes exactly the initial values of the array!

I have a device array like this:

__device__ array[50];

At the start, I copy some values into it:

cudaStatus = cudaMemcpyToSymbol(dev_state, &CipherState, statesize, 0, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

After making some changes in the kernel, I try to read the values back from the array:

Kernel << <8, 16 >> >();

unsigned char CipherState2[50];

cudaStatus = cudaMemcpyFromSymbol(&CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) 
    {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    } 

The results are sometimes correct and sometimes the initial values of the array.

Here is more of my code:

//before Kernel Function body

// 128-byte state buffer declared on the device. test() below fills it from the
// host with cudaMemcpyToSymbol and reads it back with cudaMemcpyFromSymbol.
__device__ unsigned char dev_state[128];

//////////////////////////////////////

// Round-trip check for dev_state: fill a 128-byte host buffer with 0x01, copy it
// to the device symbol, launch Kernel, copy the symbol back into a second host
// buffer, and print the first 16 bytes of each buffer as hex digits.
// CUDA API errors are printed and the program waits for a key press; execution
// then continues.
void test()
{

    // Host-side initial state: every byte is 0x01.
    unsigned char CipherState[128];

    for (int i = 0; i<128; i++)                 
        CipherState[i] = 0x01;

    cudaError_t cudaStatus;

    // Host -> device copy of all 128 bytes into the dev_state symbol.
    cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, 128*sizeof(unsigned char), 0, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n initialized:\n 0x");
    // NOTE(review): the outer loop runs exactly once (i == 0, then i becomes 16),
    // so only the first 16 of the 128 bytes are printed.
    for (size_t i = 0; i < 16; i+=16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j =0 ; j <=15; j++)
        {
            printf("%x", CipherState[i+j]);
        }
    }
    // Launch 8 blocks of 16 threads. Per the author, Kernel sets every byte of
    // dev_state to 0x05; Kernel's body is not shown in this listing.
    // NOTE(review): there is no cudaGetLastError() after the launch, so a failed
    // launch is not reported here.
    Kernel << <8, 16 >> >();

// Up to this line, everything is OK (author's observation).

// Device -> host copy of all 128 bytes of dev_state into a fresh host buffer.
unsigned char CipherState2[128];    
cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, 128*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) 
{
    printf(" \n%s\n", cudaGetErrorString(cudaStatus));
    getchar();
}


    printf("\n State at the end:\n ");
    // Same single-iteration outer loop as above: prints bytes 0..15 only.
    for (size_t i = 0; i < 16; i+=16)
    {
        if (i % 16 == 0)
            printf("\n0x");
        for (int j = 0; j <= 15; j++)
            printf("%x",  CipherState2[i + j]);

    }
  }

Sometimes, printing CipherState2 gives this:

0x55555555555555555......5555555555

and sometimes:

0x11111111111111111.....11111111111;

Upvotes: 1

Views: 757

Answers (1)

Robert Crovella
Robert Crovella

Reputation: 151799

This is incorrect:

unsigned char CipherState2[50];

cudaStatus = cudaMemcpyFromSymbol(&CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);
                                  ^

CipherState2 is an array, so its name already converts to a pointer to its first element. You should not be taking the address of it. Instead you should do the call like this:

cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, 50*sizeof(unsigned char),0, cudaMemcpyDeviceToHost);

And although you haven't shown what the CipherState variable looks like, it's quite possible you made a similar error here:

cudaStatus = cudaMemcpyToSymbol(dev_state, &CipherState, statesize, 0, cudaMemcpyHostToDevice);
                                           ^

It's quite possible the correct form of that call would be:

cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, statesize, 0, cudaMemcpyHostToDevice);

In the future, please provide an MCVE for questions like this.

As an example, note that this is not valid code:

__device__ array[50];

Perhaps you meant something like this:

__device__ unsigned char dev_state[50];

EDIT: the code you have now posted (in an answer) is still incomplete, but it appears to be mostly correct. The remaining problem may be in your kernel, which you haven't shown, or it's possible your CUDA install is not working correctly. Here's a complete worked example built around what you have shown (I added a simple kernel) that demonstrates the expected behavior. (Note that I don't think your for-loops for the printout are constructed correctly: the outer loop runs only once, so only the first 16 of the 128 bytes are printed.)

$ cat t966.cu
#include <stdio.h>
//before Kernel Function body

// 128-byte state buffer declared on the device. Kernel writes to it; test()
// copies to and from it with cudaMemcpyToSymbol / cudaMemcpyFromSymbol.
__device__ unsigned char dev_state[128];

//////////////////////////////////////

// Stores 0x5 into dev_state, one byte per thread.
// Each thread derives a flat 1-D global index from its block and thread ids;
// threads whose index falls outside the 128-byte array do nothing.
__global__ void Kernel(){
  const int globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
  if (globalIdx >= 128) return;
  dev_state[globalIdx] = 0x5;
}

// Prints the first 16 bytes of `state` as hex digits on a new line prefixed
// with "0x". `state` must point to at least 16 readable bytes.
static void printFirstRow(const unsigned char *state)
{
    printf("\n0x");
    for (int j = 0; j < 16; j++)
        printf("%x", state[j]);
}

// Round-trip demonstration for the dev_state device symbol:
//   1. fill a 128-byte host buffer with 0x01 and copy it to dev_state,
//   2. launch Kernel with 8 blocks of 16 threads (128 threads, one per byte),
//   3. copy dev_state back into a second host buffer,
//   4. print the first 16 bytes of the host buffer before the launch and of
//      the copied-back buffer after it.
// Any CUDA error (including a failed kernel launch) is printed and the program
// waits for a key press before continuing.
void test()
{
    unsigned char CipherState[128];
    for (int i = 0; i < 128; i++)
        CipherState[i] = 0x01;

    cudaError_t cudaStatus;

    // Pass the array itself (not its address) as the host source pointer.
    cudaStatus = cudaMemcpyToSymbol(dev_state, CipherState, sizeof(CipherState), 0, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n initialized:\n 0x");
    printFirstRow(CipherState);

    // set all of the dev_state to "0x05"
    Kernel<<<8, 16>>>();

    // A kernel launch returns no status of its own; launch failures (for
    // example an invalid configuration) are only visible via cudaGetLastError().
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    unsigned char CipherState2[128];
    cudaStatus = cudaMemcpyFromSymbol(CipherState2, dev_state, sizeof(CipherState2), 0, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        printf(" \n%s\n", cudaGetErrorString(cudaStatus));
        getchar();
    }

    printf("\n State at the end:\n ");
    printFirstRow(CipherState2);
    printf("\n");
}
// Program entry point: runs the copy / launch / copy-back demonstration once.
int main()
{
  test();
  return 0;
}
$ nvcc t966.cu -o t966
$ cuda-memcheck ./t966
========= CUDA-MEMCHECK

 initialized:
 0x
0x1111111111111111
 State at the end:

0x5555555555555555
========= ERROR SUMMARY: 0 errors
$

Upvotes: 2

Related Questions