Unable to print device variable value within kernel in CUDA

Question

I am trying to understand how pointers function in CUDA.

Given below is a simple program which assigns certain value to a variable allocated in the device. (I'm using CUDA toolkit 8.0 with NVIDIA Quadro K2000 Graphics card)

When I print the value within the kernel using printf(), it displays a wrong value.

However, when I do a cudaMemcopy from host function and then print from the host function, it displays the correct value...

//CODE...
#include 
#include 
#include
#include
#define N 3

__global__ void tempker(int *jk,int value)
{
    (*jk) = value*2;
    printf("displayed from inside the kernel :
value of jk = %d
value of *jk = %d
",jk,*jk);

}
int tempfunc(int *kp)
{
    int *jk = NULL,*lm=NULL;
    lm = (int *)(malloc(sizeof(int)));
    *lm = 150;
    cudaError_t err = cudaSuccess;
    cudaMalloc((void**)&jk, sizeof(int));
    printf("jk pointer after cudaMalloc: displayed from host = %d
",jk);
    tempker<<<1,1>>>(jk,150);
    err = cudaGetLastError();//brief Returns the last error from a runtime call
    cudaDeviceSynchronize();

    err = cudaMemcpy(lm, jk, (sizeof(int)), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy jk from device to host (error code %s)!
", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("Displayed in host function after memcopy: value of *lm = *jk = %d
",*lm);
    cudaFree(jk);
    err = cudaMalloc((void**)&kp, sizeof(int));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device kp (error code %s)!
", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    tempker<<<1,1>>>(kp,(N*N*N));
    err = cudaGetLastError();//brief Returns the last error from a runtime call
    cudaDeviceSynchronize();

    err = cudaMemcpy(lm, kp, (sizeof(int)), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy kp from device to host (error code %s)!
", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("Displayed in host function after memcopy: value of *lm = *kp = %d
",*lm);
    cudaFree(kp);
    free(lm);
    return 100;
}

int main(){

    int *kp = NULL;
    printf("tempfunc(): return value = %d
",tempfunc(kp));
    return 0;
}

Output:

jk pointer after cudaMalloc: displayed from host = 13238272
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *jk = 300
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *kp = 54
tempfunc(): return value = 100

The Question is: Is it possible to print value of a variable allocated in device within the kernel?

Taro · Accepted Answer

Displaying integer value with "%d" is considered correct. Displaying address with "%d" might lead to unpredictable behaviour depending on your compiler.

Simply use "%p". As it expects a void pointer, explicitely cast your pointer.

So in your kernel :

printf("value = %d, address = %p
",*jk,(void *)jk);

More info is available if you look further into printf.

Unable to print device variable value within kernel in CUDA

Answers (1)

Related Questions