Reputation: 23
This is the code below:
#include <stdio.h>
#include <cuda.h>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#define _crt_nonstdc_no_deprecate
__global__ void Kernel(float *d_arr_i,float *d_arr_o)
{
int i = threadIdx.x;
int j = threadIdx.y;
int k = threadIdx.z;
float f = d_arr_i[i];
d_arr_o[i] = f * f;
printf("%d \n",f);
printf("x = %d & y = %d & z = %d \n",i,j,k);
}
int main ()
{
//printf("Hello C..!");
const unsigned int arr_s = 12;
float h_arr_i[arr_s];
for (int i = 0;i < arr_s;i++)
{
h_arr_i[i] = float(i);
}
for (int i = 0;i<arr_s;i++)
{
printf("input arr %d : %e \n",(int)i,h_arr_i[i]);
}
float h_arr_o[arr_s];
float* d_arr_i;
float* d_arr_o;
const unsigned int d_arr_s = arr_s*sizeof(float);
cudaMalloc((void**)&d_arr_i,d_arr_s);
cudaMalloc((void**)&d_arr_o,d_arr_s);
cudaMemcpy(d_arr_i,h_arr_i,d_arr_s,cudaMemcpyHostToDevice);
Kernel<<<1,arr_s>>>(d_arr_i,d_arr_o);
cudaMemcpy(h_arr_o,d_arr_o,d_arr_s,cudaMemcpyDeviceToHost);
printf("\n");
for (int i = 0;i < arr_s;i++)
{
printf("output arr : %d \n",h_arr_o[i]);
}
int d;
cudaDeviceProp c;
int e;
cudaGetDeviceProperties(&c,e);
printf("\n %e",e);
cudaGetDeviceCount(&d);
printf("\n %d \n",d);
cudaFree(d_arr_i);
cudaFree(d_arr_o);
system("Pause");
return 0;
}
In the above code "output arr " should give out the square of the numbers in the array, but it does not.
Can somebody please explain why is this error taking place ? some details , I'm running the code on : CUDA 2.3 / Emulation mode/without a NVIDIA GPU OS : Windows 7 64bit Visual Studio 2005 SP1
Upvotes: 0
Views: 186
Reputation: 152173
You are using the wrong printf
format specifier. This error occurs twice, once in the kernel, and once at the end of your code for the output printout.
Instead of %d
you should use %f
. When I make that change to your code, I get the correct results when running on a real GPU using CUDA 5.0:
$ ./t212
input arr 0 : 0.000000e+00
input arr 1 : 1.000000e+00
input arr 2 : 2.000000e+00
input arr 3 : 3.000000e+00
input arr 4 : 4.000000e+00
input arr 5 : 5.000000e+00
input arr 6 : 6.000000e+00
input arr 7 : 7.000000e+00
input arr 8 : 8.000000e+00
input arr 9 : 9.000000e+00
input arr 10 : 1.000000e+01
input arr 11 : 1.100000e+01
0.000000
1.000000
2.000000
3.000000
4.000000
5.000000
6.000000
7.000000
8.000000
9.000000
10.000000
11.000000
x = 0 & y = 0 & z = 0
x = 1 & y = 0 & z = 0
x = 2 & y = 0 & z = 0
x = 3 & y = 0 & z = 0
x = 4 & y = 0 & z = 0
x = 5 & y = 0 & z = 0
x = 6 & y = 0 & z = 0
x = 7 & y = 0 & z = 0
x = 8 & y = 0 & z = 0
x = 9 & y = 0 & z = 0
x = 10 & y = 0 & z = 0
x = 11 & y = 0 & z = 0
output arr : 0.000000
output arr : 1.000000
output arr : 4.000000
output arr : 9.000000
output arr : 16.000000
output arr : 25.000000
output arr : 36.000000
output arr : 49.000000
output arr : 64.000000
output arr : 81.000000
output arr : 100.000000
output arr : 121.000000
Upvotes: 3