Reputation: 11
I try to make a small code to generate numbers and return the result in array but once I run this code it's not working, I have tried to use Nsight debugger to understand where is my problem but it freezes and closes immediately.
Could you help me please to understand where is the problem in this code?
__global__ void mykernel( int* PF_tmp, int* PL_tmp, int* QF_tmp, int* QL_tmp,
int m[2], int p[5], int q[5], int i, int* n,
int out[10][5], int N)
{
int id = blockDim.x * blockIdx.x + threadIdx.x;
int idx = blockIdx.x;
int idy = blockIdx.y;
int w = idx/100;
int x = idx%100;
int y = idy;
int z = threadIdx.x;
int len = ((i * 2) + 5);
// Fill PF_tmp & QF_tmp
if( i > 0){
for(int k = 0; k < (i * 2); k++)
{
p[k] = PF_tmp[k];
q[k] = QF_tmp[k];
}
}
// Fill X
if( x > 10)
{
p[(i*2)] = (x - (x % 10)) / 10;
p[(i*2)+1] = x % 10;
}else{
p[(i*2)] = 0;
p[(i*2)+1] = x;
}
// Fill Y
if( y > 10)
{
q[(i*2)] = (y - (y % 10)) / 10;
q[(i*2)+1] = y % 10;
}else{
q[(i*2)] = 0;
q[(i*2)+1] = y;
}
// Fill m
p[(i * 2)+2] = m[0];
q[(i * 2)+2] = m[1];
// Fill W
if( w > 10)
{
p[(i*2)+3] = (w - (w % 10)) / 10;
p[(i*2)+4] = w % 10;
}else{
p[(i*2)+3] = 0;
p[(i*2)+4] = w;
}
// Fill Z
if( z > 10)
{
q[(i*2)+3] = (z - (z % 10)) / 10;
q[(i*2)+4] = z % 10;
}else{
q[(i*2)+3] = 0;
q[(i*2)+4] = z;
}
// Fill PL_tmp & QL_tmp
if( i > 0)
{
for(int k = 0; k < (i * 2); k++)
{
p[(len-(i * 2))+k] = PL_tmp[k];
q[(len-(i * 2))+k] = QL_tmp[k];
}
}
if(id<10)
{
for(int k =0; k<5; k++)
out[id][k] = p[k];
}
}
int main()
{
cudaError err;
dim3 blocks(10000, 100);
dim3 threads(100);
int m[2] = {4,5};
int hst_out[10][5];
int p[5];
int q[5];
err = cudaMalloc((void **)&p, 5);
err = cudaMalloc((void **)&q, 5);
err = cudaMalloc((void **)&hst_out, 50);
mykernel<<<blocks, threads>>>(NULL, NULL, NULL, NULL, m, p, q, 0, NULL, hst_out, 100000000);
return 0;
}
Upvotes: 0
Views: 431
Reputation: 6753
The error very obvious, it is all C programming.
when you declare
int m[2] = {4,5}; int hst_out[10][5]; int p[5]; int q[5];
now hst_out, p, q are not a pointer, but later it is used as a pointer:
err = cudaMalloc((void **)&p, 5); err = cudaMalloc((void **)&q, 5); err = cudaMalloc((void **)&hst_out, 50);
so u should have declare it initially as a pointer instead, eg,
int *p;
and used it as this way:
err = cudaMalloc((void **)&p, 5*sizeof(int));
And notice too that the size you have declared is just 5 bytes....whereas I declared it as 5*sizeof(int).
For more example see:
http://cuda-programming.blogspot.sg/2013/03/how-to-avoid-uses-of-cudamalloc-in.html
Upvotes: 1