Reputation: 17
I'm trying to convert following c code in CUDA.... In following code I've array of elements [12 ,13, 23, 24, 25 ]... I want output like [123, 234, 234, 245 ].. You'll get the logic by looking at the code...
#include <stdio.h>
#define N 5
int main(){
int i,j;
int array[N] = { 12, 13, 23, 24, 25};
int new_array[(N-1)*N/2] = { 0 };
int k=0;
for ( i = 0; i < N; i++)
for( j = i+1; j < N; j++)
{
if((array[i]-array[i]%10) == (array[j]-array[j]%10))
{
new_array[i*N+(j-(i+1))-(i)*(i+1)/2] = array[i]*10 + array[j]%10;
k++;
}
}
for ( i = 0; i < (N-1)*N/2; i++) printf("new_array[%d] = %d\n", i, new_array[i]);
return 0;
}
Now problem comes when i try it to convert into CUDA kernel... I m getting something in output but i dont know how to choose selective elements and store it in the array...
#include <stdio.h>
#define N 5
__global__ void kernel(int* new_array_d, int* array_d) {
int i = threadIdx.x;
int j = threadIdx.y;
if (j > i && (array_d[i]-(array_d[i]%10)) == (array_d[j]-(array_d[j]%10)))
new_array_d[i*N+(j-(i+1))-(i)*(i+1)/2] = array_d[i]*10 + (array_d[j]%10);
}
int main(){
int array[N] = { 12, 13, 23, 24, 25};
int new_array[N-1)*N/2] = { 0 };
int* array_d; cudaMalloc((void**)&array_d,N*sizeof(int));
int* new_array_d; cudaMalloc((void**)&new_array_d,(N-1)*N/2*sizeof(int));
cudaMemcpy(array_d,array,N*sizeof(int),cudaMemcpyHostToDevice);
dim3 grid(1,1);
dim3 block(N,N);
kernel<<<grid,block>>>(new_array_d,array_d);
cudaMemcpy(new_array,new_array_d,(N-1)*N/2*sizeof(int),cudaMemcpyDeviceToHost);
for (int i = 0; i < (N-1)*N/2; i++) printf("new_array[%d] = %d\n", i, new_array[i]);
return 0;
}
Should i try for different index or do it on the CPU only...?? Please help me... thnks in advance...
please Note : I'm trying to combine elements 12 and 13 only if the number except unit place is same... 12 and 13 ....(have 1 in common)...do 12*10+(13%10) = 123
23 and 25 .....(have 2 in common)...do 23*10+(25%10) = 235...
Upvotes: 0
Views: 94
Reputation: 21515
Apart from the compilation issue reported in the comments above, your code does not seem to show any logic issue. You are observing different results for the unset array values right because the final arrays new_array
and new_array_d
have not been initialized.
You can solve this issue by adding the following lines to your code:
memset(new_array,0,(N-1)*N/2*sizeof(int));
cudaMemset(new_array_d, 0, (N-1)*N/2*sizeof(int));
For your convenience, I report below the full code along with initializations and CUDA error checking in the sense of What is the canonical way to check for errors using the CUDA runtime API?, an important thing that you have overlooked in your code:
#include <stdio.h>
#define N 7
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void kernel(int* new_array_d, int* array_d) {
int i = threadIdx.x;
int j = threadIdx.y;
if ((j > i) && ((array_d[i]-(array_d[i]%10)) == (array_d[j]-(array_d[j]%10))))
new_array_d[i*N+(j-(i+1))-(i)*(i+1)/2] = array_d[i]*10 + (array_d[j]%10);
}
int main(){
int array[N] = { 12, 13, 23, 24, 25, 32, 33};
int new_array[(N-1)*N/2] = { 0 };
int* array_d; cudaMalloc((void**)&array_d,N*sizeof(int));
int* new_array_d; cudaMalloc((void**)&new_array_d,(N-1)*N/2*sizeof(int));
memset(new_array,0,(N-1)*N/2*sizeof(int));
gpuErrchk(cudaMemset(new_array_d, 0, (N-1)*N/2*sizeof(int)));
gpuErrchk(cudaMemcpy(array_d,array,N*sizeof(int),cudaMemcpyHostToDevice));
for (int i = 0; i < N; i++)
for(int j = i+1; j < N; j++)
{
if((array[i]-array[i]%10) == (array[j]-array[j]%10))
new_array[i*N+(j-(i+1))-(i)*(i+1)/2] = array[i]*10 + array[j]%10;
}
for (int i = 0; i < (N-1)*N/2; i++) printf("new_array[%d] = %d\n", i, new_array[i]);
printf("\n\n");
dim3 grid(1,1);
dim3 block(N,N);
kernel<<<grid,block>>>(new_array_d,array_d);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
gpuErrchk(cudaMemcpy(new_array,new_array_d,(N-1)*N/2*sizeof(int),cudaMemcpyDeviceToHost));
for (int i = 0; i < (N-1)*N/2; i++) printf("new_array[%d] = %d\n", i, new_array[i]);
getchar();
return 0;
}
Upvotes: 2