Reputation: 115
I have a problem using a pointer-to-pointer in CUDA. The code snippet is below.
// d_ppcPtr: table of 10 char* slots allocated with cudaMalloc (device memory).
// d_pcPtr:  host variable that receives the device address of each 100-byte row.
// h_pcPtr:  100-byte host staging buffer.
char** d_ppcPtr, *d_pcPtr, *h_pcPtr;
cudaMalloc(&d_ppcPtr, sizeof(char*) * 10);
h_pcPtr = (char*)malloc(sizeof(char) * 100);
for(int i = 0; i < 10; i ++)
{
// Allocate one row, fill every byte with 1, then store the row's address in slot i of the device table.
cudaMalloc(&d_pcPtr, sizeof(char) * 100);
cudaMemset(d_pcPtr, 1, sizeof(char) * 100);
// &d_ppcPtr[i] only computes an address, so it is safe to form on the host.
cudaMemcpy(&d_ppcPtr[i], &d_pcPtr, sizeof(char*), cudaMemcpyHostToDevice);
// d_ppcPtr[i] (without &) makes the host read from the cudaMalloc'ed table itself.
cudaMemcpy(h_pcPtr, d_ppcPtr[i], sizeof(char) * 100, cudaMemcpyDeviceToHost); //crash here
// Same host-side read of d_ppcPtr[i] as on the previous line.
cudaFree(d_ppcPtr[i]); //crash also here
}
cudaFree(d_ppcPtr);
How can I fix the above two crashes? Thanks in advance.
Upvotes: 1
Views: 3281
Reputation: 151799
The following modification will "fix" your code (fully worked example, including host and device verification):
$ cat t583.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Device-side verification: prints "kernel error" once for every byte among the
// first 100 bytes of row `n` whose value is not 1.
//   data - table of row pointers, dereferenced in device code
//   n    - index of the row to inspect
// There is no thread indexing: each launched thread scans the full row itself
// (main launches it as <<<1,1>>>).
__global__ void testkernel(char **data, unsigned n){
    const char *row = data[n];
    int pos = 0;
    while (pos < 100) {
        if (row[pos] != 1) {
            printf("kernel error\n");
        }
        ++pos;
    }
}
// Evaluates a CUDA runtime call; on any result other than cudaSuccess, prints
// the error string with file/line context and terminates the program.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Builds a device-resident table of 10 row pointers one row at a time and
// verifies each row twice: on the device (testkernel follows the pointer stored
// in the table) and on the host (the row is copied back and compared).
//
// The host never reads d_ppcPtr[i]; the row address it needs is already held in
// the host variable d_pcPtr, so that value is used for the copy-back and free.
//
// Returns 0 on completion; exits with EXIT_FAILURE if a CUDA call or the host
// allocation fails.
int main(){
    const int kRows = 10;
    // Must remain 100 bytes: testkernel scans exactly 100 bytes per row.
    const size_t kRowBytes = sizeof(char) * 100;

    char **d_ppcPtr = NULL;  // device table of row pointers
    char *d_pcPtr = NULL;    // host copy of the current row's device address
    char *h_pcPtr = (char*)malloc(kRowBytes);  // host staging buffer
    if (h_pcPtr == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }

    CUDA_CHECK(cudaMalloc(&d_ppcPtr, sizeof(char*) * kRows));

    for (int row = 0; row < kRows; row++)
    {
        CUDA_CHECK(cudaMalloc(&d_pcPtr, kRowBytes));
        CUDA_CHECK(cudaMemset(d_pcPtr, 1, kRowBytes));

        // Publish the row address into slot `row` of the device table.
        // &d_ppcPtr[row] is pure address arithmetic, so it is legal on the host.
        CUDA_CHECK(cudaMemcpy(&d_ppcPtr[row], &d_pcPtr, sizeof(char*), cudaMemcpyHostToDevice));

        // Clear the staging buffer so stale data from the previous row cannot
        // make the host check pass by accident.
        memset(h_pcPtr, 0, kRowBytes);

        testkernel<<<1,1>>>(d_ppcPtr, row);
        CUDA_CHECK(cudaGetLastError());  // launch-configuration errors

        // Copy back using the host-held address instead of reading the device table.
        // Errors raised while the kernel ran are reported by this call.
        CUDA_CHECK(cudaMemcpy(h_pcPtr, d_pcPtr, kRowBytes, cudaMemcpyDeviceToHost));
        CUDA_CHECK(cudaFree(d_pcPtr));

        for (size_t b = 0; b < kRowBytes; b++) if (h_pcPtr[b] != 1) printf("Error!");
    }

    CUDA_CHECK(cudaFree(d_ppcPtr));
    free(h_pcPtr);
    return 0;
}
$ nvcc -arch=sm_20 -o t583 t583.cu
$ cuda-memcheck ./t583
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
Note that conceptually there is no difference between my code and yours: the pointer value you are trying to read from d_ppcPtr[i] (which crashes, because that location is in device memory and cannot be dereferenced from host code) is already contained in d_pcPtr, which is a host variable.
Upvotes: 4