Reputation: 5345
Consider the following sample program:
#include <iostream>
using namespace std;
__device__ __constant__ float* data;
// Requests numElem*sizeof(T) bytes from cudaMalloc and prints the numeric
// error code to cout if the call does not return cudaSuccess.
// deviceDest is received by value, so &deviceDest below is the address of
// this function's own copy of the pointer, not of the caller's variable.
template<class T> void allocOnly(T* deviceDest, size_t numElem)
{
// cudaMalloc stores its result through the pointer-to-pointer it is given,
// which here refers to the local parameter deviceDest.
cudaError_t errCode = cudaMalloc((void**)&deviceDest, numElem*sizeof(T));
if(errCode != cudaSuccess)
cout << "Got error with code " << errCode << endl;
}
// Compares allocating through allocOnly with calling cudaMalloc directly,
// printing the resulting host-side pointer value in each case.
int main()
{
// Both pointers start out null.
float* test(0);
// The pointer value itself (not its address) is handed to allocOnly.
allocOnly<float>(test,10);
cout << "test = " << test << endl;
float* test2(0);
// Direct call: cudaMalloc is given the address of test2.
cudaError_t errCode = cudaMalloc((void**)&test2, 10*sizeof(float));
if(errCode != cudaSuccess)
cout << "Got error with code " << errCode << endl;
cout << "test2 = " << test2 << endl;
// NOTE(review): no cudaFree call is made before returning.
return 0;
}
When compiled with nvcc test.cu -o testBin, the program prints:
test = 0
test2 = 0x310100
Why is test not modified when cudaMalloc is called through the template function? cudaMalloc is supposed to set it to point to the newly allocated device memory!
Upvotes: 0
Views: 379
Reputation: 16796
The pointer is not modified because allocOnly receives deviceDest by value: cudaMalloc in allocOnly writes the new address into the parameter deviceDest, which is a copy local to allocOnly, so the caller's pointer test is left unchanged. You can modify allocOnly to take the address of the pointer and allocate memory as follows:
// Allocates device memory for numElem elements of type T and stores the
// resulting device pointer in *deviceDest.
//
// deviceDest : address of the caller's pointer variable. It is taken as T**
//              so that cudaMalloc writes into the caller's pointer rather
//              than into a copy local to this function. A null deviceDest is
//              reported and ignored.
// numElem    : number of elements of type T to allocate.
//
// Failures are reported on cout. Whenever the allocation cannot be made,
// *deviceDest is set to null so the caller can test the result.
template<class T> void allocOnly(T** deviceDest, size_t numElem)
{
    // Nowhere to store the result: report and bail out before touching it.
    if(deviceDest == 0)
    {
        cout << "Got error: null destination passed to allocOnly" << endl;
        return;
    }

    // Guard against numElem*sizeof(T) wrapping around, which would silently
    // request a much smaller buffer than the caller asked for.
    if(numElem > static_cast<size_t>(-1) / sizeof(T))
    {
        *deviceDest = 0;
        cout << "Got error: requested allocation size overflows size_t" << endl;
        return;
    }

    cudaError_t errCode = cudaMalloc(reinterpret_cast<void**>(deviceDest),
                                     numElem*sizeof(T));
    if(errCode != cudaSuccess)
    {
        // Leave the caller with a well-defined null pointer on failure.
        *deviceDest = 0;
        cout << "Got error with code " << errCode
             << " (" << cudaGetErrorString(errCode) << ")" << endl;
    }
}
Inside the main function:
// Caller side for the T** version of allocOnly; the remainder of main is
// elided by the dotted lines.
int main()
{
float* test(0);
// Pass the address of test so allocOnly can store the device pointer in it.
allocOnly<float>(&test,10);
cout << "test = " << test << endl;
.
.
.
}
Upvotes: 3