CUDA calling device functions from separate files (Name mangling?)

Question

How should I do this properly? Here is a simplification of the code:

//main.cu    
#include "math.cuh"

// Kernel: each in-range thread fills its own 5-float slice of x with 1..5.
// Uses only the x dimension of the launch (blockDim.x / blockIdx.x / threadIdx.x).
__global__ void test(float *x, unsigned numElements)
{
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Threads whose index falls past numElements have nothing to do.
    if (tid >= numElements)
    {
        return;
    }

    // Per-thread source values, copied to x[5*tid .. 5*tid + 4] by the
    // device helper declared in math.cuh.
    float values[5] = {1, 2, 3, 4, 5};
    copyArray(x + 5*tid, values, 5);
}

// Host entry point (simplified): launches the test kernel on the device buffer.
int main(int argc, char **argv)
{
    // Allocation of d_A and the definitions of numElements, blocksPerGrid and
    // threadsPerBlock are omitted from this simplified listing.
    // A kernel launch needs an execution configuration between <<< and >>>.
    test<<<blocksPerGrid, threadsPerBlock>>>(d_A, numElements);
}

//math.cuh
// Device-side helper declaration: copies `length` floats from src to dest.
// The length parameter is declared as `unsigned` in this header.
__device__ void copyArray(float *dest, float *src, unsigned length);

//math.cu
#include "math.cuh"
// Device-side helper: copies the first `length` floats of src into dest,
// one element at a time, in ascending index order.
__device__ void copyArray(float *dest, float *src, size_t length)
{
    int idx = 0;
    while (idx < length) {
        dest[idx] = src[idx];
        ++idx;
    }
}

compiled with this command:

nvcc -rdc=true -arch=sm_20 -o cudaMain main.cu math.cu -Xlinker -framework,OpenGL,-framework,GLUT && ./cudaMain

and got this error:

nvlink error   : Undefined reference to '_Z9copyArrayPfS_j' in '/tmp/tmpxft_00000265_00000000-21_main.o'

This clearly looks like a name mangling error, but I tried putting extern "C" in all sorts of places and it didn't work.

shaoyl85 · Accepted Answer

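The function prototype in math.cuh declares the last parameter as unsigned, while the definition in math.cu declares it as size_t. Is that the cause? In C++ the parameter types are part of the mangled symbol name, so these are two different functions: main.cu asks the linker for '_Z9copyArrayPfS_j' (the trailing 'j' encodes unsigned int), but math.cu only defines the size_t overload, which is a different symbol on a platform where size_t is not unsigned int. Making the two signatures identical resolves the undefined reference; extern "C" is not needed.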

CUDA calling device functions from separate files (Name mangling?)

Answers (1)

Related Questions