Reputation: 3291
I am trying to copy a structure, containing an array of function pointers, to the device. I can't figure out what's wrong with the following code. The code inside the kernel doesn't work.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line,bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
#define N_MODELS 2
#define N_PARAMS 2
struct userData
{
float (*eval[N_MODELS]) (const float params[N_PARAMS]);
};
__device__ float add(const float params[N_PARAMS])
{
return params[0] + params[1];
}
__device__ float mult(const float params[N_PARAMS])
{
return params[0] * params[1];
}
// function pointer for device
__device__ float (*add_ptr)(const float params[N_PARAMS]) = add;
__device__ float (*mult_ptr)(const float params[N_PARAMS]) = mult;
__global__ void kernel(float *d_result,struct userData *d_user, const float *d_params)
{
//this is currently not working
*d_result = (d_user->eval[0]) (d_params);
printf("d_result = %g\n", *d_result);
}
int main(void)
{
//*************//
// struct part //
//*************//
// function pointer
float(*fpAdd)(const float params[N_PARAMS]);
float(*fpMult)(const float params[N_PARAMS]);
// copy function pointers to device
gpuErrchk(cudaMemcpyFromSymbol(&fpAdd, add_ptr, sizeof(void *)));
gpuErrchk(cudaMemcpyFromSymbol(&fpMult, mult_ptr, sizeof(void *)));
struct userData h_user;
h_user.eval[0] = add;
h_user.eval[1] = mult;
struct userData *d_user;
gpuErrchk(cudaMalloc(&d_user, sizeof(userData)));
gpuErrchk(cudaMemcpy(d_user, &h_user, sizeof(userData), cudaMemcpyHostToDevice));
// parameters
float h_params[N_PARAMS] = { 3.0f, 2.0f };
float *d_params;
gpuErrchk(cudaMalloc(&d_params, N_PARAMS*sizeof(float)));
gpuErrchk(cudaMemcpy(d_params, h_params, N_PARAMS*sizeof(float), cudaMemcpyHostToDevice));
// result
float h_result = 1.0f;
float *d_result;
gpuErrchk(cudaMalloc(&d_result, sizeof(float)));
gpuErrchk(cudaMemcpy(d_result, &h_result, sizeof(float), cudaMemcpyHostToDevice));
kernel << <1, 1 >> >(d_result, d_user, d_params);
gpuErrchk(cudaMemcpy(&h_result, d_result, sizeof(float), cudaMemcpyDeviceToHost));
printf("result = %g\n", h_result);
gpuErrchk(cudaFree(d_result));
gpuErrchk(cudaFree(d_params));
gpuErrchk(cudaFree(d_user));
return EXIT_SUCCESS;
}
Upvotes: 0
Views: 77
Reputation: 72348
The error is here:
struct userData h_user;
h_user.eval[0] = add;
h_user.eval[1] = mult;
You are populating the structure with the wrong values. Having read the __device__
memory values to get the function pointers from the device, you need to use those values to populate the function structure, rather than the host symbols for the device functions. So this:
struct userData h_user;
h_user.eval[0] = fpAdd;
h_user.eval[1] = fpMult;
should work as you intended,
Upvotes: 1