beginneR
beginneR

Reputation: 3291

Copy struct with array of function pointers to device

I am trying to copy a structure, containing an array of function pointers, to the device. I can't figure out what's wrong with the following code. The code inside the kernel doesn't work.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
    inline void gpuAssert(cudaError_t code, const char *file, int line,bool abort = true)
    {
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

#define N_MODELS 2
#define N_PARAMS 2

struct userData
{
  float (*eval[N_MODELS]) (const float params[N_PARAMS]);

};


__device__ float add(const float params[N_PARAMS])
{
  return params[0] + params[1];

}

__device__ float mult(const float params[N_PARAMS])
{
  return params[0] * params[1];

}

// function pointer for device
__device__ float (*add_ptr)(const float params[N_PARAMS]) = add;
__device__ float (*mult_ptr)(const float params[N_PARAMS]) = mult;




__global__ void kernel(float *d_result,struct userData *d_user, const float *d_params)
{
    //this is currently not working
    *d_result =  (d_user->eval[0]) (d_params);
    printf("d_result = %g\n", *d_result);

}

int main(void)
{

    //*************//
    // struct part //
    //*************//

    // function pointer
    float(*fpAdd)(const float params[N_PARAMS]);
    float(*fpMult)(const float params[N_PARAMS]);

    // copy function pointers to device
    gpuErrchk(cudaMemcpyFromSymbol(&fpAdd, add_ptr, sizeof(void *)));
    gpuErrchk(cudaMemcpyFromSymbol(&fpMult, mult_ptr, sizeof(void *)));


    struct userData h_user;
    h_user.eval[0] = add;
    h_user.eval[1] = mult;

    struct userData *d_user;
    gpuErrchk(cudaMalloc(&d_user, sizeof(userData)));
    gpuErrchk(cudaMemcpy(d_user, &h_user, sizeof(userData), cudaMemcpyHostToDevice));


    // parameters
    float h_params[N_PARAMS] = { 3.0f, 2.0f };
    float *d_params;
    gpuErrchk(cudaMalloc(&d_params, N_PARAMS*sizeof(float)));
    gpuErrchk(cudaMemcpy(d_params, h_params, N_PARAMS*sizeof(float), cudaMemcpyHostToDevice));


    // result
    float h_result = 1.0f;
    float *d_result;
    gpuErrchk(cudaMalloc(&d_result, sizeof(float)));
    gpuErrchk(cudaMemcpy(d_result, &h_result, sizeof(float), cudaMemcpyHostToDevice));

    kernel << <1, 1 >> >(d_result, d_user, d_params);

    gpuErrchk(cudaMemcpy(&h_result, d_result, sizeof(float), cudaMemcpyDeviceToHost));

    printf("result = %g\n", h_result);

    gpuErrchk(cudaFree(d_result));
    gpuErrchk(cudaFree(d_params));
    gpuErrchk(cudaFree(d_user));


    return EXIT_SUCCESS;
}

Upvotes: 0

Views: 77

Answers (1)

talonmies
talonmies

Reputation: 72348

The error is here:

struct userData h_user;
h_user.eval[0] = add;
h_user.eval[1] = mult;

You are populating the structure with the wrong values. Having read the __device__ memory values to get the function pointers from the device, you need to use those values to populate the function structure, rather than the host symbols for the device functions. So this:

struct userData h_user;
h_user.eval[0] = fpAdd;
h_user.eval[1] = fpMult;

should work as you intended,

Upvotes: 1

Related Questions