Device pointer in a device class (Cuda C++)

Question

I would like to implement a device side vector class which encapsulates a pointer to the elements of the container.

After I instantiate an object of this class, I cannot access the pointer stored inside it. Every attempt fails with 'Access violation writing location <some device memory address>'.

My code is the following:

#include <cuda_runtime.h>
#include <cstddef>

// Listing from the question, kept as posted: this version does NOT work.
// operator new places the whole object in device memory (cudaMalloc), yet the
// constructor and destructor are __host__ functions. Every host-side use of a
// member therefore goes through a device address: cudaMalloc is asked to write
// its result to &m_bValues, and the destructor reads m_bValues directly.
template <typename T>
class DeviceVector
{
private:
    T* m_bValues;          // pointer to the element storage
    std::size_t m_bSize;   // number of elements

public:
    // Allocates the storage for the object itself with cudaMalloc, so the
    // returned address (and hence `this`) is a device address.
    __host__
    void* operator new(std::size_t size)
    {
        DeviceVector* object = nullptr;
        cudaMalloc((void**)&object, size);
        return object;
    }

    // Releases the object storage obtained in operator new.
    __host__
    void operator delete(void* object)
    {
        cudaFree(object);
    }

    // Runs on the host while `this` refers to device memory.
    __host__
    DeviceVector(std::size_t size = 1)
    {
        // Works: &m_bSize is only used as the destination of a host->device copy.
        cudaMemcpy(&m_bSize, &size, sizeof(std::size_t), cudaMemcpyHostToDevice);

        // At this cudaMalloc I get Access violation writing location...
        // (cudaMalloc stores the new pointer through its first argument from
        // host code, and &m_bValues is a device address.)
        cudaMalloc((void**)&m_bValues, size * sizeof(T));

        // It's an alternative solution here
        // (allocate into a host variable, then copy the pointer value over).
        T* ptr;
        cudaMalloc((void**)&ptr, size * sizeof(T));
        cudaMemcpy(&m_bValues, &ptr, sizeof(T*), cudaMemcpyHostToDevice);
        // The memory is allocated
        // But I can't access it through m_bValues pointer
        // It is also Access violation writing location...
    }

    __host__
    ~DeviceVector()
    {
        // Access violation here if I use the second solution in the constructor
        // (m_bValues is read on the host although it is stored on the device).
        cudaFree(m_bValues);
    }
};

// Minimal driver: create one DeviceVector through the class-specific
// operator new and destroy it again.
int main()
{
    // NOTE(review): the template argument is missing from the post as
    // captured; int is assumed here - confirm against the original question.
    DeviceVector<int>* vec = new DeviceVector<int>();

    delete vec;

    return 0;
}

Note: I have access to the size attribute.

So my questions are:
How to allocate memory for this class to get access to the pointer inside?
Is it even possible to encapsulate a pointer in a class that lives on the device?

Robert Crovella · Accepted Answer

This line is illegal:

    cudaMalloc((void**)&m_bValues, size * sizeof(T));

because your new operator allocated the object on the device:

    cudaMalloc((void**)&object, size);
    return object;

and the constructor was called to operate on that allocation. Therefore &m_bValues is taking the address of a device variable in host code which is illegal in CUDA. If you do that, and then attempt to use it in host code (i.e. the cudaMalloc operation), you're going to get a seg fault. cudaMalloc creates a device allocation of a particular size, and then stores the device pointer to that allocation in a variable that is expected to be resident on the host. If you pass it a device address to store that pointer into instead, cudaMalloc will segfault trying to write the pointer value.

Your alternative solution is a somewhat better approach, and is the general idea when it's necessary to copy a pointer to a device allocation to a variable resident on the device.

But you've still basically made the allocation that m_bValues points to inaccessible from the host. (ptr, being a temporary variable, won't help, and creating another variable in the class to hold a value like ptr won't help either because the entire class is allocated and resident on the device.) For the same reason that you're not allowed to use &m_bValues in the previous cudaMalloc operation, you won't be able to use it directly in any other host code (except as the target for cudaMemcpy host->device when copying the pointer value itself).

I don't think there are any simple fixes for this. I suggest re-crafting the object to live on the host, and provide appropriate host- and device-side allocations for corresponding pointers and parameters (like size).

It also seems like you're re-inventing the wheel. You might want to investigate thrust device vectors (which are easily usable with ordinary CUDA code.)

Anyway, this was the closest I could come up with:

#include <stdio.h>
#include <stdlib.h>
#include <cstddef>
#include <new>

// cudaCheckErrors(msg): query the CUDA runtime for the last recorded error
// via cudaGetLastError(). If one is pending, print `msg`, the runtime's error
// string and the file/line of the macro use to stderr, then terminate the
// process with exit(1). Does nothing when no error is pending.
// Wrapped in do { ... } while (0) so it behaves as a single statement.
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t cudaCheckErrors_err = cudaGetLastError(); \
        if (cudaCheckErrors_err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                (msg), cudaGetErrorString(cudaCheckErrors_err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

// Host-resident handle for a device buffer of `size` elements of type T.
// The object itself lives in host memory (malloc/free); only the element
// storage behind m_bValues is allocated on the device. Host code can therefore
// read the members directly and hand the device pointer to CUDA API calls.
template <typename T>
class DeviceVector
{
private:
    T* m_bValues;          // device pointer filled in by cudaMalloc
    std::size_t m_bSize;   // number of elements
    std::size_t eleSize;   // sizeof(T), in bytes

    // Not copyable: a copy would hold the same m_bValues and both destructors
    // would call cudaFree on it. Declared but intentionally not defined.
    DeviceVector(const DeviceVector&);
    DeviceVector& operator=(const DeviceVector&);

public:
    // Allocates host storage for the object.
    // `size` is already the number of bytes required for one object, so it is
    // passed to malloc unchanged. Throws std::bad_alloc on failure so that the
    // constructor is never run on a null pointer.
    __host__
    void* operator new(std::size_t size)
    {
        void* object = malloc(size);
        if (object == NULL)
            throw std::bad_alloc();
        return object;
    }

    // Releases the host storage obtained in operator new.
    __host__
    void operator delete(void* object)
    {
        free(object);
    }

    // Allocates `size` elements on the device and zero-fills them byte-wise.
    // Any CUDA error is reported through cudaCheckErrors, which terminates
    // the process.
    __host__
    DeviceVector(std::size_t size = 1)
        : m_bValues(NULL), m_bSize(size), eleSize(sizeof(T))
    {
        // &m_bValues is a host address here, so cudaMalloc can store into it.
        cudaMalloc(&m_bValues, m_bSize * sizeof(T));
        cudaCheckErrors("constructor cudaMalloc fail");
        cudaMemset(m_bValues, 0, m_bSize * sizeof(T));
        cudaCheckErrors("constructor cudaMemset fail");
    }

    // Frees the device buffer.
    __host__
    ~DeviceVector()
    {
        cudaFree(m_bValues);
        cudaCheckErrors("destructor cudaFree fail");
    }

    // Returns the device pointer to the element storage. It must not be
    // dereferenced in host code; pass it to CUDA API calls or kernels.
    __host__
    T* getDevPtr() const
    {
        return m_bValues;
    }

    // Returns the number of elements.
    __host__
    std::size_t getSize() const
    {
        return m_bSize;
    }

    // Returns the size of one element in bytes.
    __host__
    std::size_t geteleSize() const
    {
        return eleSize;
    }
};

// Minimal driver: create a one-element DeviceVector on the host, fill its
// device buffer through the exposed device pointer, then destroy it.
int main()
{
    // NOTE(review): the template argument is missing from the post as
    // captured; int is assumed here - confirm against the original answer.
    DeviceVector<int>* vec = new DeviceVector<int>();

    // cudaMemset works byte-wise: every byte of the buffer becomes 0xFF.
    cudaMemset(vec->getDevPtr(), 0xFF, vec->getSize() * vec->geteleSize());
    cudaCheckErrors("vector fill fail");

    delete vec;

    return 0;
}

You've shown very little about how you want to interact with an object of this class, so I'm just guessing here.

Device pointer in a device class (Cuda C++)

Answers (1)

Related Questions