Mikhail
Mikhail

Reputation: 8028

How do I use CUDA driver functions?

I have a GUI application with a producer thread and an OpenGL thread. The OpenGL thread needs to call CUDA functions, and the producer needs to call cudaMemcpy, etc.

No matter what I do, I can't seem to get the CUDA driver API to work. Every time I try to use these functions I get a cudaErrorMissingConfiguration.

I want to use multi-threaded CUDA, what is the paradigmatic way to accomplish this?

Original

// First attempt at CUDA initialisation. Runtime API calls (cudaSetDevice,
// cudaGLSetGLDevice, cudaFree) and driver API calls (cuInit, cuCtxCreate,
// cuCtxPopCurrent) are issued from the same routine, and each driver result
// code is printed and asserted on.
void program::initCuda()
{
    // pctx is not declared in this function (presumably a class member; verify).
    // It is reset to 0 here and is still 0 when handed to the driver calls below.
    CUresult a;pctx=0;
    // Runtime API: pick device 0. cudaSafeCall is defined elsewhere; presumably
    // it checks the returned error code.
    cudaSafeCall(cudaSetDevice(0));
    cudaSafeCall(cudaGLSetGLDevice(0));
    // Driver API initialisation, issued after the runtime calls above.
    a=cuInit(0);
    cudaSafeCall(cudaFree(0));
    // NOTE(review): a is a driver API CUresult but is compared against the
    // runtime API constant cudaSuccess here and in the asserts below.
    cout <<"cuInit :" <<a << endl;assert(a == cudaSuccess);
    //a=cuCtxGetCurrent(pctx);
    // pctx is passed as-is (value 0 from above); the result is printed under
    // the "GetContext" label although the call is cuCtxCreate.
    a=cuCtxCreate(pctx,CU_CTX_SCHED_AUTO,0);
    cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
    //Fails with cudaErrorMissingConfiguration
    a=cuCtxPopCurrent(pctx);
    cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
    cout <<"Initialized CUDA" << endl;
}

Revised

// Revised attempt at CUDA initialisation. The runtime API selects device 0,
// then the driver API is asked for the current context and that context is
// popped. No cuInit or cuCtxCreate call is made in this version.
void glStream::initCuda()
{
    CUresult a;
    // pctx is not declared in this function (presumably a class member; verify).
    // It is reset to 0 here and is still 0 when handed to the driver calls below.
    pctx=0;
    // cudaSafeCall is defined elsewhere; presumably it checks the returned error code.
    cudaSafeCall(cudaSetDevice(0));
    cudaSafeCall(cudaGLSetGLDevice(0));
    cudaFree(0);// From post http://stackoverflow.com/questions/10415204/how-to-create-a-cuda-context seems to indicate that `cudaSetDevice` should make a context.
    // pctx is passed as-is (value 0 from above) rather than the address of a
    // context handle.
    a=cuCtxGetCurrent(pctx);
    // NOTE(review): a is a driver API CUresult but is compared against the
    // runtime API constant cudaSuccess here and below.
    cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
    a=cuCtxPopCurrent(pctx);
    cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
    cout <<"Initialized CUDA" << endl;
}

Upvotes: 3

Views: 5108

Answers (1)

talonmies
talonmies

Reputation: 72349

The simplest version of your second code should look like this:

#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>

int main(void)
{
    CUresult a;
    CUcontext pctx;
    cudaSetDevice(0); // runtime API creates context here
    a = cuCtxGetCurrent(&pctx);
    std::cout << "GetContext : " << a << std::endl;
    assert(a == CUDA_SUCCESS);
    a = cuCtxPopCurrent(&pctx);
    std::cout << "cuCtxPopCurrent : " << a << std::endl;
    assert(a == CUDA_SUCCESS);
    std::cout << "Initialized CUDA" << std::endl;

    return 0;
}

which yields the following on OS X 10.6 with CUDA 5.0:

$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
GetContext : 0
cuCtxPopCurrent : 0
Initialized CUDA

i.e. it "just works". Here the context is lazily initiated by the cudaSetDevice call (note that I previously asserted, incorrectly, that cudaSetDevice doesn't establish a context, but at least in CUDA 5 it appears to; this behaviour may have changed when the runtime API was revised in CUDA 4).

Alternatively, you can use the driver API to initiate the context:

#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>

// Minimal driver-API-only example: initialise the driver, look up device 0,
// create a context explicitly, then pop it off the calling thread.
// Every driver call's CUresult is stored and checked, so the values printed
// are the real status codes of the calls they are labelled with.
int main(void)
{
    CUcontext pctx;
    CUdevice device;

    // The driver API has to be initialised before any other cu* call.
    CUresult a = cuInit(0);
    assert(a == CUDA_SUCCESS);

    a = cuDeviceGet(&device, 0);
    std::cout << "DeviceGet : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    a = cuCtxCreate(&pctx, CU_CTX_SCHED_AUTO, device); // explicit context here
    std::cout << "CtxCreate : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    a = cuCtxPopCurrent(&pctx);
    std::cout << "cuCtxPopCurrent : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    std::cout << "Initialized CUDA" << std::endl;

    return 0;
}

which also "just works":

$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
DeviceGet : 0
CtxCreate : 0
cuCtxPopCurrent : 0
Initialized CUDA

What you shouldn't do is mix both, as in your first example. All I can suggest is to try both of these and confirm they work for you, then adapt the call sequences to whatever it is you are actually trying to achieve.

Upvotes: 3

Related Questions