Reputation: 8028
I have a GUI application with a producer thread and an OpenGL thread, the OpenGL thread needs to call CUDA functions and the producer needs to call cudaMemcpy
etc.
No matter what I do I can't seem to get the CUDA driver api to work. Every time I try to use these function I get a cudaErrorMissingConfiguration
.
I want to use multi-threaded CUDA, what is the paradigmatic way to accomplish this?
Original
void program::initCuda()
{
CUresult a;pctx=0;
cudaSafeCall(cudaSetDevice(0));
cudaSafeCall(cudaGLSetGLDevice(0));
a=cuInit(0);
cudaSafeCall(cudaFree(0));
cout <<"cuInit :" <<a << endl;assert(a == cudaSuccess);
//a=cuCtxGetCurrent(pctx);
a=cuCtxCreate(pctx,CU_CTX_SCHED_AUTO,0);
cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
//Fails with cudaErrorMissingConfiguration
a=cuCtxPopCurrent(pctx);
cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
cout <<"Initialized CUDA" << endl;
}
Revised
void glStream::initCuda()
{
CUresult a;
pctx=0;
cudaSafeCall(cudaSetDevice(0));
cudaSafeCall(cudaGLSetGLDevice(0));
cudaFree(0);// From post http://stackoverflow.com/questions/10415204/how-to-create-a-cuda-context seems to indicate that `cudaSetDevice` should make a context.
a=cuCtxGetCurrent(pctx);
cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
a=cuCtxPopCurrent(pctx);
cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
cout <<"Initialized CUDA" << endl;
}
Upvotes: 3
Views: 5108
Reputation: 72349
The simplest version of your second code should look like this:
#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
int main(void)
{
CUresult a;
CUcontext pctx;
cudaSetDevice(0); // runtime API creates context here
a = cuCtxGetCurrent(&pctx);
std::cout << "GetContext : " << a << std::endl;
assert(a == CUDA_SUCCESS);
a = cuCtxPopCurrent(&pctx);
std::cout << "cuCtxPopCurrent : " << a << std::endl;
assert(a == CUDA_SUCCESS);
std::cout << "Initialized CUDA" << std::endl;
return 0;
}
which yields the following on OS X 10.6 with CUDA 5.0:
$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
GetContext :0
cuCtxPopCurrent :0
Initialized CUDA
ie. "just works". Here the context is lazily initiated by the cudaSetDevice
call (note I incorrectly asserted that cudaSetDevice
doesn't establish a context, but at least in CUDA 5 it appears to. This behaviour may have changed when the runtime API was revised in CUDA 4).
Alternatively, you can use the driver API to initiate the context:
#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
int main(void)
{
CUresult a;
CUcontext pctx;
CUdevice device;
cuInit(0);
cuDeviceGet(&device, 0);
std::cout << "DeviceGet : " << a << std::endl;
cuCtxCreate(&pctx, CU_CTX_SCHED_AUTO, device ); // explicit context here
std::cout << "CtxCreate : " << a << std::endl;
assert(a == CUDA_SUCCESS);
a = cuCtxPopCurrent(&pctx);
std::cout << "cuCtxPopCurrent : " << a << std::endl;
assert(a == CUDA_SUCCESS);
std::cout << "Initialized CUDA" << std::endl;
return 0;
}
which also "just works":
$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
DeviceGet : 0
CtxCreate : 0
cuCtxPopCurrent : 0
Initialized CUDA
What you shouldn't do is mix both as in your first example. All I can suggest is try both of these and confirm they work for you, then adopt the call sequences to whatever it is you are actually trying to achieve.
Upvotes: 3