Reputation: 152
my purpose is to use CUDA through a stub library, which encapsulates CUDA specific functions, methods and data. CUDA stuff will reside inside a dll file and it will be used dynamically via LoadLibrary and GetProcAddress Windows API functions. I am using Visual Studio 2010 C++ compiler to create CUDA dll, but the rest is done by another compiler. That means, that I cannot use heap memory (ie. malloc, new or anything, which lives outside stack memory. Even global variables cause memory corruption) inside CUDA dll, so it is literally a stub library.
However, first I want to code a test program: CUDAhost.exe and CUDAdevice.dll, which both are compiled by Visual Studio 2010, two projects in a single solution. This program draws an OpenGL texture to the screen, but first the image data is copied from readGLTexture to viewGLTexture by CUDA. Notice that to avoid using heap memory, I use void pointer references void* &cReadCudaResource and void* &cViewCudaResource. My problem is I cannot get the program working, the window is black. I cannot find the error. I'm not sure is this possible at all or should I choose completely different solution. I hope you can help me. Any suggestions are appreciated. Below is the source code:
CUDAhost.cpp:
#include "stdafx.h"
const unsigned int window_width = 512;
const unsigned int window_height = 512;
GLuint viewGLTexture;
GLuint readGLTexture;
void* cViewCudaResource;
void* cReadCudaResource;
HINSTANCE dll;
typedef void (*SETCUDA)(unsigned int& readGLTexture, void* &cReadCudaResource, unsigned int& viewGLTexture, void* &cViewCudaResource);
SETCUDA setCuda;
typedef void (*DRAWPICTURE)(void* &cReadCudaResource, void* &cViewCudaResource);
DRAWPICTURE drawPicture;
bool loadTexture(const wchar_t* name, GLuint& number) {
FILE* file;
BITMAPFILEHEADER bitmapFileHeader;
BITMAPINFOHEADER bitmapInfoHeader;
unsigned char *bitmap;
unsigned char temp;
wchar_t path[45]={0};
int width;
int height;
//prepare file path
wcsncat_s(path, L"Textures\\", 45);
wcsncat_s(path, name, 45);
wcsncat_s(path, L".bmp", 45);
//open BMP file
file=_wfopen(path, L"rb");
if (file==NULL) {
return false;
}
//read bmp file header and sequre it is bmp file
fread(&bitmapFileHeader, sizeof(BITMAPFILEHEADER), 1, file);
if (bitmapFileHeader.bfType != 0x4D42) {
fclose(file);
return false;
}
//read bmp info header and move to the beginning of image data
fread(&bitmapInfoHeader, sizeof(BITMAPINFOHEADER), 1, file);
fseek(file, bitmapFileHeader.bfOffBits, SEEK_SET);
//allocate memory space
bitmap=(unsigned char*)malloc(bitmapInfoHeader.biSizeImage);
if (!bitmap) {
free(bitmap);
bitmap=NULL;
fclose(file);
return false;
}
//read image
fread(bitmap, 1, bitmapInfoHeader.biSizeImage, file);
if (file==NULL) {
free(bitmap);
bitmap=NULL;
fclose(file);
return false;
}
//rearrange bgr to rgb
for (int i=0; i<bitmapInfoHeader.biSizeImage; i+=3) {
temp=bitmap[i];
bitmap[i]=bitmap[i+2];
bitmap[i+2]=temp;
}
//query image width and height
width=bitmapInfoHeader.biWidth;
height=abs(bitmapInfoHeader.biHeight);
//close bmp file
fclose(file);
glGetError();
//create OpenGL texture
glGenTextures(1, &number);
glBindTexture(GL_TEXTURE_2D, number);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, bitmap);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
//free temporary buffer
free(bitmap);
bitmap=NULL;
//if success, return true
if (0==glGetError()) {
return true;
} else {
return false;
}
}
void initGLandCUDA(int argc, char* argv[]) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA);
glutInitWindowSize(window_width, window_height);
glutCreateWindow("CUDA GL Interop");
glewInit();
glEnable(GL_TEXTURE_2D);
bool success=loadTexture(L"Tex", readGLTexture);
glGenTextures(1, &viewGLTexture);
glBindTexture(GL_TEXTURE_2D, viewGLTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, window_width, window_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, 0);
dll=LoadLibraryW(L"CUDAdevice.dll");
if (dll) {
setCuda=(SETCUDA)GetProcAddress(dll, "setCuda");
setCuda(readGLTexture, cReadCudaResource, viewGLTexture, cViewCudaResource);
}
}
void renderFrame() {
if (dll) {
drawPicture=(DRAWPICTURE)GetProcAddress(dll, "drawPicture");
drawPicture(cReadCudaResource, cViewCudaResource);
}
glBindTexture(GL_TEXTURE_2D, viewGLTexture);
{
glBegin(GL_QUADS);
{
glTexCoord2f(0.0f, 0.0f); glVertex2f(-1.0f, -1.0f);
glTexCoord2f(1.0f, 0.0f); glVertex2f(+1.0f, -1.0f);
glTexCoord2f(1.0f, 1.0f); glVertex2f(+1.0f, +1.0f);
glTexCoord2f(0.0f, 1.0f); glVertex2f(-1.0f, +1.0f);
}
glEnd();
}
glBindTexture(GL_TEXTURE_2D, 0);
glFinish();
}
int _tmain(int argc, _TCHAR* argv[])
{
initGLandCUDA(argc, reinterpret_cast<char**>(argv));
glutDisplayFunc(renderFrame);
glutMainLoop();
return 0;
}
dllmain.cpp:
BOOL APIENTRY DllMain( HMODULE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved
)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}
//this function is used to setup CUDA
void setCuda(unsigned int& readGLTexture, void* &cReadCudaResource, unsigned int& viewGLTexture, void* &cViewCudaResource) {
struct cudaGraphicsResource* viewCudaResource;
struct cudaGraphicsResource* readCudaResource;
cudaError cError;
cudaGLSetGLDevice(0);
cError=cudaGraphicsGLRegisterImage(&viewCudaResource, viewGLTexture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly);
cError=cudaGraphicsGLRegisterImage(&readCudaResource, readGLTexture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore);
cReadCudaResource=reinterpret_cast<void*>(readCudaResource);
cViewCudaResource=reinterpret_cast<void*>(viewCudaResource);
}
//this function is used to draw texture image via CUDA
void drawPicture(void* &cReadCudaResource, void* &cViewCudaResource) {
cudaError cError;
struct cudaGraphicsResource* viewCudaResource=reinterpret_cast<cudaGraphicsResource*>(cReadCudaResource);
struct cudaGraphicsResource* readCudaResource=reinterpret_cast<cudaGraphicsResource*>(cViewCudaResource);
cudaArray *readCudaArray;
cudaArray *viewCudaArray;
cError=cudaGraphicsMapResources(1, &readCudaResource);
cError=cudaGraphicsMapResources(1, &viewCudaResource);
cError=cudaGraphicsSubResourceGetMappedArray(&readCudaArray, readCudaResource, 0, 0);
cError=cudaGraphicsSubResourceGetMappedArray(&viewCudaArray, viewCudaResource, 0, 0);
callCUDAKernel(readCudaArray, viewCudaArray);
cudaGraphicsUnmapResources(1, &viewCudaResource);
cudaStreamSynchronize(0);
}
kernels.cu:
#include "stdafx.h"
texture<uchar4, cudaTextureType2D, cudaReadModeElementType> readCudaTextureObject;
surface<void, cudaSurfaceType2D> viewCudaSurfaceObject;
__global__ void renderingKernel() {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
uchar4 dd=tex2D(readCudaTextureObject, x, y);
surf2Dwrite(dd, viewCudaSurfaceObject, x*sizeof(dd), y, cudaBoundaryModeZero);
}
void callCUDAKernel(cudaArray *readCudaArray, cudaArray *viewCudaArray) {
cudaError cError;
cError=cudaBindTextureToArray(readCudaTextureObject, readCudaArray);
cError=cudaBindSurfaceToArray(viewCudaSurfaceObject, viewCudaArray);
dim3 block(256, 1, 1);
dim3 grid(2, 512, 1);
renderingKernel<<<grid, block>>>();
cudaPeekAtLastError();
cudaDeviceSynchronize();
}
CUDAdevice's stdafx.h:
#pragma once
#include "targetver.h"
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
// Windows Header Files:
#include <windows.h>
// TODO: reference additional headers your program requires here
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "kernels.h"
#if defined (__cplusplus)
extern "C"
{
#endif
__declspec(dllexport) void setCuda(unsigned int& readGLTexture, void* &cReadCudaResource, unsigned int& viewGLTexture, void* &cViewCudaResource);
__declspec(dllexport) void drawPicture(void* &cReadCudaResource, void* &cViewCudaResource);
#if defined (__cplusplus)
}
#endif
kernels.h:
#ifndef __kernels_H
#define __kernels_H
void callCUDAKernel(cudaArray *readCudaArray, cudaArray *viewCudaArray);
#endif
PS. I have also set paths to CUDA libraries, headers, sources and binaries, added cudart.lib to additional dependencies, set CUDA 4.2 targets and compute_20,sm_21. Test program uses GLEW and GLUT libraries.
Upvotes: 0
Views: 1307
Reputation: 152
Better update solution here if someone reads this...
The above code cannot work. The whole idea was to avoid using heap memory in a DLL module. But, CUDA needs textures and surfaces to be global:
texture<uchar4, cudaTextureType2D, cudaReadModeElementType>readCudaTextureObject;
surface<void, cudaSurfaceType2D> viewCudaSurfaceObject;
However, to avoid using heap memory requires these to be local, so this is impossible.
Furthermore, according to my understanding, there shouldn't be any problem to use a heap memory inside a CUDA DLL module. I've already made working EXE/DLL test programs, which use heap memory inside DLL module. I haven't found the bug in my previous attempts, but it may be due to the GLEW library. I'm not using GLEW with my working programs.
Upvotes: 1