Marcel Rudolf

Reputation: 43

Calculate a picture with CUDA and display it directly with OpenGL

I would like to write a program that computes an image (actually a hologram for a Spatial Light Modulator (SLM)) in real time. The image should be calculated on the GPU and displayed directly from there on the screen (800x600 pixels). I would like to use CUDA and OpenGL. As a starting point I wrote a small example program that displays a checkerboard on the screen. It is not working, because I don't know how to pass the image from CUDA to OpenGL. In particular, I do not know what an image resource is: how do I declare it, and how do I assign the calculated picture to it?
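From the CUDA samples and the runtime documentation I have pieced together that the mapping sequence should look roughly like the sketch below (texName and d_A are from my code further down; the GL_R32F texture format is just my assumption, and I have not managed to fit this into my program):

// Sketch of the CUDA-OpenGL texture interop sequence as I understand it.
// Assumes the texture texName was created with a sized single-channel
// float format (e.g. GL_R32F) and that d_A is a device buffer holding
// one float per texel of a 1024x1024 image.
cudaGraphicsResource* res = NULL;

// Register the GL texture with CUDA once, after glTexImage2D
cudaGraphicsGLRegisterImage( &res, texName, GL_TEXTURE_2D,
                             cudaGraphicsRegisterFlagsWriteDiscard );

// Each time the kernel has produced a new image in d_A:
cudaGraphicsMapResources( 1, &res, 0 );

// Ask CUDA for the array that backs mip level 0 of the texture
cudaArray_t texArray = NULL;
cudaGraphicsSubResourceGetMappedArray( &texArray, res, 0, 0 );

// Copy the kernel output into that array (pitch and width are in bytes)
cudaMemcpy2DToArray( texArray, 0, 0, d_A,
                     1024 * sizeof( float ),   // source pitch in bytes
                     1024 * sizeof( float ),   // width of the copied region in bytes
                     1024,                     // height in rows
                     cudaMemcpyDeviceToDevice );

// Unmap so OpenGL can sample the texture again
cudaGraphicsUnmapResources( 1, &res, 0 );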

Here is my code:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <GL/glew.h>
#include <GL/freeglut.h>
#include "cuda_gl_interop.h"

/*  Create checkerboard texture  */
#define checkImageWidth 1024
#define checkImageHeight 1024
#define SIZE_X 1024
#define SIZE_Y 1024
static GLubyte checkImage[ 1024 ][ 1024 ][ 1 ];
static GLuint texName;
// Host-side buffers (currently unused)
float tex[ 1024 ][ 1024 ];
float dA[ 1024 * 1024 ];
// Texture reference for a 2D float texture (legacy CUDA texture API, currently unused)
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
// Device buffer that the kernel writes into
float *d_A;
size_t dsize = 1024 * 1024 * sizeof( float );
struct mystruct
{
    int x;
    int y;
};

// CPU version of the checkerboard (currently unused): fills only the top-left 800x600 region
void makeCheckImage( void )
{
    int i, j, c;

    for( i = 0; i < 600; i++ )
    {
        for( j = 0; j < 800; j++ )
        {
            c = ( ( ( ( i % 2 ) == 0 ) ) ^ ( j % 2 == 0 ) ) * 255;
            checkImage[ i ][ j ][ 0 ] = (GLubyte)c;
        }
    }
}

// CUDA kernel: fill a 1024x1024 float buffer with a checkerboard pattern (values 0 or 255)
__global__ void cudaMakeCheckImage( float *c )
{

    int col = threadIdx.x + blockIdx.x * blockDim.x;
    int row = threadIdx.y + blockIdx.y * blockDim.y;
    int index = col + row * 1024;
    if( col < 1024 && row < 1024 )
    {
        c[ index ] = ( ( ( ( col % 2 ) == 0 ) ) ^ ( row % 2 == 0 ) ) * 255;
    }
}

void init( void )
{
    glClearColor( 0.0, 0.0, 0.0, 0.0 );
    glShadeModel( GL_FLAT );
    glEnable( GL_DEPTH_TEST );

    // Allocate the device buffer and fill it with the checkerboard pattern
    cudaMalloc( &d_A, dsize );
    dim3 block( 32, 32 );
    dim3 grid( SIZE_X / block.x, SIZE_Y / block.y );
    cudaMakeCheckImage <<< grid, block >>> ( d_A );

    glPixelStorei( GL_UNPACK_ALIGNMENT, 1 );

    //makeCheckImage();
    glGenTextures( 1, &texName );
    glBindTexture( GL_TEXTURE_2D, texName );
    // set basic parameters
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
    // Allocate the texture storage, initialised from the host checkImage array
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, checkImageWidth, checkImageHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, checkImage );
    // Unbind the texture
    glBindTexture( GL_TEXTURE_2D, 0 );

    // Handle for the OpenGL texture once it is registered with CUDA
    cudaGraphicsResource* Res;

    // Allocate CUDA array in device memory
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc( 32, 0, 0, 0, cudaChannelFormatKindFloat );

    cudaArray* CuArr;

    cudaMallocArray( &CuArr, &channelDesc, 1024, 1024 );

    // Register the GL texture with CUDA and map it for access by CUDA
    cudaError_t eError = cudaGraphicsGLRegisterImage( &Res, texName, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard );
    cudaGraphicsMapResources( 1, &Res, 0 );
    // Copy the kernel output d_A into the separately allocated array CuArr
    cudaMemcpy2DToArray( CuArr, 0, 0, d_A, 1024, 1024, 1024, cudaMemcpyDeviceToDevice );
    // Retrieve the CUDA array that actually backs the mapped texture (this overwrites CuArr)
    cudaGraphicsSubResourceGetMappedArray( &CuArr, Res, 0, 0 );
    cudaGraphicsUnmapResources( 1, &Res, 0 );
}

void display( void )
{
    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
    glEnable( GL_TEXTURE_2D );
    glTexEnvf( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL );
    glBindTexture( GL_TEXTURE_2D, texName ); /* bind texName to the active texture unit */
    // Map the 800x600 sub-region of the 1024x1024 texture onto the full window
    glBegin( GL_QUADS );
    glTexCoord2f( 1.0 * 800 / 1024, 1.0 * 600 / 1024 );  glVertex2f( 1.0, 1.0 );
    glTexCoord2f( 1.0 * 800 / 1024, 0.0 );  glVertex2f( 1.0, -1.0 );
    glTexCoord2f( 0.0, 0.0 ); glVertex2f( -1.0, -1.0 );
    glTexCoord2f( 0.0, 1.0 * 600 / 1024 ); glVertex2f( -1.0, 1.0 );

    glEnd();
    glFlush();
    glBindTexture( GL_TEXTURE_2D, 0 ); /* unbind texName from the active texture unit */
    glDisable( GL_TEXTURE_2D );
}

void keyboard( unsigned char key, int x, int y )
{
    switch( key )
    {
    case 27:
        exit( 0 );
        break;
    default:
        break;
    }
}

int main( int argc, char** argv )
{
    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH | GLUT_BORDERLESS | GLUT_CAPTIONLESS );
    glutInitWindowSize( 800, 600 );
    glutInitWindowPosition( 100, 100 );
    glutCreateWindow( argv[ 0 ] );
    cudaSetDevice( 0 );
    cudaGLSetGLDevice( 0 );
    init();

    glutDisplayFunc( display );

    glutKeyboardFunc( keyboard );

    glutMainLoop();
    return 0;
}

Is this the right way to do it, or do I have to use framebuffers? I would rather not; I would like to keep it as simple as possible. What do I have to change to make it work?

Upvotes: 0

Views: 1279

Answers (1)

Paritosh Kulkarni

Reputation: 872

I think you can look at DRM/DRI or the plain Linux framebuffer. You can refer to the DirectFB project: http://www.webos-internals.org/wiki/Directfb. You will need the fbdev module (which provides /dev/fb0) for that, and you may need to recompile your kernel with that module. I am assuming you are using Linux.

So what you are trying to do here is bypass the whole API layer and manipulate the framebuffer directly. DRM is the kernel module that manages access to GPU resources, so you might use that.
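As a minimal sketch of what writing straight to the framebuffer device looks like (this assumes /dev/fb0 exists, is accessible, and uses a 32-bit pixel format; it is independent of the CUDA code above):

#include <fcntl.h>
#include <linux/fb.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

int main( void )
{
    int fb = open( "/dev/fb0", O_RDWR );
    if( fb < 0 )
        return 1;

    struct fb_var_screeninfo vinfo;
    struct fb_fix_screeninfo finfo;
    ioctl( fb, FBIOGET_VSCREENINFO, &vinfo );  /* resolution, bits per pixel */
    ioctl( fb, FBIOGET_FSCREENINFO, &finfo );  /* line length in bytes */

    size_t size = (size_t)finfo.line_length * vinfo.yres;
    void* map = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fb, 0 );
    if( map == MAP_FAILED )
        return 1;
    uint8_t* fbmem = (uint8_t*)map;

    /* Draw a checkerboard, assuming 32 bits per pixel */
    for( uint32_t y = 0; y < vinfo.yres; y++ )
        for( uint32_t x = 0; x < vinfo.xres; x++ )
        {
            uint32_t c = ( ( x / 8 + y / 8 ) % 2 ) ? 0x00FFFFFF : 0x00000000;
            *(uint32_t*)( fbmem + y * finfo.line_length + x * 4 ) = c;
        }

    munmap( fbmem, size );
    close( fb );
    return 0;
}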

On Windows you can write a minifilter driver that writes directly to the framebuffer, or you can use something like http://www.blackhat.com/presentations/win-usa-04/bh-win-04-butler.pdf, which describes Direct Kernel Object Manipulation.

Upvotes: 1
