jozxyqk

Reputation: 17258

Creating and addressing large buffers in OpenGL (on the scale of GBs)

I was surprised to find that my shaders start reading zeroes out of buffers when addressing higher indices. I'm guessing this has something to do with the precision of the addressing internals in the driver. I never get any out-of-memory error; the shaders just seem to silently stop accessing the buffers. Correct me if I'm wrong, but I believe CUDA supports 64-bit pointers and large amounts of memory just fine.

I've built a MWE (below) where I create a buffer one vec4 shy of 2GB. If I hit or go over 2GB the shaders don't write anything, even to the first element. Using image_load_store to write to the buffer in a shader only works up to 512MiB. I have much more luck with bindless graphics, which correctly writes to the entire buffer, but I'm still stuck with a max of 2GB even though I can create a larger buffer, and bindless graphics appears to use 64-bit addressing, so I don't see any reason this limit should exist.

How can I create and use buffers larger than 2GB with OpenGL?

I'm using a GTX Titan (6GB).

//#include <windows.h>
#include <assert.h>
#include <stdio.h>
#include <memory.h>
#include <GL/glew.h>
#include <GL/glut.h>

const char* imageSource =
"#version 440\n"
"uniform layout(rgba32f) imageBuffer data;\n"
"uniform float val;\n"
"void main() {\n"
"   imageStore(data, gl_VertexID, vec4(val));\n"
"   gl_Position = vec4(0.0);\n"
"}\n";

const char* bindlessSource =
"#version 440\n"
"#extension GL_NV_gpu_shader5 : enable\n"
"#extension GL_NV_shader_buffer_load : enable\n"
"uniform vec4* data;\n"
"uniform float val;\n"
"void main() {\n"
"   data[gl_VertexID] = vec4(val);\n"
"   gl_Position = vec4(0.0);\n"
"}\n";

GLuint compile(GLenum type, const char* shaderSrc)
{
    //compile a single shader and link it into its own program
    GLuint shader = glCreateShader(type);
    glShaderSource(shader, 1, (const GLchar**)&shaderSrc, NULL);
    glCompileShader(shader);
    int success = 0;
    int loglen = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &loglen);
    GLchar* log = new GLchar[loglen];
    glGetShaderInfoLog(shader, loglen, &loglen, log);
    if (!success)
    {
        printf("%s\n", log);
        exit(1);
    }
    delete[] log;
    GLuint program = glCreateProgram();
    glAttachShader(program, shader);
    glLinkProgram(program);
    return program;
}

int main(int argc, char** argv)
{
    float* check;
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH);
    glutCreateWindow("test");
    glewInit();

    GLsizeiptr bufferSize = 1024 * 1024 * 1024; //1GB
    bufferSize *= 2;
    bufferSize -= 16;
    GLsizeiptr numFloats = bufferSize/sizeof(float);
    GLsizeiptr numVec4s = bufferSize/(sizeof(float)*4);
    float testVal = 123.123f;

    glEnable(GL_RASTERIZER_DISCARD);

    float* dat = new float[numFloats];
    memset(dat, 0, bufferSize);

    //create a buffer with data
    GLuint buffer;
    glGenBuffers(1, &buffer);
    glBindBuffer(GL_TEXTURE_BUFFER, buffer);
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, NULL, GL_STATIC_DRAW);

    //get a bindless address
    GLuint64 address;
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);

    //make a texture alias for it
    GLuint bufferTexture;
    glGenTextures(1, &bufferTexture);
    glBindTexture(GL_TEXTURE_BUFFER, bufferTexture);
    glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, buffer); //should be GL_RGBA32F (see update)
    glBindImageTextureEXT(0, bufferTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32F); //should be GL_RGBA32F (see update)

    //compile the shaders
    GLuint imageShader = compile(GL_VERTEX_SHADER, imageSource);
    GLuint bindlessShader = compile(GL_VERTEX_SHADER, bindlessSource);

    //initialize buffer
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
    assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check

    //run image_load_store
    glUseProgram(imageShader);
    glUniform1i(glGetUniformLocation(imageShader, "data"), 0);
    glUniform1f(glGetUniformLocation(imageShader, "val"), testVal);
    glDrawArrays(GL_POINTS, 0, numVec4s);
    glMemoryBarrier(GL_ALL_BARRIER_BITS);
    check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
    for (GLsizeiptr i = 0; i < numFloats; ++i)
    {
        if (check[i] != testVal)
        {
            printf("failed image_load_store: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
            break;
        }
    }
    glUnmapBuffer(GL_TEXTURE_BUFFER);

    //initialize buffer
    glBufferData(GL_TEXTURE_BUFFER, bufferSize, dat, GL_STATIC_DRAW);
    glMakeBufferResidentNV(GL_TEXTURE_BUFFER, GL_READ_WRITE);
    glGetBufferParameterui64vNV(GL_TEXTURE_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &address);
    assert(glIsBufferResidentNV(GL_TEXTURE_BUFFER)); //sanity check

    //run bindless
    glUseProgram(bindlessShader);
    glProgramUniformui64NV(bindlessShader, glGetUniformLocation(bindlessShader, "data"), address);
    glUniform1f(glGetUniformLocation(bindlessShader, "val"), testVal);
    glDrawArrays(GL_POINTS, 0, numVec4s);
    glMemoryBarrier(GL_ALL_BARRIER_BITS);
    check = (float*)glMapBuffer(GL_TEXTURE_BUFFER, GL_READ_ONLY);
    for (GLsizeiptr i = 0; i < numFloats; ++i)
    {
        if (check[i] != testVal)
        {
            printf("failed bindless: dat[%td] = %f (%fMiB)\n", i, check[i], (double)i*sizeof(float)/1024.0/1024.0);
            break;
        }
    }
    glUnmapBuffer(GL_TEXTURE_BUFFER);

    return 0;
}

This is the output I get:

> make && ./a.out 
g++ -lGL -lGLEW -lglut main.c
failed image_load_store: dat[134217727] = 0.000000 (511.999996MiB)

UPDATE:

Found a mistake: the GL_R32F internal format should be GL_RGBA32F, which allows image_load_store to reach the ~2GB mark. The program now executes correctly with no output until the size reaches 2GB or more, at which point it still fails for both image_load_store and bindless.

GL_MAX_TEXTURE_BUFFER_SIZE is 134217728 for me, which puts the max size at exactly 2GB for RGBA32F (134217728 texels at 16 bytes each; the earlier 512MiB ceiling was the same texel limit at 4 bytes per R32F texel). However, my question about getting larger than 2GB remains. Sure, I could allocate multiple buffers, but that's a bunch of housekeeping and overhead I'd prefer not to deal with.
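For reference, a quick sketch of how to check this texel limit and the byte cap it implies (assumes an active GL context; variable names are illustrative):

GLint maxTexels = 0;
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &maxTexels);
//RGBA32F is 16 bytes per texel, so 134217728 texels caps out at exactly 2GiB
printf("max texture buffer: %d texels, %lld bytes as RGBA32F\n",
       maxTexels, (long long)maxTexels * 16);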

Upvotes: 4

Views: 665

Answers (1)

Gernot Ziegler

Reputation: 11

You might need to go vendor-specific; for NVIDIA, the following extensions are available, allowing you to use 64-bit addresses (and buffer sizes) in shaders:

https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_buffer_load.txt
https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_buffer_store.txt

Basically, these let you start using pointers inside GLSL and pass them up as 64-bit values from the CPU host.
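As a minimal host-side sketch of that pattern (mirroring the question's own code; prog and buf are illustrative names, and the buffer is assumed to be already created and filled):

GLuint64EXT addr;
glBindBuffer(GL_ARRAY_BUFFER, buf);
glMakeBufferResidentNV(GL_ARRAY_BUFFER, GL_READ_WRITE); //pin the buffer and obtain its GPU address
glGetBufferParameterui64vNV(GL_ARRAY_BUFFER, GL_BUFFER_GPU_ADDRESS_NV, &addr);
//the shader declares "uniform vec4* data;" and receives the raw address as a 64-bit value
glProgramUniformui64NV(prog, glGetUniformLocation(prog, "data"), addr);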

The maximum address usable by shaders is returned by GL_MAX_SHADER_BUFFER_ADDRESS_NV:

GLuint64EXT max_shader_buffer_address;
glGetIntegerui64vNV(GL_MAX_SHADER_BUFFER_ADDRESS_NV, &max_shader_buffer_address);
printf("Maximum shader buffer address: %llu\n",
       (unsigned long long)max_shader_buffer_address);

On my machine with an RTX 3070 it is 18446744073709551615, i.e. 2^64 - 1, the full 64-bit address space.
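Since the returned value is a raw GPU address, you can also offset it with ordinary 64-bit arithmetic on the host before uploading it, e.g. to window the shader's vec4* into the upper half of a large allocation. A sketch reusing address and bufferSize from the question's code (my assumption, untested here, is that the driver accepts any address inside a resident buffer's range):

GLuint64 upperHalf = address + (GLuint64)bufferSize / 2; //plain 64-bit add on the host
glProgramUniformui64NV(bindlessShader,
                       glGetUniformLocation(bindlessShader, "data"), upperHalf);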

Upvotes: 1
