user1873073
user1873073

Reputation: 3660

What might cause OpenCL to crash on cl::Program.build?

This program crashes when I call cl::Program::build(), but I don't know why. It crashes on the last line of this block of code:

#define __NO_STD_VECTOR 
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hPP>
#include <iostream>
#include <fstream>
#include <string>
#include <CL/cl.h>

using namespace std;
using namespace cl;
// Host program from the question: fills a 1024x1024 table of floats, creates an
// OpenCL context/queue/buffers, uploads the table, then loads and builds the
// kernel source read from a file on disk.
// NOTE(review): _tmain/_TCHAR are not declared by any header visible in this
// snippet - presumably <tchar.h> arrives via a precompiled header; confirm.
int _tmain(int argc, _TCHAR* argv[])
{
    int tmpSize = 1024;
    // my2D is a table of tmpSize row pointers; every row is its own separate
    // heap allocation. Nothing in this function ever delete[]s them.
    float **my2D = new float*[tmpSize];
    for(int i = 0; i < tmpSize; i++)
    {
        my2D[i] = new float[tmpSize];
        for(int i2 = 0; i2 < tmpSize; i2++)
        {
            my2D[i][i2] = 5;
        }
    }
    cl::vector <Platform> platforms;
    // The status returned by Platform::get is discarded.
    Platform::get(&platforms);

    // Selects the platform at index 1 (the second one) without checking how
    // many platforms were actually returned.
    cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[1]()), 0};
    Context context(CL_DEVICE_TYPE_ALL, cps);

    cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

    // Uses the first device of the context; devices is not checked for emptiness.
    CommandQueue queue = CommandQueue(context, devices[0], 0);

    int W = tmpSize; //i.width();
    int H = tmpSize; //i.height();

    // Input and output device buffers, each sized for W*H floats.
    Buffer d_ip = Buffer(context, CL_MEM_READ_ONLY, W*H*sizeof(float));
    Buffer d_op = Buffer(context, CL_MEM_WRITE_ONLY, W*H*sizeof(float));

    // Blocking write of W*H*sizeof(float) bytes. The host pointer passed here is
    // my2D itself (a float**), i.e. the address of the row-pointer table, not
    // the address of any row's float data.
    queue.enqueueWriteBuffer(d_ip, CL_TRUE, 0, W*H*sizeof(float), my2D);

    // Reads the whole kernel file into a string; the stream is not checked for
    // having opened successfully.
    std::ifstream sourceFileName("c:\\users\\me\\desktop\\img_rotate_kernel.cl");
    std::string sourceFile(istreambuf_iterator<char>(sourceFileName), (istreambuf_iterator<char>()));

    // The length passed is length() + 1, so it counts the terminating NUL too.
    Program::Sources rotn_source(1,std::make_pair(sourceFile.c_str(), sourceFile.length() + 1));
    Program rotn_program(context, rotn_source);
    // NOTE(review): __CL_ENABLE_EXCEPTIONS is defined above and there is no
    // try/catch here, so a failing wrapper call presumably surfaces as an
    // uncaught exception - confirm against the cl.hpp version in use.
    rotn_program.build(devices); // <----- CRASHES HERE
}

using this kernel

// Rotation kernel as posted in the question. For each work-item (ix, iy) it
// computes a source position (xpos, ypos) from sinTheta/cosTheta and, when that
// position lies inside the W x H image, copies src_data[ypos*W+xpos] into
// dest_data[iy*W+ix].
// NOTE(review): there is no opening '{' between the parameter list below and
// the first statement, although a closing '}' ends the block.
__kernel void img_rotate(__global float* dest_data, __global float* src_data, int W, int H, float sinTheta, float cosTheta)
    // Work-item coordinates in dimensions 0 and 1.
    const int ix = get_global_id(0);
    const int iy = get_global_id(1);
    // Centre of rotation.
    // NOTE(review): y0 is computed from W, not H - confirm this is intended.
    float x0 = W/2.0f;
    float y0 = W/2.0f;
    // Offsets of this work-item from the centre.
    float xOff = ix-x0;
    float yOff = iy - y0;
    int xpos = (int)(xOff*cosTheta + yOff*sinTheta + x0);
    // NOTE(review): both terms use yOff; xOff does not appear in ypos - looks
    // like a typo for xOff in the sinTheta term, confirm.
    int ypos = (int)(yOff*cosTheta - yOff*sinTheta + y0);
    // Only copy when the source position is inside the image bounds.
    if(((int)xpos>=0) && ((int)xpos < W) && ((int)ypos>=0) && ((int)ypos<H))
    {
        dest_data[iy*W+ix] = src_data[ypos*W+xpos];
    }
}

Here is the exception dialog I get when it crashes:

enter image description here

Upvotes: 1

Views: 1816

Answers (1)

anthonyvd
anthonyvd

Reputation: 7590

From the OpenCL C++ wrapper spec:

cl::Program::Program returns a valid program object and err is set to CL_SUCCESS if the program object is created successfully. Otherwise, it returns one of the following error values returned in err [...]

Your program object was likely not created properly. Change your program construction call to use the err parameter, following this signature:

cl::Program::Program(const Context& context, const Sources& sources, cl_int * err = NULL)

And make sure err == CL_SUCCESS before doing anything else with your program object.

Most OpenCL calls allow you to pass a pointer to an error parameter. You should really do so and check it after your calls (at least in debug builds I guess) to reduce future headaches.


OK, so I modified your source code a little. Here it is; I'll explain my changes right after.

#define __NO_STD_VECTOR 
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include <CL/cl.h>

#define ARRAY_SIZE 128

using namespace std;
using namespace cl;

int main(int, char**)
{
    int err;

    float my2D[ARRAY_SIZE * ARRAY_SIZE] = { 0 };
    for(int i = 0; i < ARRAY_SIZE * ARRAY_SIZE; i++)
    {
        my2D[i] = 5;
    }

    cl::vector <Platform> platforms;
    err = Platform::get(&platforms);

    if(err != CL_SUCCESS) {
        std::cout << "Platform::get failed - " << err << std::endl;
        std::cin.get();
    }

    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0]()), 0 };
    Context context(CL_DEVICE_TYPE_ALL, cps, nullptr, nullptr, &err);

    if(err != CL_SUCCESS) {
        std::cout << "Context::Context failed - " << err << std::endl;
        std::cin.get();
    }

    cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(&err);

    if(err != CL_SUCCESS) {
        std::cout << "Context::getInfo failed - " << err << std::endl;
        std::cin.get();
    }

    CommandQueue queue = CommandQueue(context, devices[0], 0, &err);

    if(err != CL_SUCCESS) {
        std::cout << "CommandQueue::CommandQueue failed - " << err << std::endl;
        std::cin.get();
    }

    int W = ARRAY_SIZE; //i.width();
    int H = ARRAY_SIZE; //i.height();

    Buffer d_ip = Buffer(context, CL_MEM_READ_ONLY, W*H*sizeof(float), nullptr, &err);

    if(err != CL_SUCCESS) {
        std::cout << "Buffer::Buffer 1 failed - " << err << std::endl;
        std::cin.get();
    }

    Buffer d_op = Buffer(context, CL_MEM_WRITE_ONLY, W*H*sizeof(float), nullptr, &err);

    if(err != CL_SUCCESS) {
        std::cout << "Buffer::Buffer 2 failed - " << err << std::endl;
        std::cin.get();
    }

    err = queue.enqueueWriteBuffer(d_ip, CL_TRUE, 0, W*H*sizeof(float), &my2D[0]);

    if(err != CL_SUCCESS) {
        std::cout << "Queue::enqueueWriteBuffer failed - " << err << std::endl;
        std::cin.get();
    }

    std::ifstream sourceFileName("so_question.cl");
    std::string sourceFile(std::istreambuf_iterator<char>(sourceFileName), (std::istreambuf_iterator<char>()));

    Program::Sources rotn_source(1,std::make_pair(sourceFile.c_str(), sourceFile.length() + 1));
    Program rotn_program(context, rotn_source, &err);

    if(err != CL_SUCCESS) {
        std::cout << "Program::Program failed - " << err << std::endl;
        std::cin.get();
    }

    err = rotn_program.build(devices);

    if(err != CL_SUCCESS) {
        std::cout << "Program::build failed - " << err << std::endl;
        std::cin.get();
    }
}

You'll notice I added a lot more error checks. This allowed me to find out that the call to Context::Context actually did fail in your initial program. The issue likely was that platforms[1] didn't exist (there was 1 element in the vector) so I switched it to platforms[0].

Once that was fixed, I was getting an access violation on the queue.enqueueWriteBuffer() call. The issue was that your 2-dimensional array was actually an array of pointers to separately heap-allocated rows, so passing my2D hands OpenCL the address of the pointer table rather than the address of any float data. That's a problem because OpenCL expects to read W*H*sizeof(float) bytes from one contiguous block of memory, which is not what you get when allocating each row with new in a loop like you did. There is no guarantee that the rows are next to each other in memory.

To solve this point, I allocated a one dimensional array on the stack (see the loop at the beginning). The call then becomes

queue.enqueueWriteBuffer(d_ip, CL_TRUE, 0, W*H*sizeof(float), &my2D[0]);

However, you probably won't be able to do so with a 1024 x 1024 array of float, because it would overflow the stack: that is 4 MiB of data, which exceeds, for example, the 1 MB default stack size of MSVC builds. If you need an array that big, allocate a single one-dimensional array on the heap that is large enough to contain your data, and perform the index arithmetic yourself (element (row, col) lives at index row * width + col). This ensures you get your entire storage space as one contiguous chunk.

The code now errors with CL_BUILD_PROGRAM_FAILURE on the err = rotn_program.build() call which means there's probably an error in your CL program code. Since this is an entirely different issue, I'll let you figure this one out.

Upvotes: 1

Related Questions