Randall Fairman
Randall Fairman

Reputation: 306

OpenCL: memory recovery and threading?

This is my first attempt at using OpenCL. It's being done using Java with the glue from jocl.org. I suspect that there's some fundamental issue about memory management or threads that I'm not aware of.

As an initial trial, the OpenCL code takes a 1D array of source bytes that represents a 2D image and samples it to form a smaller dest image. The fact that this data represents an image is irrelevant for what follows, aside from the fact that the src and dest images have width and height used to index into the arrays.

The program works the first time it's called, and it might work for a 2nd and 3rd time, but it will quickly bomb. I've been playing with this for a while now, and the exact error reported varies so much that I think it's something fundamental. The current incarnation (below) reports CL_INVALID_COMMAND_QUEUE in the call to clReleaseCommandQueue(). Another common error is CL_INVALID_MEM_OBJECT at the call to clReleaseMemObject() and sometimes the JVM itself will crash with EXCEPTION_ACCESS_VIOLATION.

My best guess is that there's some basic problem about repeatedly calling the same code and ensuring that everything that code does is cleaned up before making a second call.

Here's the code, particularly the scaleGPU() function:


import static org.jocl.CL.*;
import org.jocl.*;

public class Render {
  
  private byte[] srcData = null;
  private int imageW = 0;
  private int imageH = 0;

  private static String programSource = """
    __kernel void sampleKernel(const double scale,
      const int srcW,const int srcH,
      const int destW,const int destH,
      constant char *src,__global char *dest)
      {
        // Index in src image. This is a "1D index."
        int gid = get_global_id(0);
        
        // I don't think this test should be necessary, but maybe (?)
        if (gid < srcW * srcH)
          {
            // Convert gid to an (x,y) position.
            // There's no need for rounding or other trickiness.
            // This kind of integer division truncates to a whole number.
            // This explicit convert() shouldn't be needed, for the reason just given.
            int yi = convert_int_rtz(gid / srcW);
            int xi = gid - yi*srcW;
            
            int xd = convert_int_rtz(xi * scale);
            int yd = convert_int_rtz(yi * scale);
            
            // This shouldn't be needed, but try it as a test.
            if (xd >= destW) xd = destW - 1;
            if (yd >= destH) yd = destH - 1;
            if (xd < 0) xd = 0;
            if (yd < 0) yd = 0;
                    
            //int index = convert_int(yd*destW + xd); 
            int index = yd*destW + xd;
            dest[index] = src[gid];
          }
      }
      """;
  
  public void setSrcData(byte[] theData, int w,int h) {
    
    this.srcData = theData;
    this.imageW = w;
    this.imageH = h;
  }  

  private byte[] scaleGPU(double scale,int destW,int destH) {
    
    // Will hold the answer, as calculated using the GPU.
    byte[] scaled = new byte[destW * destH];
    
    // Tedious setup...
    CL.setExceptionsEnabled(true);
    
    int numPlatformsArray[] = new int[1];
    clGetPlatformIDs(0, null, numPlatformsArray);
    int numPlatforms = numPlatformsArray[0];
    
    final int platformIndex = 0;
    cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
    clGetPlatformIDs(platforms.length, platforms, null);
    cl_platform_id platform = platforms[platformIndex];
    
    cl_context_properties contextProperties = new cl_context_properties();
    contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
    
    final long deviceType = CL_DEVICE_TYPE_ALL;
    int numDevicesArray[] = new int[1];
    clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
    int numDevices = numDevicesArray[0];
    
    final int deviceIndex = 0;
    cl_device_id devices[] = new cl_device_id[numDevices];
    clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
    cl_device_id device = devices[deviceIndex];
    
    cl_context context = clCreateContext(
        contextProperties, 1, new cl_device_id[]{device}, 
        null, null, null);
    
    cl_queue_properties properties = new cl_queue_properties();
    cl_command_queue commandQueue = clCreateCommandQueueWithProperties(
        context, device, properties, null);
    
    cl_program program = clCreateProgramWithSource(context,
        1, new String[]{ programSource }, null, null);
    clBuildProgram(program, 0, null, null, null, null);
    
    cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
    
    // Set up to make the call
    int n = this.imageW * this.imageH;
    
    cl_mem cl_srcData = clCreateBuffer(context, 
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        Sizeof.cl_char * n,Pointer.to(this.srcData),null);
    
    cl_mem cl_destData = clCreateBuffer(context,CL_MEM_READ_WRITE,
        Sizeof.cl_char * n,null,null);
    
    int a = 0;
    clSetKernelArg(kernel, a++, Sizeof.cl_double, Pointer.to(new double[] {scale}));
    clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[] {this.imageW}));
    clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[] {this.imageH}));
    clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[] {destW}));
    clSetKernelArg(kernel, a++, Sizeof.cl_int, Pointer.to(new int[] {destH}));
    clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(cl_srcData));
    clSetKernelArg(kernel, a++, Sizeof.cl_mem, Pointer.to(cl_destData));
    
    // Make the call
    long global_work_size[] = new long[]{n};
    clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
        global_work_size, null, 0, null, null);
    
    clEnqueueReadBuffer(commandQueue, cl_destData, CL_TRUE, 0,
        n * Sizeof.cl_char, Pointer.to(scaled), 0, null, null);
    
    // Clean up and return.
    clReleaseMemObject(cl_srcData);
    clReleaseMemObject(cl_destData);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(commandQueue);
    clReleaseContext(context);
    
    return scaled;
  }
}

Upvotes: 0

Views: 41

Answers (0)

Related Questions