Reputation: 15
I wrote some code to parallelize convolution with one filter in python on my gpu. I keep receiving this error and I am unsure how to fix it. I posted the error below as well as my code. Thank you so much in advance.
I checked out some past stack overflow responses for this question but none of them seemed to do the trick. So it's possible I may have unaccounted for something that you may be able to catch.
File "gpu_test_prgrm.py", line 127, in <module>
prg.multiplymatrices(queue, conv_img[0].shape , None, 3,3,2,2,2,2 ,cl_a, cl_b, cl_c)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pyopencl/__init__.py", line 888, in kernel_call
return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
File "<generated code>", line 7, in enqueue_knl_multiplymatrices
pyopencl._cl.LogicError: Kernel.set_arg failed: INVALID_VALUE - when processing arg#1 (1-based): invalid kernel argument
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
fake_img = np.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
fake_filters = np.array([[[1.0,1.0],[1.0,1.0]],[[2.0,2.0],[2.0,2.0]]])
conv_img = np.zeros((2,2,2))
#print(fake_img)
#print(fake_filters)
#print(fake_img[0:2,0:2])
#print(fake_img.flatten())
#print(fake_filters.flatten())
#print(fake_filters[0].flatten())
#print(conv_img[0].flatten())
mf = cl.mem_flags
cl_a = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf = fake_img.flatten())
cl_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf = fake_filters[0].flatten())
cl_c = cl.Buffer(ctx, mf.WRITE_ONLY, conv_img[0].flatten().nbytes)
#ROWS IN FILTER, COLS IN FILTER = frow, fcol
#ROWS IN CONV IMAGE, COLS IN CONV IMAGE = crow, ccol
#ROWS IN IMAGE, COLS IN IMAGE = irow,icol
prg = cl.Program(ctx, """
__kernel void multiplymatrices(int irow, int icol, int frow, int fcol, int crow, int ccol, __global float * fake_img, __global float * fake_filters, __global float * conv_img)
{
int i = get_global_id(0);
int j = get_global_id(1);
printf("You entered: %d", i);
printf("You entered: %d", j);
conv_img[i * ccol + j ] = 0;
for (int row=0; row < frow; row++)
{
for (int col=0; col < fcol; col++)
{
/*(i * col + j) = translation of conv image to reg image start*/
/*(row * icol + col) = creating the subarray in the matrix*/
conv_img[i * ccol + j] += fake_img[(row * icol + col)+(i*icol+j)] * fake_filters[row * frow + col];
}
}
}
""").build()
t0 = datetime.datetime.now()
print(conv_img[0].shape)
prg.multiplymatrices(queue, conv_img[0].shape , None, 3,3,2,2,2,2 ,cl_a, cl_b, cl_c)
conv_img2 = np.zeros((2,2))
cl.enqueue_copy(queue, conv_img2 , cl_c)
#print(conv_img2)
delta_t = datetime.datetime.now() - t0
print('OpenCL Multiplication: ' + str(delta_t))
Upvotes: 0
Views: 478
Reputation: 8484
Numbers in python are python objects and need to be wrapped into np.int32()
to pass them as int
to the kernel:
prg.multiplymatrices(queue, conv_img[0].shape , None, np.int32(3),np.int32(3),np.int32(2),np.int32(2),np.int32(2),np.int32(2) ,cl_a, cl_b, cl_c)
Upvotes: 1