Reputation: 129
I trying to put gradient on image - and that works.CPU and GPU programs should do the same. I have problem with output images because code for GPU giving me diffrent image than code for CPU and I don't know where is mistake. I think that CPU code it's fine but GPU not. Output images - orginal, cpu, gpu - Please check my code. Thanks.
import pyopencl as cl
import sys
import Image
import numpy
from time import time
def gpu_gradient():
if len(sys.argv) != 3:
print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
return 1
# create context and command queue
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
# load image
im = Image.open(sys.argv[1])
if im.mode != "RGBA":
im = im.convert("RGBA")
imgSize = im.size
buffer = im.tostring() # len(buffer) = imgSize[0] * imgSize[1] * 4
# Create ouput image object
clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
cl.channel_type.UNSIGNED_INT8)
input_image = cl.Image(ctx,
cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
clImageFormat,
imgSize,
None,
buffer)
output_image = cl.Image(ctx,
cl.mem_flags.WRITE_ONLY,
clImageFormat,
imgSize)
# load the kernel source code
kernelFile = open("gradient.cl", "r")
kernelSrc = kernelFile.read()
# Create OpenCL program
program = cl.Program(ctx, kernelSrc).build()
# Call the kernel directly
globalWorkSize = ( imgSize[0],imgSize[1] )
gpu_start_time = time()
program.gradientcover(queue,
globalWorkSize,
None,
input_image,
output_image)
# Read the output buffer back to the Host
buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
origin = ( 0, 0, 0 )
region = ( imgSize[0], imgSize[1], 1 )
cl.enqueue_read_image(queue, output_image,
origin, region, buffer).wait()
# Save the image to disk
gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
gsim.save("GPU_"+sys.argv[2])
gpu_end_time = time()
print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
def cpu_gradient():
if len(sys.argv) != 3:
print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
return 1
gpu_start_time = time()
im = Image.open(sys.argv[1])
if im.mode != "RGBA":
im = im.convert("RGBA")
pixels = im.load()
for i in range(im.size[0]):
for j in range(im.size[1]):
RGBA= pixels[i,j]
RGBA2=RGBA[0],RGBA[1],0,0
pixel=RGBA[0]+RGBA2[0],RGBA[1]+RGBA2[1],RGBA[2],RGBA[3]
final_pixels=list(pixel)
if final_pixels[0]>255:
final_pixels[0]=255
elif final_pixels[1]>255:
final_pixels[1]=255
pixel=tuple(final_pixels)
pixels[i,j]=pixel
im.save("CPU_"+sys.argv[2])
gpu_end_time = time()
print("CPU Time: {0} s".format(gpu_end_time - gpu_start_time))
cpu_gradient()
gpu_gradient()
Kernel code:
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
__kernel void gradientcover(read_only image2d_t srcImg,
write_only image2d_t dstImg)
{
int2 coord = (int2) (get_global_id(0), get_global_id(1));
uint4 pixel = read_imageui(srcImg, sampler, coord);
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0);
pixel=pixel + pixel2;
if(pixel.x > 255) pixel.x=255;
if(pixel.y > 255) pixel.y=255;
// Write the output value to image
write_imageui(dstImg, coord, pixel);
}
Upvotes: 0
Views: 385
Reputation: 129364
Your CL and Python code do not do the same thing!
RGBA= pixels[i,j]
RGBA2=RGBA[0],RGBA[1],0,0
pixel=RGBA[0]+RGBA2[0],RGBA[1]+RGBA2[1],RGBA[2],RGBA[3]
adds the RG component to the pixel.
uint4 pixel = read_imageui(srcImg, sampler, coord);
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0);
pixel=pixel + pixel2;
adds the X, Y from the coordinates to the pixel.
It is highly likely that this is the cause of difference between your results.
Assuming (from the description) that you want to darkenlighten the image by coordinates, I'd sugest the python code should be:
RGBA= pixels[i,j]
RGBA2=i,j,0,0
instead.
Upvotes: 1