I'm learning OpenCL, and for some reason my kernel does nothing:
#include <stdlib.h>
#include <stdio.h>
#include <CL/cl.h>
/* Status code of the most recent checked call; zero means success. */
int err = 0;
/* Report the source line of a failed check on stderr (__LINE__ is an int). */
#define PRINTERR() fprintf(stderr, "Error at line %d.\n", __LINE__)
/*
 * CHECKERR(x):    if x is nonzero, report and return the current line number
 *                 from the enclosing function.
 * CHECKNOTERR(x): same, but fires when x is zero. The argument is
 *                 parenthesized so compound expressions such as `a || b`
 *                 are negated as a whole.
 * Both are kept as a bare `if {...}` because this file invokes them without
 * a trailing semicolon; do not use them as the body of an unbraced if/else.
 */
#define CHECKERR(x) if(x){PRINTERR();return __LINE__;}
#define CHECKNOTERR(x) if(!(x)){PRINTERR();return __LINE__;}
/*
 * OpenCL C source for the "square" kernel, compiled at run time.
 * Each work-item with index i < count writes input[i] * input[i] to
 * output[i]; work-item 0 additionally prints "test".
 * Adjacent string literals are concatenated by the compiler, so no
 * line-continuation backslashes are needed.
 */
const char *KernelSource =
    "__kernel void square( \n"
    " __global float* input, \n"
    " __global float* output, \n"
    " const unsigned int count) \n"
    "{ \n"
    " int i = get_global_id(0); \n"
    " if(i == 0) printf(\"test\\n\"); \n"
    " if(i < count) \n"
    " output[i] = input[i] * input[i]; \n"
    "} \n";
#define DATA_SIZE 1024
int main(){
float data[DATA_SIZE];
float results[DATA_SIZE];
size_t global;
size_t local;
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;
cl_mem input;
cl_mem output;
unsigned int i = 0;
unsigned int count = DATA_SIZE;
for(i = 0; i < count; ++i)
//data[i] = rand() / (float)RAND_MAX;
data[i] = 2.f;
int gpu = 1;
err = clGetPlatformIDs (1, &platform_id, NULL); CHECKERR(err)
err = clGetDeviceIDs(platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); CHECKERR(err)
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); CHECKERR(!context)
commands = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); CHECKERR(err)
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
CHECKERR(!input || !output)
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); CHECKERR(err)
program = clCreateProgramWithSource(context, 1, &KernelSource, NULL, &err); CHECKERR(err)
err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); CHECKERR(err)
kernel = clCreateKernel(program, "square", &err); CHECKERR(err)
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); CHECKERR(err)
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); CHECKERR(err)
err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); CHECKERR(err)
unsigned int correct = 0;
for(i = 0; i < count; ++i)
printf("Computed '%d/%d' correct values!\n", correct, count);
// free
return 0;
I expect the kernel to run and fill the output buffer, but it doesn't.
I tried reading back the input buffer instead of the output buffer, and that works fine. The printf in the kernel prints nothing, and when I run the program, clEnqueueReadBuffer returns only zeros. I have an AMD GPU, so I can't test it on the CPU. I tried another example (the one here) and it worked. Any help is appreciated.
