Reputation: 1116
My goal is to reduce the calling overhead when I launch a cl_kernel, so I started by creating a functor class called clfunctor.
The class has a constructor that takes the kernel source code, the kernel (entry) function name and the compile options as parameters. The constructor calls set_kernel_code(...), which compiles and builds the code with those parameters and finally stores the result in 'mykernel'.
class clfunctor
{
    // compile the source code into a cl_kernel
    void set_kernel_code(const std::string& sourcecode, const std::string& program_name, const std::string& options="")
    {
        cl_int err;
        char* kernel_source = new char[ sourcecode.size() + 1 ];
        std::copy( sourcecode.begin(), sourcecode.end(), kernel_source );
        kernel_source[ sourcecode.size() ] = '\0'; // OpenCL needs a '\0' to terminate the kernel code
        cl_program program = clCreateProgramWithSource( myclcontext, 1, (const char **)&kernel_source, NULL, &err );
        if( err != 0 ) echo_error( cl_error_string(err) ); // if an error occurs here.. call cl_init() ???
        // build the program for the OpenCL device
        err = clBuildProgram( program, 1, &device[iplatform][idevice], options.c_str(), NULL, NULL );
        if( err != 0 )
        {
            // print the build log on failure
            const size_t LOG_SIZ = 2040;
            char log[ LOG_SIZ ];
            clGetProgramBuildInfo( program, device[iplatform][idevice], CL_PROGRAM_BUILD_LOG, LOG_SIZ, log, NULL );
            echo_error( log );
        }
        // create the kernel from the built program
        mykernel = clCreateKernel( program, program_name.c_str(), &err );
        if( err != 0 ) echo_error( cl_error_string(err) );
        clReleaseProgram( program );
        delete[] kernel_source;
    }
public:
    cl_kernel mykernel;
    std::vector<size_t> local_nd;
    std::vector<size_t> global_nd;
    clfunctor(const std::string& sourcecode, const std::string& program_name, const std::string& options)
    {
        set_kernel_code( sourcecode, program_name, options );
    }
    void operator()() const
    {
        cl_int err;
        //-----------------------------------
        // create the GPU command queue
        //-----------------------------------
        cl_command_queue queue = clCreateCommandQueue( myclcontext, device[iplatform][idevice], 0, &err );
        if( err != 0 ) echo_error( cl_error_string(err) );
        cl_uint work_dim = local_nd.size(); // ND dimension ( N )
        err = clEnqueueNDRangeKernel( queue, mykernel, work_dim, NULL, global_nd.data(), local_nd.data(), 0, NULL, NULL );
        if( err != 0 ) echo_error( cl_error_string(err) );
        clFinish( queue );
        clReleaseCommandQueue( queue );
    }
    virtual ~clfunctor()
    {
        // release the kernel when this object is destroyed
        clReleaseKernel( mykernel );
    }
};
I also have a sample CL file, 'nothing.cl':
kernel void nothing()
{
    printf("[ echo ]: %d\n", MYID );
}
After that, I create the functor objects like this:
clfunctor hello( file2code("nothing.cl"), "nothing", "-DMYID=100" );
hello.local_nd  = {1,1,1};
hello.global_nd = {1,1,1};
hello(); // prints 100

clfunctor hello2( file2code("nothing.cl"), "nothing", "-DMYID=123" );
hello2.local_nd  = {1,1,1};
hello2.global_nd = {1,1,1};
hello2(); // prints 123

hello(); // WRONG: I expect it to print 100, but it prints 123
As you can see in the code above, I create two objects, 'hello' and 'hello2'. Both objects share the same code from 'nothing.cl' but differ in the -D (define) compile option: 'hello' uses -DMYID=100 ( MYID = 100 ) while 'hello2' uses -DMYID=123 ( MYID = 123 ).
I expect the two objects to hold different mykernel handles, since the compile options are different. When I first call hello(), it correctly prints '100'.
Then I call hello2(), and it also correctly prints '123'.
However, when I call hello() again, instead of printing '100' it prints the value from hello2, which is 123.
How can I get a different kernel for each object using only the -D (define) compile option? Or did I do something wrong or misunderstand how OpenCL kernels work?
Upvotes: 1
Views: 451
Reputation: 4254
I am afraid caching is happening: the compiler generates a binary with 123 in it and simply reuses it. Create the .cl file on the fly with a random name and you will always get the correct value. Or, better, don't use compile flags at all and just pass the value as a kernel argument.
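For the kernel-argument approach, a minimal sketch (assuming the same clfunctor class and file2code helper from the question, with the kernel changed to take the id as an argument) could look like this:

// nothing.cl: take the id as a kernel argument instead of a -D define
kernel void nothing(int myid)
{
    printf("[ echo ]: %d\n", myid );
}

// host side: compile once, then set a different argument before each call
clfunctor hello( file2code("nothing.cl"), "nothing", "" );
hello.local_nd  = {1,1,1};
hello.global_nd = {1,1,1};

cl_int myid = 100;
clSetKernelArg( hello.mykernel, 0, sizeof(cl_int), &myid );
hello(); // prints 100

myid = 123;
clSetKernelArg( hello.mykernel, 0, sizeof(cl_int), &myid );
hello(); // prints 123

This way the same compiled kernel is reused and the value is no longer baked in at compile time, so any source-level caching cannot interfere.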
Upvotes: 1