Reputation: 677
I am trying to test the new OpenCL 2.0 SVM features. I am using the AMD APP SDK on a machine that has the following SVM capabilities:
I am testing with a very simple example in which I do the following (after setting up OpenCL objects of course):
On the host side:
Here is the relevant host code:
int status;
cl_int cl_status;
int num_elements = 10;
status = SetupKernel("test_svm_kernel.cl", "test_svm_kernel");
CHECK_ERROR(status, "SetupKernel");
svm_input = clSVMAlloc(context, CL_MEM_READ_WRITE, num_elements*sizeof(int),
0);
CHECK_ALLOCATION(svm_input, "svm_input");
cl_status = clEnqueueSVMMap(queue, CL_TRUE,
CL_MAP_WRITE_INVALIDATE_REGION,
svm_input, num_elements*sizeof(int), 0, NULL,
NULL);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueSVMMap");
for(int i=0 ; i<num_elements ; i++)
{
((int*)svm_input)[i] = i;
}
for(int i=0 ; i<num_elements ; i++)
{
std::cout << i << ", " << ((int*)svm_input)[i] << std::endl;
}
cl_status = clEnqueueSVMUnmap(queue, svm_input, 0, NULL, NULL);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueSVMUnmap");
cl_status = clGetKernelWorkGroupInfo(
kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
&kernel_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");
cl_status = clGetKernelWorkGroupInfo(
kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong),
&compile_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");
cl_status = clGetKernelWorkGroupInfo(
kernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
sizeof(size_t)*3, &compile_wrkgrp_size, NULL);
CHECK_OPENCL_ERROR(cl_status, "clGetKernelWorkGroupInfo");
size_t local_threads = 1;//kernel_wrkgrp_size;
size_t globl_threads = num_elements;
cl_status = clSetKernelArgSVMPointer(kernel, 0, (void*)(svm_input));
CHECK_OPENCL_ERROR(cl_status, "clSetKernelArgSVMPointer");
cl_event ndr_event;
cl_status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
&globl_threads, NULL,
0, NULL, &ndr_event);
CHECK_OPENCL_ERROR(cl_status, "clEnqueueNDRangeKernel");
cl_status = clFlush(queue);
CHECK_OPENCL_ERROR(cl_status, "clFlush");
On the kernel side: the kernel is really simple.
/* Debug kernel: every work-item prints the int found at its own global
 * index in the buffer passed as `input`. */
__kernel void test_svm_kernel(__global void* input)
{
    /* Index of this work-item along dimension 0. */
    const int gid = get_global_id(0);
    /* The argument arrives untyped; view it as an array of int. */
    __global int* values = (__global int*)input;
    printf("intInput[%d] = %d\n", gid, values[gid]);
}
The output that I get on the host is:
Host: input[0] = 0
Host: input[1] = 1
Host: input[2] = 2
Host: input[3] = 3
Host: input[4] = 4
Host: input[5] = 5
Host: input[6] = 6
Host: input[7] = 7
Host: input[8] = 8
Host: input[9] = 9
which is the natural expected output.
On the kernel, I get strange output (and it changes sometimes):
input[0] = 0
input[1] = 2
input[3] = 1
input[5] = 5
input[1] = 7
input[8] = 1
input[0] = 0
input[0] = 0
input[0] = 0
input[0] = 0
I don't expect the printf calls on the device to execute in order. However, I would at least expect each of them to print the correct array contents.
Any ideas why I get such strange output?
Upvotes: 0
Views: 885
Reputation: 19375
Generally speaking, using printf to check something is risky, since it is purely a debugging, experimental feature (it also breaks the OpenCL execution model, which is fully parallel and unsynchronized). Checking the output buffer is a better idea. For example, maybe the compiler optimized away the whole kernel, since you are not writing the input data anywhere. – DarkZeros
It turns out printf is not reliable and when I changed the input array and mapped it back to the host, the contents were perfectly correct. – Kareem Ergawy
Upvotes: 2