Reputation: 1
I am trying to run the following code on NVIDIA GPU, but I get different results each time. As far as I could figure out, the problem is not with the spinlock itself (it correctly enforces the locking of the variable in local memory), but with the broken barrier after the atomicFunc
call. I tried to run this example with 1 workgroup of size 256. The problem is observed only on NVIDIA GPU.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
int baseFunc(private int x)
{
return (x + 1);
}
int atomicFunc(__local int* localAccMutex, __local int* x)
{
int oldValue;
bool flag = 1;
while (flag) {
int old = atom_xchg(&localAccMutex[0], 1);
if (old == 0) {
oldValue = *x;
*x = baseFunc(*x);
atom_xchg(&localAccMutex[0], 0);
flag = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
};
return oldValue;
}
__kernel void kernel(__global int* result)
{
__local int localAcc[1];
__local int localAccMutex[1];
if (get_local_id(0) == 0) {
localAccMutex[0] = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) == 0) {
localAcc[0] = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
atomicFunc(localAccMutex, &localAcc[0]);
// warps are ignoring this barrier
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) == 0) {
result[0] = localAcc[0];
};
}
I would be grateful for any help.
Upvotes: 0
Views: 138
Reputation: 74
The problem could be the barrier inside while(flag)
. According to barrier
specification:
This function must be encountered by all work-items in a work-group executing the kernel.
If barrier is inside a loop, all work-items must execute the barrier for each iteration of the loop before any are allowed to continue execution beyond the barrier.
You also don't need to run atom_xchg(&localAccMutex[0], 0)
after you have changed the value to one. You can flip the checked value each iteration instead:
int atomicFunc(__local int* localAccMutex, __local int* x)
{
int oldValue;
int flip = 0;
bool flag = 1;
while (flag) {
int old = atom_xchg(&localAccMutex[0], 1 - flip);
if (old == flip) {
oldValue = *x;
*x = baseFunc(*x);
flag = 0;
}
flip = 1 - flip; // 0 -> 1; 1 -> 0
}
return oldValue;
}
Upvotes: 2