Reputation: 91
I have this code, but sometimes It works, sometimes NOT(write printf("ERR:%d\n", id)). I work under CUDA 4.1 and have GTS450 which is compute capability 2.1.
The code doesnt have higher purpose, Iam just trying to find why Its not working, because My mind tell me, that right :]
If you want to run it, maybe you need to execute few times, when "error" appear or change grid size!
PS: here you can download exe file for win64 - you need to have cuda4.1 driver
class MAN
{
public:
int m_id;
int m_use;
__device__
MAN()
{
m_id = -1;
m_use = 0;
}
};
__device__ int* d_ids = NULL;
__device__ int d_last_ids = 0;
__device__ MAN* d_mans = NULL;
__global__ void init()
{
d_mans = new MAN[500]; //note: 500 is more than enough!
d_ids = new int[500];
for(int i=0; i < 500; i++)
d_ids[i] = 0;
}
__device__ int getMAN() //every block get unique number, so at one moment all running blocks has different id
{
while(true)
{
for(int i=0; i < 500; i++)
if(atomicCAS(&(d_mans[i].m_use), 0, 1)==0)
return i;
}
}
__device__ void returnMAN(int id)
{
int s = atomicExch(&(d_mans[id].m_use), 0);
}
__global__ void testIt()
{
if(threadIdx.x==0)
{
int man = getMAN();
int id = d_mans[man].m_id;
if(id == -1) //If It never works with this "id", its creating new
{
id = atomicAdd(&d_last_ids, 2);
d_ids[id] = 10; //set to non-zero
d_mans[man].m_id = id; //save new id for next time
printf("ADD:%d\n", id);
}
if(d_ids[id]==0)
printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!
returnMAN(man);
}
}
int main()
{
init<<<1, 1>>>();
printf("init() err: %d\n", cudaDeviceSynchronize());
testIt<<<20000, 512>>>();
printf("testIt() err: %d\n", cudaDeviceSynchronize());
getchar();
return 0;
}
Upvotes: 1
Views: 899
Reputation: 91
I have changed this:
__device__ int* d_ids = NULL;
to this:
__device__ volatile int* d_ids = NULL;
and it works ok!!!
And even It doesnt need __threadfence();
Upvotes: 0
Reputation: 293
This seems to happen, because this code
int id = d_mans[man].m_id;
if(id == -1) //If It never works with this "id", its creating new
{
id = atomicAdd(&d_last_ids, 2);
d_ids[id] = 10; //set to non-zero
d_mans[man].m_id = id; //save new id for next time
printf("ADD:%d\n", id);
}
if(d_ids[id]==0)
printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!
Contains race condition if some block wrote to d_mans[man].m_id, but still haven't wrote to d_ids[id]. Probably compiler exchanges instruction "set to non-zero" and "save new id for next time" or cache just don't get updated in-time.
Actually, problem is with your allocator -- it is better to remember index of last used 'man' than look for it.
Upvotes: 1