Reputation: 11
I'm playing around with locks and critical sections to make a loop thread-safe. Here is the code:
#pragma omp parallel for num_threads(4) private(k, f_part_k, len, len_3, mg, fact)
for (k = part+1; k < n; k++) {
    /* Compute force on part due to k */
    f_part_k[X] = curr[part].s[X] - curr[k].s[X];
    f_part_k[Y] = curr[part].s[Y] - curr[k].s[Y];
    len = sqrt(f_part_k[X]*f_part_k[X] + f_part_k[Y]*f_part_k[Y]);
    len_3 = len*len*len;
    mg = -G*curr[part].m*curr[k].m;
    fact = mg/len_3;
    f_part_k[X] *= fact;
    f_part_k[Y] *= fact;
    /* Add force in to total forces */
    omp_set_lock(&(locks[k]));
    //#pragma omp critical
    {
        forces[part][X] += f_part_k[X];
        forces[part][Y] += f_part_k[Y];
        forces[k][X] -= f_part_k[X];
        forces[k][Y] -= f_part_k[Y];
    }
    omp_unset_lock(&(locks[k]));
}
for (i = 0; i < n; i++)
    omp_destroy_lock(&(locks[i]));
}
When I use only the critical directive (commented out above), the results are fine, i.e. they match those of the sequential version. However, if I use locks as shown in the code, the results are way off. I guess I have misunderstood the concept of locks, because in my understanding this locking approach should make the write access to the forces array safe. Could you point me in the right direction?
Upvotes: 1
Views: 4609
Reputation: 8032
I think the problem with your code is a race condition on:
omp_set_lock(&(locks[k]));
{
    forces[part][X] += f_part_k[X]; // Race condition for different k
    forces[part][Y] += f_part_k[Y]; // Race condition for different k
    forces[k][X] -= f_part_k[X];
    forces[k][Y] -= f_part_k[Y];
}
omp_unset_lock(&(locks[k]));
In fact, for different values of k, multiple threads try to write to forces[part][X] and forces[part][Y], and since each of those threads holds a different lock (locks[k]), the locks never exclude one another. Further, I think there is no need to explicitly synchronize the access to forces[k][X] and forces[k][Y], as each thread will update its own k.
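To make the failure mode concrete, here is a minimal toy program (not taken from your code; the thread count and iteration count are arbitrary) in which every thread guards one shared counter with its own, different lock. Because the locks never exclude one another, updates get lost, exactly as with forces[part]:
#include <stdio.h>
#include <omp.h>

int main(void) {
    long sum = 0;                      /* stands in for forces[part][X] */
    omp_lock_t locks[4];
    int i;
    for (i = 0; i < 4; i++)
        omp_init_lock(&locks[i]);

    #pragma omp parallel num_threads(4)
    {
        int id = omp_get_thread_num();
        int j;
        for (j = 0; j < 1000000; j++) {
            omp_set_lock(&locks[id]);  /* each thread takes a DIFFERENT lock... */
            sum += 1;                  /* ...so this update is NOT serialized */
            omp_unset_lock(&locks[id]);
        }
    }

    for (i = 0; i < 4; i++)
        omp_destroy_lock(&locks[i]);

    /* Expected 4000000; lost updates typically make the result smaller. */
    printf("sum = %ld\n", sum);
    return 0;
}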
If you want to experiment with different synchronization constructs that give the right semantics, you could try:
Atomic level synchronization
#pragma omp atomic
forces[part][X] += f_part_k[X];
#pragma omp atomic
forces[part][Y] += f_part_k[Y];
forces[k][X] -= f_part_k[X];
forces[k][Y] -= f_part_k[Y];
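Note that #pragma omp atomic protects only the single update statement that immediately follows it, which is why each of the two accumulations needs its own directive; the forces[k] updates stay unprotected for the reason given above. Atomic updates usually map to hardware atomic instructions and are cheaper than a full critical section.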
Explicit lock
omp_set_lock(&lock);
{
forces[part][X] += f_part_k[X];
forces[part][Y] += f_part_k[Y];
}
omp_unset_lock(&lock);
forces[k][X] -= f_part_k[X];
forces[k][Y] -= f_part_k[Y];
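Here the single lock is assumed to be shared among the threads and managed outside the parallel region; a minimal sketch of its lifecycle (the surrounding code is implied):
omp_lock_t lock;          /* one shared lock for all threads */
omp_init_lock(&lock);     /* once, before the parallel region */
/* ... parallel loop using omp_set_lock(&lock) / omp_unset_lock(&lock) ... */
omp_destroy_lock(&lock);  /* once, after the parallel region */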
Named critical sections
#pragma omp critical(PART)
{
forces[part][X] += f_part_k[X];
forces[part][Y] += f_part_k[Y];
}
forces[k][X] -= f_part_k[X];
forces[k][Y] -= f_part_k[Y];
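Naming the critical section (PART is just an arbitrary label) gives it its own mutex, so it only serializes against other critical constructs with the same name instead of against every unnamed critical section in the program.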
I suggest you read the definitions of the critical and atomic constructs in the OpenMP specification (sections 2.8.2 and 2.8.5), and have a look at examples A.19.1c, A.22.* and A.45.1c.
That said, in the case you presented I would try the following:
float fredx = 0.0f;
float fredy = 0.0f;
#pragma omp parallel for private(k, f_part_k, len, len_3, mg, fact) reduction(+:fredx,fredy)
for (k = part+1; k < n; k++) {
    /* Compute force on part due to k */
    f_part_k[X] = curr[part].s[X] - curr[k].s[X];
    f_part_k[Y] = curr[part].s[Y] - curr[k].s[Y];
    len = sqrt(f_part_k[X]*f_part_k[X] + f_part_k[Y]*f_part_k[Y]);
    len_3 = len*len*len;
    mg = -G*curr[part].m*curr[k].m;
    fact = mg/len_3;
    f_part_k[X] *= fact;
    f_part_k[Y] *= fact;
    /* Accumulate the force on part into the reduction variables */
    fredx += f_part_k[X];
    fredy += f_part_k[Y];
    forces[k][X] -= f_part_k[X];
    forces[k][Y] -= f_part_k[Y];
}
forces[part][X] += fredx;
forces[part][Y] += fredy;
to avoid any explicit synchronization: the reduction clause gives each thread its own zero-initialized copies of fredx and fredy and sums them into the shared variables when the loop finishes, while forces[k] needs no protection because each value of k is executed by exactly one thread.
Upvotes: 5