Reputation: 43
I am trying to use OpenMP work-sharing constructs. The code below is a simpler example of what is going wrong in my bigger OpenMP code. I assign values to an integer matrix, check the element values, reset them to 0, and repeat this inside a 't' loop. The integer 'p' counts the number of times the value assignments (done by the parallel for) fail. p is supposed to be 0 if the code is correct, but it gives me different answers for different runs, so the work-sharing construct is failing somewhere. I had to run it around 12 times before I got the first wrong value of p as output (1, 2, 3, etc.).
The barrier directives in the code aren't really necessary; I was getting different values of p without them and thought an explicit barrier would help, but I was wrong. This is the code:
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define NRA 10 /* number of rows in matrix A */
#define NCA 10 /* number of columns in matrix A */

int main()
{
    int i, j, ir, p = 0, t;
    int *a;
    a = (int*) malloc(sizeof(int)*NRA*NCA);
    omp_set_num_threads(5);

    for(t=0;t<100000;t++)
    {
        #pragma omp barrier
        #pragma omp parallel for schedule (static,2) collapse(2)
        for(i=0;i<NRA;i++)
        {
            for(j=0;j<NCA;j++)
            {
                ir=j*NRA+i;
                a[ir] = 1;
            }
        }

        #pragma omp single
        {
            for(i=0;i<NRA;i++)
            {
                for(j=0;j<NCA;j++)
                {
                    ir=j*NRA+i;
                    if(a[ir] != 1)
                    {
                        p += 1;
                    }
                }
            }
        }

        #pragma omp parallel for schedule (static,2) collapse(2)
        for(i=0;i<NRA;i++)
        {
            for(j=0;j<NCA;j++)
            {
                ir=j*NRA+i;
                a[ir] = 0;
            }
        }
        #pragma omp barrier
    }//end t

    printf("p is %d\n",p);
    return 0;
}
This is the bigger code. I don't think a race condition is the issue, because I declared all variables used outside the parallel loops as shared, and all other variables locally inside the parallel region. Any suggestions would be helpful!
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define NRA 10 /* number of rows in matrix A */
#define NCA 10 /* number of columns in matrix A */
#define NCB 10 /* number of columns in matrix B */

void matrixcalc (double *ad, double *bd, double *cd, int chunkd);
void printresults (double *cd, int chunkd);
void printrep (double *cd, int chunkd);

int main ()
{
    int nthreads, chunk, p = 0;
    double *a,*b,*c;
    a = (double*)malloc(NRA*NCA*sizeof(double));
    if(a==NULL)
        printf("ho\n");
    b = (double*)malloc(NCA*NCB*sizeof(double));
    c = (double*)malloc(NRA*NCB*sizeof(double));

    omp_set_num_threads(5);
    chunk = 2; /* set loop iteration chunk size */
    int ir3, i1, j1;
    /*** Spawn a parallel region explicitly scoping all variables ***/
    int t, tmax = 100000;
    for(t=0;t<tmax;t++)
    {
        #pragma omp parallel shared(a,b,c,nthreads,chunk,t,tmax)
        {
            int tid = omp_get_thread_num();
            int i, j, ir;
            if (tid == 0)
            {
                nthreads = omp_get_num_threads();
                // printf("Starting matrix multiple example with %d threads\n",nthreads);
                // printf("Initializing matrices...\n");
            }

            /*** Initialize matrices ***/
            #pragma omp for schedule (static, chunk) collapse(2)
            for (i=0; i<NRA; i++)
            {
                for (j=0; j<NCA; j++)
                {
                    ir = j*NRA+i;
                    a[ir] = 1.0;
                }
            }
            #pragma omp for schedule (static, chunk) collapse(2)
            for (i=0; i<NCA; i++)
            {
                for (j=0; j<NCB; j++)
                {
                    ir = j*NCA+i;
                    b[ir] = 1.0;
                }
            }
            #pragma omp for schedule (static, chunk) collapse(2)
            for (i=0; i<NRA; i++)
            {
                for (j=0; j<NCB; j++)
                {
                    ir = j*NRA+i;
                    c[ir] = 0.0;
                }
            }

            /*** Do matrix multiply sharing iterations on outer loop ***/
            /*** Display who does which iterations for demonstration purposes ***/
            matrixcalc(a,b,c,chunk);

            if(t!=tmax-1)
            {
                #pragma omp for schedule (static, chunk) collapse(2)
                for(i=0;i<NRA;i++)
                {
                    for(j=0;j<NCB;j++)
                    {
                        ir=j*NRA+i;
                        c[ir]=0.0;
                    }
                }
            }
        }//end parallel region

        for(i1=0;i1<NRA;i1++)
        {
            for(j1=0;j1<NCB;j1++)
            {
                ir3=j1*NRA+i1;
                if(c[ir3]!=12.20000&&c[ir3]!=0.0)
                {
                    printf("%lf\n",c[ir3]);
                    p+=1;
                }
            }
        }
    }//end t

    printf("finalp\t%d\n",p);
    for(i1=0;i1<NRA;i1++)
    {
        for(j1=0;j1<NCB;j1++)
        {
            ir3=j1*NRA+i1;
            printf("%lf\t",c[ir3]);
        }
        printf("\n");
    }
    return 0;
}

void matrixcalc (double *a, double *b, double *c, int chunk)
{
    int i,j,k,ir,ir1,ir2;
    //printf("Thread %d starting matrix multiply...%d\n",tid,chunk);
    double r = 1.0;

    #pragma omp for schedule (static, chunk) collapse(3)
    for (i=0; i<NRA; i++)
    {
        for(j=0; j<NCB; j++)
        {
            for (k=0; k<NCA; k++)
            {
                ir=j*NRA+i;
                ir1=k*NRA+i;
                ir2=j*NCA+k;
                c[ir] += a[ir1] * b[ir2];
            }
        }
    }

    #pragma omp for schedule (static, chunk) collapse(2)
    for(i=0;i<NRA;i++)
    {
        for(j=0;j<NCB;j++)
        {
            ir=j*NRA+i;
            c[ir]+=r*2.0;
        }
    }

    #pragma omp single
    {
        double h;
        h = 0.1;
        h = 2.0*h;
        for(i=0;i<NRA;i++)
        {
            for(j=0;j<NCB;j++)
            {
                ir=j*NRA+i;
                c[ir]+=h;
            }
        }
    }
}
Upvotes: 0
Views: 202
Reputation: 22660
The issue is a race condition on ir. Since it is defined outside of the loop, it is implicitly shared. You could force it to be private, but it is better to declare variables as locally as possible. That makes reasoning about OpenMP code much easier:
#pragma omp parallel for schedule (static,2) collapse(2)
for(int i=0;i<NRA;i++)
{
    for(int j=0;j<NCA;j++)
    {
        int ir = j*NRA+i;
        a[ir] = 1;
    }
}
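If you prefer to keep ir declared outside the loop, a minimal sketch of the private-clause alternative mentioned above (not the preferred fix, just for reference) would look like this:

#pragma omp parallel for private(ir) schedule (static,2) collapse(2)
for(i=0;i<NRA;i++)
{
    for(j=0;j<NCA;j++)
    {
        ir = j*NRA+i;   /* each thread now writes its own private copy of ir */
        a[ir] = 1;
    }
}

Note that i and j are already privatized implicitly because they are the loop iteration variables of the collapsed parallel for; only ir needs the explicit private clause.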
As commented by Jorge Bellón, there are other issues in your code with respect to redundant barriers and efficiency.
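As one illustration of the efficiency point (this is an assumption about what the comment refers to, not code from the answer): the parallel region in the first example can be created once around the t loop, so the thread team is not forked and joined 100000 times, and the explicit barriers can be dropped because each worksharing for and the single already end with an implicit barrier. Variable names below match the first example, with a and p declared in main:

#pragma omp parallel
{
    /* every thread runs the whole t loop; the worksharing constructs
       inside split the inner iterations among the team */
    for (int t = 0; t < 100000; t++)
    {
        #pragma omp for schedule(static,2) collapse(2)
        for (int i = 0; i < NRA; i++)
            for (int j = 0; j < NCA; j++)
                a[j*NRA+i] = 1;
        /* implicit barrier at the end of the omp for */

        #pragma omp single
        {
            for (int i = 0; i < NRA; i++)
                for (int j = 0; j < NCA; j++)
                    if (a[j*NRA+i] != 1)
                        p += 1;          /* only one thread touches p */
        }
        /* implicit barrier at the end of the single */

        #pragma omp for schedule(static,2) collapse(2)
        for (int i = 0; i < NRA; i++)
            for (int j = 0; j < NCA; j++)
                a[j*NRA+i] = 0;
        /* implicit barrier at the end of the omp for */
    }
}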
Upvotes: 2