Reputation: 1103
When I run this code I am getting 2542199.979500
as the answer. However, the correct answer is 1271099.989750
. Could someone please tell me where the error is?
This is the code which contains the bug:
#include <omp.h>
#define N 1000
main ()
{
int i, nthreads;
int chunk = 10;
float a[N], b[N], c[N], d[N];
double result;
#pragma omp parallel
{
nthreads = omp_get_num_threads();
printf("no of threads %d", nthreads);
#pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
for (i=0; i < N; i++){
a[i] = i * 1.5;
b[i] = i + 22.35;
}
#pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
for(i=0; i < N; i++){
result = result + (a[i]+b[i]);
}
}
printf("value is %f", result);
}
Furthermore, when the number of threads is 3 I get
3813299.969250
The result depends on the number of threads used. Could this be a bug in openmp, or am I doing something wrong?
Upvotes: 0
Views: 1376
Reputation: 45444
In openMP one should try to minimise the parallel regions, in this case one is possible and hence enough. Here is a simple C++ version doing just that.
#include <iostream>
#include <iomanip>
#include <omp.h>
const int N=1000;
int main ()
{
const double A = 22.35;
const double B = 1.5;
double a[N], b[N], c[N], d[N];
double result=0;
#pragma omp parallel
{ // begin parallel region
#pragma omp master
std::cout << "no of threads: " << omp_get_num_threads() << std::endl;
// this loop and the following could be merged and the arrays avoided.
#pragma omp for
for(int i=0; i<N; ++i) {
a[i] = i * B;
b[i] = i + A;
}
#pragma omp for reduction(+:result)
for(int i=0; i<N; ++i)
result += a[i]+b[i];
} // end parallel region
double answer = N*(A+0.5*(B+1)*(N-1));
std::cout << "computed result = " << std::setprecision(16) << result
<< '\n'
<< "correct answer = " << std::setprecision(16) << answer
<< std::endl;
return 0;
}
I get (using gcc 4.6.2 on Mac OS X 10.6.8):
no of threads: 2
computed result = 1271099.999999993
correct answer = 1271100
Upvotes: 0
Reputation: 1087
Please see comments inline.
// openmp.c
#include <stdio.h>
#include <omp.h>
#define N 1000
// main should return a int
int main(){
int i, nthreads;
float a[N], b[N];
// give result a initial value !
double result = 0;
#pragma omp parallel
{
nthreads = omp_get_num_threads();
// just print numthreads ONCE
#pragma omp single
printf("no. of threads %d\n", nthreads);
#pragma omp for
for (int i = 0; i < N; i++) {
a[i] = i *1.5;
b[i] = i + 22.35;
}
#pragma omp for
for (int i = 0; i < N; i++) {
double sum = a[i] + b[i];
// atomic operation needed !
#pragma omp atomic
result += sum;
}
#pragma omp single
printf("result = %f\n", result);
}
return 0;
}
Compile using cc -fopenmp -std=gnu99 openmp.c
, the output is:
no. of threads 4
result = 1271099.989750
Upvotes: 1
Reputation: 54919
I suggest at least the following two changes...
for the declaration of result
...
// result should be initialized
double result = 0;
For your final pragma...
// specify the "reduction"
#pragma omp parallel for reduction(+:result)
Without specifying the "reduction", the summation to result
is invalid since result
would be modified independently in each thread -- resulting in a race condition.
See http://en.wikipedia.org/wiki/OpenMP#Reduction
#include <stdio.h>
#include <omp.h>
#define N 1000
int main ()
{
int i, nthreads;
int chunk = 10;
float a[N], b[N], c[N], d[N];
double result=0;
#pragma omp parallel
nthreads = omp_get_num_threads();
printf("no of threads %d\n", nthreads);
#pragma omp parallel for
for (i=0; i < N; i++){
a[i] = i * 1.5;
b[i] = i + 22.35;
}
#pragma omp parallel for reduction(+:result)
for(i=0; i < N; i++){
result = result + (a[i]+b[i]);
}
printf("value is %f", result);
return 0;
}
Upvotes: 1