Reputation: 17468
I want to launch an OpenMP multi-thread region in one process within my MPI application code. For example:
#include <iostream>
#include <omp.h>
#include <mpi.h>
#include <Eigen/Dense>
using std::cin;
using std::cout;
using std::endl;
using namespace Eigen;
int main(int argc, char ** argv)
{
int rank, num_process;
MatrixXd A = MatrixXd::Ones(8, 4);
MatrixXd B = MatrixXd::Zero(8, 4);
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_process);
MPI_Status status;
if (rank == 0)
{
int i, j, bnum = 2, brow = 4, thid;
#pragma omp parallel shared(A, B) private(i, j, brow, bnum, thid) num_threads(2)
for (i = 0; i < brow; i ++)
{
for (j = 0; j < 4; j ++)
{
thid = omp_get_thread_num();
//cout << "thid " << thid << endl;
B(thid * brow+i,j) = A(thid*brow+i, j);
}
}
cout << "IN rank 0" << endl;
cout << B << endl;
cout << "IN rank 0" << endl;
MPI_Send(B.data(), 32, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD);
}
else
{
MPI_Recv(B.data(), 32, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &status);
cout << "IN rank 1" << endl;
cout << B << endl;
cout << "IN rank 1" << endl;
}
MPI_Finalize();
return 0;
}
In my example code, I want to launch 2 threads to copy data from matrix A to matrix B, and my machine has 4 cores. But when run the program, the matrix B has only got half the data.
$ mpirun -n 2 ./shareMem
IN rank 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 0
IN rank 1
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 1
$ mpirun -n 4 ./shareMem # it just hang on and doesn't exit
IN rank 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 0
IN rank 1
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 1
And the output I expected is
$ mpirun -n 2 ./shareMem # it just hang on and doesn't exit
IN rank 0
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
IN rank 0
IN rank 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
IN rank 1
How can I fix it and make 2 threads run in my code? Thank you!
Upvotes: 1
Views: 708
Reputation: 66
Change
#pragma omp parallel shared(A, B) private(i, j, brow, bnum, thid) num_threads(2)
to
#pragma omp parallel shared(A, B) private(i, j, thid) num_threads(2)
brow
, bnum
are shared variables.
By adding names bnum
and brow
to private clause you are making new auto variables with such names for each thread and by default they are undefined.
Upvotes: 1
Reputation: 118
There's a typo in the word parallel that the compiler doesn't catch.
#pragma omp prallel
PS: I don't have enough reputation to add a comment
Upvotes: 1