Reputation: 971
I am trying to get started on dynamic process creation in MPI. I have a parent code (main.c) trying to spawn new worker/child processes (worker.c) and merge both into one intracommunicator. The parent code (main.c) is
#include<stdio.h>
#include "mpi.h"
/*
 * Parent-side fragment as posted in the question. Neither a main()
 * definition nor an MPI_Init() call appears in this excerpt.
 */
MPI_Comm child_comm;
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Only rank 0 enters the spawn/merge section; all other ranks go straight to MPI_Finalize(). */
if(rank == 0 )
{
int num_processes_to_spawn = 2;
/* The spawn is issued over MPI_COMM_SELF, so rank 0 is the only parent-side member of child_comm. */
MPI_Comm_spawn("worker", MPI_ARGV_NULL, num_processes_to_spawn, MPI_INFO_NULL, 0, MPI_COMM_SELF, &child_comm, MPI_ERRCODES_IGNORE );
MPI_Comm intra_comm;
/* Second argument (high) is 0 here. */
MPI_Intercomm_merge(child_comm,0, &intra_comm);
/* The barrier is issued on the inter-communicator child_comm, not on intra_comm. */
MPI_Barrier(child_comm);
int tmp_size;
MPI_Comm_size(intra_comm, &tmp_size);
printf("size of intra comm world = %d\n", tmp_size);
/*
 * NOTE: MPI_Comm_size() on an inter-communicator reports the size of the
 * local group; MPI_Comm_remote_size() is the call that reports the remote
 * (child) group.
 */
MPI_Comm_size(child_comm, &tmp_size);
printf("size of child comm world = %d\n", tmp_size);
MPI_Comm_size(MPI_COMM_WORLD, &tmp_size);
printf("size of parent comm world = %d\n", tmp_size);
}
MPI_Finalize();
The worker (child) code is:
#include<stdio.h>
#include "mpi.h"
/*
 * Worker program as posted in the question: obtains the inter-communicator
 * to its parent, merges it into an intra-communicator, and has rank 0 of the
 * worker's MPI_COMM_WORLD print three communicator sizes.
 */
int main( int argc, char *argv[] )
{
/* numprocs is filled in below but not read again in this function. */
int numprocs, myrank;
MPI_Comm parentcomm;
MPI_Comm intra_comm;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
MPI_Comm_get_parent( &parentcomm );
/* Second argument (high) is 1 here. */
MPI_Intercomm_merge(parentcomm, 1, &intra_comm);
/* The barrier is issued on the inter-communicator parentcomm, not on intra_comm. */
MPI_Barrier(parentcomm);
if(myrank == 0)
{
int tmp_size;
/*
 * NOTE: MPI_Comm_size() on an inter-communicator reports the size of the
 * local group; MPI_Comm_remote_size() is the call that reports the remote
 * (parent) group.
 */
MPI_Comm_size(parentcomm, &tmp_size);
printf("child size of parent comm world = %d\n", tmp_size);
MPI_Comm_size(MPI_COMM_WORLD, &tmp_size);
printf("child size of child comm world = %d\n", tmp_size);
MPI_Comm_size(intra_comm, &tmp_size);
printf("child size of intra comm world = %d\n", tmp_size);
/*
 * NOTE: as posted, MPI_Finalize() and the return statement are inside the
 * myrank == 0 branch, so every other rank reaches the end of main() without
 * calling MPI_Finalize().
 */
MPI_Finalize( );
return 0;
}
}
I run this code using
mpirun -np 12 main.c
After split and merge, I expect the output as
size of intra comm world = 14
size of child comm world = 2
size of parent comm world = 12
child size of parent comm world = 12
child size of child comm world = 2
child size of intra comm world = 14
But I get the following incorrect output.
size of intra comm world = 3
size of child comm world = 1
size of parent comm world = 12
child size of parent comm world = 2
child size of child comm world = 2
child size of intra comm world = 3
I do not understand where the mistake is; could someone kindly let me know?
Thanks, Kris
Upvotes: 2
Views: 946
Reputation: 9489
Your code suffers from a few problems, which I'll try to list here:
1. In the master code, only process 0 calls `MPI_Comm_spawn()`. This isn't a mistake as such (especially since you use `MPI_COMM_SELF` as parent communicator), but it de facto excludes all other processes from the subsequent merging.
2. In both the master and the worker code, you use `MPI_Comm_size()` to get the size of the remote communicator instead of `MPI_Comm_remote_size()`. Therefore you will only get the size of the local communicator inside the inter-communicator, instead of the size of the remote communicator.
3. Not every process calls `MPI_Finalize()` (not to mention that `main()` and `MPI_Init()` are missing).

Here are fixed versions of your code:
master.c
#include <stdio.h>
#include <mpi.h>
/*
 * Master program.
 *
 * Every rank of MPI_COMM_WORLD takes part in spawning the "./worker"
 * processes (root 0), then merges the resulting inter-communicator into a
 * single intra-communicator with the parents ordered first (high = 0).
 * Rank 0 reports the size of the merged communicator, of the remote (child)
 * group, and of the parents' MPI_COMM_WORLD.
 */
int main( int argc, char *argv[] ) {
    MPI_Init( &argc, &argv );

    int world_rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    /* Collective spawn over MPI_COMM_WORLD: all parents share the inter-communicator. */
    const int worker_count = 2;
    MPI_Comm workers;
    MPI_Comm_spawn( "./worker", MPI_ARGV_NULL, worker_count, MPI_INFO_NULL,
                    0, MPI_COMM_WORLD, &workers, MPI_ERRCODES_IGNORE );

    /* Parents pass high = 0 to the merge. */
    MPI_Comm merged;
    MPI_Intercomm_merge( workers, 0, &merged );

    if ( world_rank == 0 ) {
        int merged_size;
        int children_size;
        int parents_size;

        MPI_Comm_size( merged, &merged_size );
        /* Remote size: the child group of the inter-communicator. */
        MPI_Comm_remote_size( workers, &children_size );
        MPI_Comm_size( MPI_COMM_WORLD, &parents_size );

        printf( "size of intra comm world = %d\n", merged_size );
        printf( "size of child comm world = %d\n", children_size );
        printf( "size of parent comm world = %d\n", parents_size );
    }

    MPI_Finalize();
    return 0;
}
worker.c
#include <stdio.h>
#include <mpi.h>
int main( int argc, char *argv[] ) {
MPI_Init( &argc, &argv );
int myrank;
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
MPI_Comm parentcomm;
MPI_Comm_get_parent( &parentcomm );
MPI_Comm intra_comm;
MPI_Intercomm_merge( parentcomm, 1, &intra_comm );
if ( myrank == 0 ) {
int tmp_size;
MPI_Comm_remote_size( parentcomm, &tmp_size );
printf( "child size of parent comm world = %d\n", tmp_size );
MPI_Comm_size( MPI_COMM_WORLD, &tmp_size );
printf( "child size of child comm world = %d\n", tmp_size );
MPI_Comm_size( intra_comm, &tmp_size );
printf( "child size of intra comm world = %d\n", tmp_size );
}
MPI_Finalize();
return 0;
}
Which gives on my laptop:
~> mpirun -n 12 ./master
child size of parent comm world = 12
child size of child comm world = 2
child size of intra comm world = 14
size of intra comm world = 14
size of child comm world = 2
size of parent comm world = 12
Upvotes: 1