Reputation: 23
I am trying to gather sub-2D arrays, which have different numbers of rows but have the same numbers of columns, into a global 2D array. For example, assuming to use 2 MPI processes, the first process (i.e., rank == 0) has:
local = [11,12,13,14]
, and the second process (i.e., rank == 1) has:
local = [21,22,23,24
31,32,33,34]
Then, I want to concatenate these two arrays into one 2D array as:
global = [11,12,13,14
21,22,23,24
31,32,33,34]
Since each "local" array has a different number of rows, I (may) want to use mpi_gatherv (or mpi_allgatherv). I found identical problems here: Using Gatherv for 2d Arrays in Fortran and Using MPI_gatherv to create a new matrix from other smaller matrices, but I still don't quite understand. So, please teach me. Here is my example code:
program main
use mpi
implicit none
integer :: i, j
integer :: rank, npro, ierr
integer, allocatable :: local(:,:)
integer, allocatable :: global(:,:), displs(:), counts(:)
integer :: loc_size(2), glob_size(2), starts(2)
integer :: newtype, int_size, resizedtype
integer(kind=MPI_ADDRESS_KIND) :: extent, begin
! End of local variables ==================================================!
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, npro, ierr)
! I will set local 2D arrays as: [1,4] for rank #0, and [2,4] for rank #1
! then, the global 2D array will be [3,4] (assuming I use 2 processes)
loc_size = [rank+1,4] ! [1,4], [2,4]
glob_size = [3,4] ! I will use npro = 2
! allocate local and global arrays
allocate(local(loc_size(1), loc_size(2))) ! [1,4], [2,4]
allocate(global(glob_size(1), glob_size(2)))! [3,4] ! if npro = 2
! set "local" array
! rank = 0: [11, 12, 13, 14]
! rank = 1: [21, 22, 23, 24
! 31, 32, 33, 34]
if(rank == 0) then
do j=1,4
local(1,j) = 10 + j ! [11,12,13,14]
end do
else if(rank == 1) then
do i=1,2
do j=1,4
local(i,j) = (i+1)*10 + j ! [21,22,23,24; 31,32,33,34]
end do
end do
end if
! create a 2D subarray and set as "newtype"
starts = [0,0] ! array start location
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
& MPI_ORDER_FORTRAN, MPI_INTEGER, &
& newtype, ierr)
! get MPI_INTEGER type size in byte
! I don't quite understand the following processes...
! So, please comment on each step if possible...
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin = 0
extent = (rank+1) * int_size ! rank 0 = 4 byte; rank 1 = 8 byte (am I doing correct here?)
call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr) ! I dont' quite understand this process
call MPI_Type_commit(resizedtype, ierr)
! allocate index for mpi_gatherv
allocate(displs(npro)) ! [2], index for mpi_gatherv
allocate(counts(npro)) ! [2], index for mpi_gatherv
counts = [1,1]
do i = 1,npro
displs(i) = (i-1) ! [0,1]
end do
call MPI_Gatherv(local, 1, MPI_INTEGER, &
& global, counts, displs, resizedtype, &
& 0, MPI_COMM_WORLD, ierr)
if(rank == 0) then
do i=1,3
write(*,*) (global(i,j), j=1,4)
end do
end if
call MPI_Finalize(ierr)
end program main
Thank you in advance.
Upvotes: 0
Views: 321
Reputation: 1764
I think it would be a lot easier if you changed the storage order (i.e. have rank "i" initialise "i+1" columns of fixed length), but the following code seems to work for what you currently have. I have turned on debug output, changed the number of columns to 4, ran on 3 processes (so the global number of rows = 1+2+3 = 6) and made sure the local arrays are initialised with unique data.
An important point is that you need a different send type for each rank as the strides are different (since the local arrays are of different dimensions). Maybe there's a much easier way to do this (without changing the storage order) but at least this seems to work.
Note that the comments no longer correlate with the actual code!
program main
use mpi
implicit none
integer :: i, j
integer :: rank, npro, ierr
integer, allocatable :: local(:,:)
integer, allocatable :: global(:,:), displs(:), counts(:)
integer :: loc_size(2), glob_size(2), starts(2)
integer :: newtype, int_size, stype, resizedstype, rtype, resizedrtype
integer(kind=MPI_ADDRESS_KIND) :: extent, begin
! End of local variables ==================================================!
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, npro, ierr)
! I will set local 2D arrays as: [1,3] for rank #0, and [2,3] for rank #1
! then, the global 2D array will be [3,3] (assuming I use 2 processes)
loc_size = [rank+1,4] ! [1,3], [2,3]
glob_size = [6,4] ! I will use npro = 3
! allocate local and global arrays
allocate(local(loc_size(1), loc_size(2))) ! [1,3], [2,3]
allocate(global(glob_size(1), glob_size(2)))! [3,3] ! if npro = 2
! set "local" array
! rank = 0: [0, 0, 0]
! rank = 1: [1, 1, 1
! 1, 1, 1]
do i=1,rank+1
do j=1,4
local(i,j) = 10*rank+4*(i-1)+j
end do
end do
! check the local array
do i=1,rank+1
write(*,*) 'rank = ', rank, 'local = ', (local(i,j), j=1,4)
end do
! create a 2D subarray and set as send type stype
loc_size= [1,4]
starts = [0,0] ! array start location
glob_size=[rank+1,4]
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
& MPI_ORDER_FORTRAN, MPI_INTEGER, &
& stype, ierr)
! get MPI_INTEGER type size in byte
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin = 0
extent = int_size
call MPI_Type_create_resized(stype, begin, extent, resizedstype, ierr)
call MPI_Type_commit(resizedstype, ierr)
! create a 2D subarray and set as receive type rtype
loc_size=[1,4]
starts = [0,0] ! array start location
glob_size=[6,4]
call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
& MPI_ORDER_FORTRAN, MPI_INTEGER, &
& rtype, ierr)
! get MPI_INTEGER type size in byte
! I don't quite understand the following processes...
! So, please comment on each step if possible...
call MPI_Type_size(MPI_INTEGER, int_size, ierr)
begin = 0
extent = int_size
call MPI_Type_create_resized(rtype, begin, extent, resizedrtype, ierr)
call MPI_Type_commit(resizedrtype, ierr)
! allocate index for mpi_gatherv
allocate(displs(npro)) ! [2], index for mpi_gatherv
allocate(counts(npro)) ! [2], index for mpi_gatherv
counts = [1,2,3]
displs = [0,1,3]
call MPI_Gatherv(local, rank+1, resizedstype, &
& global, counts, displs, resizedrtype, &
& 0, MPI_COMM_WORLD, ierr)
if(rank == 0) then
do i=1,6
write(*,*) (global(i,j), j=1,4)
end do
end if
call MPI_Finalize(ierr)
end program main
If I run on 3 processes I get sensible results:
rank = 0 local = 1 2 3 4
rank = 1 local = 11 12 13 14
rank = 1 local = 15 16 17 18
rank = 2 local = 21 22 23 24
rank = 2 local = 25 26 27 28
rank = 2 local = 29 30 31 32
1 2 3 4
11 12 13 14
15 16 17 18
21 22 23 24
25 26 27 28
29 30 31 32
Upvotes: 2