Moonwalk
Moonwalk

Reputation: 152

Gather 3D Matrix Blocks of Different Sizes using MPI

I am trying to gather 3D matrix blocks from 8 processes to root process. Each process has a 3D matrix of size 7x7x7 but it is sending just some part of it (a 3D matrix block). You can see the illustration in this figure:enter image description here

where e.g. root process 0 sends the matrix block of size 3x3x3. At the end, the root process 0 obtains all the 3D matrix blocks (upper left corner of the figure) into 7x7x7 global matrix A. I prescribed the offset to be (1,1,0) which denotes where the 3D matrix block starts in the whole 7x7x7 matrix of each process.

Here is my code:

PROGRAM MATRIX_3D_GATHER

USE MPI

IMPLICIT NONE

integer :: rank, size, ierr
INTEGER, PARAMETER :: NUM_PROC = 8, DIM3 = 3, UNITCONST = 12
INTEGER, DIMENSION(NUM_PROC, DIM3) :: LIST_3D_SUBARRAY_SIZES
INTEGER, DIMENSION(NUM_PROC) :: DISPLACEMENTS
INTEGER, DIMENSION(DIM3) :: GLOBALSIZES, LOCALSIZES
INTEGER, DIMENSION(DIM3) :: SIZES, SUB_SIZES, STARTS, STARTS_LOCAL
INTEGER, ALLOCATABLE, DIMENSION(:,:,:) :: A, LOCALDATA
INTEGER, ALLOCATABLE, DIMENSION(:) :: SENDCOUNTS, RECVCOUNTS, SENDDISPLS, RECVDISPLS
INTEGER :: SENDTYPE, I, J, K, START_INDX_I, START_INDX_J, START_INDX_K, END_INDX_I, END_INDX_J, END_INDX_K
INTEGER, ALLOCATABLE, DIMENSION(:) :: SEND_TYPES, RECV_TYPES

INTEGER(KIND=MPI_ADDRESS_KIND) :: LB, EXTENT
INTEGER, ALLOCATABLE, DIMENSION(:) :: RECV_BLOCK_TYPES, RESIZED_RECV_BLOCK_TYPES

CHARACTER (LEN = 11) :: STRING
CHARACTER(LEN=20) :: FMT
INTEGER :: STAT

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)

! size must be equal to NUM_PROC = 8
IF (rank == 0) THEN
  IF (size /= NUM_PROC) THEN
    WRITE(*,*) "Program must run with ", NUM_PROC, " cores."
    CALL MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
  END IF  
END IF

LIST_3D_SUBARRAY_SIZES = TRANSPOSE(RESHAPE((/ &
3, 3, 3, &
4, 3, 3, &
3, 4, 3, &
4, 4, 3, &
3, 3, 4, &
4, 3, 4, &
3, 4, 4, &
4, 4, 4  &
/), (/DIM3, NUM_PROC/)))
GLOBALSIZES(:) = 7
LOCALSIZES (:) = 7
STARTS_LOCAL = (/1, 1, 0/)
!displ. inside the root's final 3D matrix
DISPLACEMENTS(:) = (/0, 3, 21, 24,      &
                     147, 150, 168, 171/)

!initialize data
IF ( rank == 0 ) THEN
  ALLOCATE( A(GLOBALSIZES(1), GLOBALSIZES(2), GLOBALSIZES(3)) )
  A(:,:,:) = -1
END IF
ALLOCATE( LOCALDATA (LOCALSIZES(1), LOCALSIZES(2), LOCALSIZES(3)) )
LOCALDATA(:,:,:) = -2

START_INDX_I = STARTS_LOCAL(1)+1
START_INDX_J = STARTS_LOCAL(2)+1
START_INDX_K = STARTS_LOCAL(3)+1
END_INDX_I   = START_INDX_I + LIST_3D_SUBARRAY_SIZES(rank+1,1)
END_INDX_J   = START_INDX_J + LIST_3D_SUBARRAY_SIZES(rank+1,2)
END_INDX_K   = START_INDX_K + LIST_3D_SUBARRAY_SIZES(rank+1,3)

LOCALDATA(START_INDX_I:END_INDX_I, START_INDX_J:END_INDX_J, START_INDX_K:END_INDX_K) = rank
!allocate sendcounts, senddispls, sendtypes
ALLOCATE(SENDCOUNTS(size), SENDDISPLS(size), SEND_TYPES(size))
ALLOCATE(RECVCOUNTS(size), RECVDISPLS(size), RECV_TYPES(size))
SENDCOUNTS(:) = 0
SENDDISPLS(:) = 0
RECVCOUNTS(:) = 0
RECVDISPLS(:) = 0
RECV_TYPES(:) = MPI_INTEGER
SEND_TYPES(:) = MPI_INTEGER

!create type for sending to root
SIZES(:) = LOCALSIZES(:)
SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(rank+1,:)
STARTS = STARTS_LOCAL
CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS,                &
                              MPI_ORDER_FORTRAN, MPI_INTEGER, SENDTYPE, ierr)
CALL MPI_TYPE_COMMIT(SENDTYPE, ierr)
!send to root
SENDCOUNTS(1) = 1
SEND_TYPES(1) = SENDTYPE

!create type for receiving from others
IF ( rank == 0 ) THEN
  ALLOCATE(RECV_BLOCK_TYPES(size), RESIZED_RECV_BLOCK_TYPES(size))
  SIZES(:) = GLOBALSIZES(:)
  STARTS = (/0, 0, 0/)
  ! need to create size = NUM_PROC = 8 block types
  DO I = 1, size
    SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(I,:)
    CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS,                          &
                                  MPI_ORDER_FORTRAN, MPI_INTEGER, RECV_BLOCK_TYPES(I), ierr)
    CALL MPI_TYPE_COMMIT(RECV_BLOCK_TYPES(I), ierr)
    
    LB = 0
    CALL MPI_TYPE_GET_EXTENT(MPI_INTEGER, LB, EXTENT, ierr)
    CALL MPI_TYPE_CREATE_RESIZED(RECV_BLOCK_TYPES(I), LB, EXTENT, &
                                 RESIZED_RECV_BLOCK_TYPES(I), ierr)
    CALL MPI_TYPE_COMMIT(RESIZED_RECV_BLOCK_TYPES(I), ierr)
    
    RECV_TYPES(I) = RESIZED_RECV_BLOCK_TYPES(I)
    ! what data root expects from others
    RECVCOUNTS(I) = 1
    RECVDISPLS(I) = DISPLACEMENTS(I)*EXTENT
  END DO
END IF


CALL MPI_ALLTOALLW(LOCALDATA, SENDCOUNTS, SENDDISPLS, SEND_TYPES, &
                   A        , RECVCOUNTS, RECVDISPLS, RECV_TYPES, &
                   MPI_COMM_WORLD, ierr)

! write 2D slices (rows x columns) of the final 3D matrix
IF ( rank == 0 ) THEN
  WRITE ( STRING, '(I0)' ) GLOBALSIZES(2)
  FMT = '('//TRIM(STRING)//'(1X,I4))'
  DO K = 1, GLOBALSIZES(3)
    WRITE ( STRING, '(I0)' ) K
    OPEN( UNIT = UNITCONST + rank, FILE = 'MATRIX_2D_SLICE_'//TRIM(STRING)//'.DAT', STATUS = 'UNKNOWN', ACTION = 'WRITE', IOSTAT = STAT)
    DO I = 1, GLOBALSIZES(1)
      WRITE(UNITCONST + rank, FMT, IOSTAT = STAT) (A(I, J, K), J = 1, GLOBALSIZES(2))
    END DO
    CLOSE(UNITCONST + rank)
  END DO
END IF

CALL MPI_FINALIZE(ierr)

END PROGRAM MATRIX_3D_GATHER

For me the code looks that is working. One can see the output 2D slices of final 3D matrix A. I wrote the code based on the answers of @Jonathan Dursi Scatter Matrix Blocks of Different Sizes using MPI , using MPI_Gatherv for Fortran and Sending 2D arrays in Fortran with MPI_Gather for 2D matrices.

Question: I am using MPI_Alltoallw to gather the blocks (see e.g. Scatter Matrix Blocks of Different Sizes using MPI). I was wondering if this can be achieved by MPI_Gatherv? Or if someone has better idea how to implement this or improve the code.

Thank you very much for taking time to think about this.

Upvotes: 2

Views: 67

Answers (1)

Hristo Iliev
Hristo Iliev

Reputation: 74385

You cannot use MPI_Gatherv here since each piece has different geometry and requires its own strided MPI datatype. MPI_Gatherv allows you to work with blocks of varying sizes but the datatype is the same for all blocks. You can use it to collect and assemble pieces of different sizes along one (same for all pieces) dimension, but only if the sizes along all other dimensions are the same for all pieces, essentially putting together blocks consisting of varying number of (hyper-)slabs, which is not your case.

Upvotes: 0

Related Questions