Reputation: 31
I am parallelising a Fortran code that runs without problems in its serial (non-MPI) version. Below is an excerpt of the code.
Every processor does the following:

include "mpif.h"
...
integer*4 ...
...
real*8 ...
call MPI_INIT(mpierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, npe, mpierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, mype, mpierr)

! Allocate variables
allocate(Nx1(Nint),Nx5(Nint))
...
! Parameters
...
call MPI_Barrier (MPI_COMM_WORLD, mpierr)

! Loop on particles
do 100 npartj=1,npart_local
   call init_random_seed()
   call random_number (rand)
   ... Initial condition ...
   do 203 i=1,1000000                    ! loop for time evolution of single particle
      if(ufinp.gt.p1.and.ufinp.le.p2)then
         do j=1,Nint                     ! spatial position at any momentum
            ls(j) = lb+(j-1)*Delta/Nint  ! left side of sub-interval across shock
            rs(j) = ls(j)+Delta/Nint
            if(y(1).gt.ls(j).and.y(1).lt.rs(j))then   ! position-ordered
               Nx1(j)=Nx1(j)+1
            endif
         enddo
      endif
      if(ufinp.gt.p2.and.ufinp.le.p3)then
         do j=1,Nint                     ! spatial position at any momentum
            ls(j) = lb+(j-1)*Delta/Nint  ! left side of sub-interval across shock
            rs(j) = ls(j)+Delta/Nint
            if(y(1).gt.ls(j).and.y(1).lt.rs(j))then   ! position-ordered
               Nx2(j)=Nx2(j)+1
            endif
         enddo
      endif
203 continue
100 continue

call MPI_Barrier (MPI_COMM_WORLD, mpierr)
print*,"To be summed"
do j=1,Nint
   call MPI_ALLREDUCE (Nx1(j),Nx5(j),npe,mpi_integer,mpi_sum, &
        MPI_COMM_WORLD, mpierr)
   call MPI_ALLREDUCE (Nx2(j),Nx6(j),npe,mpi_integer,mpi_sum, &
        MPI_COMM_WORLD, mpierr)
enddo
if(mype.eq.0)then
   do j=1,Nint
      write(1,107)ls(j),Nx5(j),Nx6(j)
   enddo
107 format(3(F13.2,2X,i6,2X,i6))
endif
call MPI_Barrier (MPI_COMM_WORLD, mpierr)
print*,"Now deallocate"
! deallocate(Nx1)   ! inserting the de-allocate
! deallocate(Nx2)
close(1)
call MPI_Finalize(mpierr)
end
! Subroutines ...

Another issue is that if I deallocate the variables, the code hangs after the end of the calculation and never completes execution; I don't know whether this is related to the MPI_Allreduce issue.
Upvotes: 2
Views: 679
Reputation: 3819
You expect the vectors Nx1(j), Nx2(j) to be sent to the root (mype = 0), which in every subinterval (j = 1, Nint) sums the contributions from every processor (Nx1(j) from processor 1 + Nx1(j) from processor 2 + ...) and produces Nx5(j), Nx6(j).
That is not what an allreduce does. A reduction means the summation is performed in parallel across all processes; an allreduce additionally means that every process receives the result of that summation.
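To make the distinction concrete, here is a minimal standalone sketch (the program and variable names are made up for illustration, they are not from your code): with MPI_Reduce only the root ends up with the sum, whereas with MPI_Allreduce every rank does.

program reduce_vs_allreduce
  implicit none
  include "mpif.h"
  integer :: mype, npe, mpierr
  integer :: local, sum_on_root, sum_everywhere

  call MPI_INIT(mpierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, npe, mpierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, mype, mpierr)

  local = mype + 1   ! each rank contributes one integer

  ! MPI_Reduce: only the root (rank 0) holds the sum afterwards
  call MPI_REDUCE(local, sum_on_root, 1, MPI_INTEGER, MPI_SUM, 0, &
       MPI_COMM_WORLD, mpierr)

  ! MPI_Allreduce: every rank holds the same sum afterwards
  call MPI_ALLREDUCE(local, sum_everywhere, 1, MPI_INTEGER, MPI_SUM, &
       MPI_COMM_WORLD, mpierr)

  if (mype == 0) print *, "sum on root:", sum_on_root
  print *, "rank", mype, "has sum:", sum_everywhere

  call MPI_FINALIZE(mpierr)
end program reduce_vs_allreduce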
Your MPI_Allreduces:
call MPI_ALLREDUCE (Nx1(j),Nx5(j),npe,mpi_integer,mpi_sum, &
& MPI_COMM_WORLD, mpierr)
call MPI_ALLREDUCE (Nx2(j),Nx6(j),npe,mpi_integer,mpi_sum, &
& MPI_COMM_WORLD, mpierr)
Here the count should actually be 1, not npe: the count states how many elements each process contributes to (and receives from) the element-wise reduction, not how many contributions there are in total across all processes.
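Put differently, if you kept the per-element loop, each call would reduce a single integer per process, so it would look like this (a sketch of that variant, based on the loop from your code with the count set to 1):

do j=1,Nint
   ! each call sums one element of the local histogram across all ranks
   call MPI_ALLREDUCE (Nx1(j), Nx5(j), 1, mpi_integer, mpi_sum, &
        MPI_COMM_WORLD, mpierr)
   call MPI_ALLREDUCE (Nx2(j), Nx6(j), 1, mpi_integer, mpi_sum, &
        MPI_COMM_WORLD, mpierr)
enddo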
However, you do not actually need that loop at all, because MPI_Allreduce can handle multiple elements in a single call. So instead of the loop around your allreduces, I believe you want something like:
integer :: Nx1(Nint)
integer :: Nx2(Nint)
integer :: Nx5(Nint)
integer :: Nx6(Nint)

call MPI_ALLREDUCE (Nx1, Nx5, Nint, mpi_integer, mpi_sum, &
     & MPI_COMM_WORLD, mpierr)
call MPI_ALLREDUCE (Nx2, Nx6, Nint, mpi_integer, mpi_sum, &
     & MPI_COMM_WORLD, mpierr)
Nx5 will then contain the element-wise sum of Nx1 across all processes, and Nx6 the sum of Nx2. The information in your question is a little sparse, so I am not quite sure whether this is what you are looking for.
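For completeness, here is a stripped-down, self-contained sketch of the whole pattern (only the array names are taken from your excerpt; the parameter Nint = 10 and the filler values are made up): each rank fills its local counters, one collective call per array sums them element-wise, and only the root writes the result.

program histogram_allreduce
  implicit none
  include "mpif.h"
  integer, parameter :: Nint = 10
  integer :: Nx1(Nint), Nx2(Nint), Nx5(Nint), Nx6(Nint)
  integer :: mype, npe, mpierr, j

  call MPI_INIT(mpierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, npe, mpierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, mype, mpierr)

  ! Stand-in for the particle loops: each rank fills its local counters
  Nx1 = mype + 1
  Nx2 = 2*(mype + 1)

  ! One collective call per array; the sum is taken element by element across ranks
  call MPI_ALLREDUCE(Nx1, Nx5, Nint, MPI_INTEGER, MPI_SUM, &
       MPI_COMM_WORLD, mpierr)
  call MPI_ALLREDUCE(Nx2, Nx6, Nint, MPI_INTEGER, MPI_SUM, &
       MPI_COMM_WORLD, mpierr)

  ! Only the root writes the combined histogram
  if (mype == 0) then
     do j = 1, Nint
        print *, j, Nx5(j), Nx6(j)
     enddo
  endif

  call MPI_FINALIZE(mpierr)
end program histogram_allreduce

Run with, e.g., mpirun -np 4 ./a.out: every rank ends up with identical Nx5 and Nx6, and rank 0 prints them.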
Upvotes: 1