Reputation: 316
I have an MWE where an OpenMP reduction breaks if I add the option for GPU offloading (even if it isn't used).
I know I can do a workaround using preprocessor directives, but I would really prefer to stick with an OpenMP solution.
When I compile and run the code with nvfortran it works fine. When I compile and run it with gfortran it breaks.
compiler versions tried:
gfortran 10.3.0
gfortran 11.3.0
nvfortran 22.5-0
(from the nvhpc toolkit)
I am using an Arm Neoverse-N1 CPU and I have also tried on my local Intel Core i7-7500U CPU.
commands to compile:
GPU version - nvfortran
: nvfortran -cpp -DUSEGPU -mp=gpu mwe.f90 && ./a.out
CPU version - nvfortran
: nvfortran -cpp -mp=multicore mwe.f90 && OMP_NUM_THREADS=2 ./a.out
CPU version - gfortran
: gfortran -cpp -fopenmp mwe.f90 && OMP_NUM_THREADS=2 ./a.out
Expected output: CPU Version
$ nvfortran -cpp -mp=multicore mwe.f90 && OMP_NUM_THREADS=2 ./a.out
id= 0 i,j= 1 1 a_ij= 1.000 b_ij= 0.900 max_diff=0.100
id= 0 i,j= 2 1 a_ij= 2.000 b_ij= 1.800 max_diff=0.200
id= 0 i,j= 3 1 a_ij= 3.000 b_ij= 2.700 max_diff=0.300
id= 0 i,j= 1 2 a_ij= 2.000 b_ij= 1.800 max_diff=0.300
id= 0 i,j= 2 2 a_ij= 4.000 b_ij= 3.600 max_diff=0.400
id= 1 i,j= 3 2 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 1 3 a_ij= 3.000 b_ij= 2.700 max_diff=0.600
id= 1 i,j= 2 3 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 3 3 a_ij= 9.000 b_ij= 8.100 max_diff=0.900
max_diff = 0.900
GPU Version
$ nvfortran -cpp -DUSEGPU -mp=gpu mwe.f90 && ./a.out
max_diff = 0.900
Output from gfortran:
$ gfortran -cpp -fopenmp mwe.f90 && OMP_NUM_THREADS=2 ./a.out
id= 0 i,j= 1 1 a_ij= 1.000 b_ij= 0.900 max_diff=0.100
id= 0 i,j= 2 1 a_ij= 2.000 b_ij= 1.800 max_diff=0.200
id= 0 i,j= 3 1 a_ij= 3.000 b_ij= 2.700 max_diff=0.300
id= 0 i,j= 1 2 a_ij= 2.000 b_ij= 1.800 max_diff=0.300
id= 0 i,j= 2 2 a_ij= 4.000 b_ij= 3.600 max_diff=0.400
id= 1 i,j= 3 2 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 1 3 a_ij= 3.000 b_ij= 2.700 max_diff=0.600
id= 1 i,j= 2 3 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 3 3 a_ij= 9.000 b_ij= 8.100 max_diff=0.900
max_diff = 0.000
If I comment out the lines below (marked with !<---- comment this) and delete the word distribute from the line !$omp distribute parallel do simd reduction(max:max_diff) collapse(2), then it works as expected.
$ gfortran -cpp -fopenmp mwe.f90 && OMP_NUM_THREADS=2 ./a.out
id= 0 i,j= 1 1 a_ij= 1.000 b_ij= 0.900 max_diff=0.100
id= 0 i,j= 2 1 a_ij= 2.000 b_ij= 1.800 max_diff=0.200
id= 0 i,j= 3 1 a_ij= 3.000 b_ij= 2.700 max_diff=0.300
id= 0 i,j= 1 2 a_ij= 2.000 b_ij= 1.800 max_diff=0.300
id= 0 i,j= 2 2 a_ij= 4.000 b_ij= 3.600 max_diff=0.400
id= 1 i,j= 3 2 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 1 3 a_ij= 3.000 b_ij= 2.700 max_diff=0.600
id= 1 i,j= 2 3 a_ij= 6.000 b_ij= 5.400 max_diff=0.600
id= 1 i,j= 3 3 a_ij= 9.000 b_ij= 8.100 max_diff=0.900
max_diff = 0.900
Am I misusing the if() clause with OpenMP, or is there potentially a bug in gfortran or OpenMP?
mwe.f90
! Minimal reproducer: fills two N x N arrays inside an OpenMP target region,
! then computes the maximum of abs(b - a) with an OpenMP max-reduction and
! prints it. The if(is_GPU) clauses make offloading conditional; is_GPU is
! .true. only when the file is preprocessed with the USEGPU macro defined.
program test
use omp_lib
implicit none
integer, parameter :: N=3
integer :: i, j
real :: a(N,N), b(N,N), max_diff
logical :: is_GPU
! Default to the host path; the preprocessor block below flips it for GPU builds.
is_GPU = .false.
#ifdef USEGPU
is_GPU = .true.
#endif
! Data region spanning both kernels and the final print: a and b are listed
! in map(to:), and the region is conditional on is_GPU.
!$omp target data if(is_GPU) map(to:a, b)
! First kernel: a(i,j) = i*j and b(i,j) = 0.9*i*j, with both loops collapsed.
!$omp target teams if(is_GPU)
!$omp distribute parallel do simd collapse(2)
do j = 1, N
do i = 1, N
a(i, j) = i*j
b(i, j) = i*j*0.9
end do
end do
!$omp end target teams
! Starting value for the max-reduction, assigned between the two target regions.
max_diff = 0.0
! Second kernel: max-reduction of abs(b - a) over all elements.
! NOTE(review): max_diff appears only in the reduction clause of the inner
! distribute parallel do simd; it is not named in any map or reduction clause
! on the enclosing target teams construct. Presumably that is why the value
! printed after the region can differ from the last per-iteration value;
! confirm against the OpenMP data-sharing rules for scalars on target.
!$omp target teams if(is_GPU) !<---- comment this
!$omp distribute parallel do simd reduction(max:max_diff) collapse(2)
do j = 1, N
do i = 1, N
max_diff = max(max_diff, abs(b(i, j) - a(i, j)))
! Per-iteration trace (thread id, indices, operands, running maximum);
! compiled only when USEGPU is not defined.
#ifndef USEGPU
write (*,'("id=",I2," i,j= ",2(I2)," a_ij= ",F5.3," b_ij= ",F5.3," max_diff=",F5.3)') &
omp_get_thread_num(), i, j, a(i,j), b(i,j), max_diff
#endif
end do
end do
!$omp end target teams !<---- comment this
! Final result as seen outside the target teams region.
write (*,'("max_diff = ", F6.3)') max_diff
!$omp end target data
end program
Upvotes: 0
Views: 151
Reputation: 316
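Not a bug, but a user error (see the bug report). In the original code max_diff is a scalar that appears in no map or reduction clause on the separate target teams construct, so it is treated as firstprivate on that construct and the reduced value is never copied back to the host variable. When the reduction clause sits on the combined target teams distribute parallel do simd construct, the reduction variable is mapped back to the host.
The following MWE works (but it requires gfortran version >= 12.2.0):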
!> Minimal example of an OpenMP max-reduction that can run on the host or be
!> offloaded to a device from the same source.
!>
!> Two N x N arrays are filled inside a target region (a = i*j, b = 0.9*i*j),
!> then the maximum of abs(b - a) is reduced into max_diff and printed.
!> Offloading is enabled only when the file is preprocessed with the USEGPU
!> macro defined; otherwise the if clauses evaluate to .false..
program test
  use omp_lib
  implicit none
  integer, parameter :: N = 3
  integer :: i, j
  real :: a(N,N), b(N,N), max_diff
  logical :: is_GPU

  ! Default to the host path; the preprocessor block flips it for GPU builds.
  is_GPU = .false.
#ifdef USEGPU
  is_GPU = .true.
#endif

  ! Keep a and b mapped across both kernels so they are not transferred
  ! between them. They are only produced and consumed inside target regions.
  !$omp target data if(is_GPU) map(to:a, b)

  ! Combined construct: the if clause is restricted to the target part via
  ! the target: modifier, so only the offload decision depends on is_GPU.
  !$omp target teams distribute parallel do simd if(target:is_GPU) collapse(2)
  do j = 1, N
    do i = 1, N
      a(i, j) = i*j
      b(i, j) = i*j*0.9
    end do
  end do

  max_diff = 0.0

  ! The reduction clause is on the combined construct, so it applies to every
  ! constituent level. max_diff is also mapped tofrom explicitly, so the
  ! copy-back to the host does not depend on the implicit mapping rule for
  ! reduction variables on combined target constructs.
  !$omp target teams distribute parallel do simd if(target:is_GPU) &
  !$omp&   map(tofrom:max_diff) reduction(max:max_diff) collapse(2)
  do j = 1, N
    do i = 1, N
      max_diff = max(max_diff, abs(b(i, j) - a(i, j)))
    end do
  end do

  write (*,'("max_diff = ", F6.3)') max_diff
  !$omp end target data
end program test
Upvotes: 0