Reputation: 1
I'm new to parallel programming, and I'm currently working on optimizing a code that works with electromagnetic calculations. Analyzing how the program works, I realized that 85% of the time spent on execution is about solving a linear equation. I studied a little bit of openmp but I have no idea how to parallelize a nested loop like this. Any idea? Thank you in advance. Follow the code below
Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
! Solve sysmmetric linear equation ax=b by using Gauss elimination.
! If kwji=1, no solution;if kwji=0,has solution
! Input--n,ma,a(ma,n),b(n),ep,
! Output--b,kwji
!------------------------------------------------------------------
implicit real*8 (a-h,o-z)
dimension a(ma,n),b(n),m(n+1)
do 10 i=1,n
10 m(i)=i
do 120 k=1,n
p=0.0
do 20 i=k,n
do 20 j=k,n
if(dabs(a(i,j)).gt.dabs(p)) then
p=a(i,j)
io=i
jo=j
endif
20 continue
if(dabs(p)-ep) 30,30,35
30 kwji=1
return
35 continue
if(jo.eq.k) go to 45
do 40 i=1,n
t=a(i,jo)
a(i,jo)=a(i,k)
a(i,k)=t
40 continue
j=m(k)
m(k)=m(jo)
m(jo)=j
45 if(io.eq.k) go to 55
do 50 j=k,n
t=a(io,j)
a(io,j)=a(k,j)
a(k,j)=t
50 continue
t=b(io)
b(io)=b(k)
b(k)=t
55 p=1./p
in=n-1
if(k.eq.n) go to 65
do 60 j=k,in
60 a(k,j+1)=a(k,j+1)*p
65 b(k)=b(k)*p
if(k.eq.n) go to 120
do 80 i=k,in
do 70 j=k,in
70 a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
80 b(i+1)=b(i+1)-a(i+1,k)*b(k)
120 continue
do 130 i1=2,n
i=n+1-i1
do 130 j=i,in
130 b(i)=b(i)-a(i,j+1)*b(j+1)
do 140 k=1,n
i=m(k)
140 a(1,i)=b(k)
do 150 k=1,n
150 b(k)=a(1,k)
kwji=0
return
end
Upvotes: 0
Views: 474
Reputation: 7433
If you are interested in performance you should be using LAPACK. To illustrate this I have written a simple driver program that compares the speed of the code you provided with calling DSYSV, the LAPACK routine that solves a set of linear equations for a symmetric, "double precision" matrix. The code and results are below, but in summary LAPACK varies from being 3.3 times faster than the Fortran, to 725 times faster. Note this is probably not an optimised LAPACK library, it is whatever comes with the Mint Linux I have installed on my laptop. Anyway, details below
ian@eris:~/work/stack$ cat solve.f90
Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
! Solve sysmmetric linear equation ax=b by using Gauss elimination.
! If kwji=1, no solution;if kwji=0,has solution
! Input--n,ma,a(ma,n),b(n),ep,
! Output--b,kwji
!------------------------------------------------------------------
implicit real*8 (a-h,o-z)
dimension a(ma,n),b(n),m(n+1)
do 10 i=1,n
10 m(i)=i
do 120 k=1,n
p=0.0
do 20 i=k,n
do 20 j=k,n
if(dabs(a(i,j)).gt.dabs(p)) then
p=a(i,j)
io=i
jo=j
endif
20 continue
if(dabs(p)-ep) 30,30,35
30 kwji=1
return
35 continue
if(jo.eq.k) go to 45
do 40 i=1,n
t=a(i,jo)
a(i,jo)=a(i,k)
a(i,k)=t
40 continue
j=m(k)
m(k)=m(jo)
m(jo)=j
45 if(io.eq.k) go to 55
do 50 j=k,n
t=a(io,j)
a(io,j)=a(k,j)
a(k,j)=t
50 continue
t=b(io)
b(io)=b(k)
b(k)=t
55 p=1./p
in=n-1
if(k.eq.n) go to 65
do 60 j=k,in
60 a(k,j+1)=a(k,j+1)*p
65 b(k)=b(k)*p
if(k.eq.n) go to 120
do 80 i=k,in
do 70 j=k,in
70 a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
80 b(i+1)=b(i+1)-a(i+1,k)*b(k)
120 continue
do 130 i1=2,n
i=n+1-i1
do 130 j=i,in
130 b(i)=b(i)-a(i,j+1)*b(j+1)
do 140 k=1,n
i=m(k)
140 a(1,i)=b(k)
do 150 k=1,n
150 b(k)=a(1,k)
kwji=0
return
end
Program solve_eqns
Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64
Implicit None
Real( wp ), Dimension( :, : ), Allocatable :: a, a_copy
Real( wp ), Dimension( : ), Allocatable :: b
Real( wp ), Dimension( : ), Allocatable :: x_lap, x_for
Real( wp ), Dimension( : ), Allocatable :: work
Real( wp ) :: time_lap, time_for
Integer, Dimension( : ), Allocatable :: pivots
Integer :: i
Integer :: n, nb = 64 ! hack value for nb - should use ilaenv
Integer :: error
Integer( li ) :: start, finish, rate
Write( *, * ) 'n ?'
Read ( *, * ) n
Allocate( a( 1:n, 1:n ) )
Allocate( b( 1:n ) )
Allocate( pivots( 1:n ) )
! Set up matrix
Call Random_number( a )
a = a - 0.5_wp
! Make A symmetric
a = 0.5_wp * ( a + Transpose( a ) )
! Add n to diagonal of A to avoid any nasty condition numbers
Do i = 1, n
a( i, i ) = a( i, i ) + n
End Do
! And keep a back up of A
a_copy = a
! RHS
Call Random_number( b )
! Solve with LAPACK
x_lap = b
Allocate( work( 1:n * nb ) )
Call system_clock( start, rate )
Call dsysv( 'U', n, 1, a, Size( a, dim = 1 ), pivots, &
x_lap, Size( x_lap, Dim = 1 ), work, Size( work ), error )
Call system_clock( finish, rate )
time_lap = Real( finish - start, Kind( time_lap ) ) / rate
! Restore A
a = a_copy
Write( *, * ) 'Errors for LAPACK', error, Maxval( Abs( Matmul( a, x_lap ) - b ) )
Write( *, * ) 'Time for LAPACK', time_lap
! Solve with Fortran
x_for = b
Call system_clock( start, rate )
Call GaussEqSolver_Sym( Size( a, Dim = 2 ), Size( a, Dim = 1 ), a, x_for, Epsilon( a ), error )
Call system_clock( finish, rate )
time_for = Real( finish - start, Kind( time_for ) ) / rate
! Restore A
a = a_copy
Write( *, * ) 'Errors for Fortran', error, Maxval( Abs( Matmul( a, x_for ) - b ) )
Write( *, * ) 'Time_For for Fortran', time_for
Write( *, * ) 'Max difference in solutions', Maxval( Abs( x_lap - x_for ) )
Write( *, * ) 'LAPACK is ', time_for / time_lap, ' times quicker than the Fortran'
End Program solve_eqns
ian@eris:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ian@eris:~/work/stack$ gfortran -O3 solve.f90 -llapack
ian@eris:~/work/stack$ ./a.out
n ?
100
Errors for LAPACK 0 4.4408920985006262E-016
Time for LAPACK 1.5952670000000000E-003
Errors for Fortran 0 9.9920072216264089E-016
Time_For for Fortran 5.3095140000000004E-003
Max difference in solutions 8.6736173798840355E-018
LAPACK is 3.3282917530419676 times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
n ?
1000
Errors for LAPACK 0 1.3322676295501878E-015
Time for LAPACK 3.9014976000000000E-002
Errors for Fortran 0 4.9960036108132044E-015
Time_For for Fortran 1.9314730620000000
Max difference in solutions 4.7704895589362195E-018
LAPACK is 49.505940026722044 times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
n ?
5000
Errors for LAPACK 0 4.3298697960381105E-015
Time for LAPACK 1.2611959250000000
Errors for Fortran 0 1.3322676295501878E-014
Time_For for Fortran 913.76959534100001
Max difference in solutions 2.7647155398380363E-018
LAPACK is 724.52628273517462 times quicker than the Fortran
Upvotes: 2