Wander Bassi
Wander Bassi

Reputation: 1

Solve symmetric linear equation ax=b by using Gauss elimination

I'm new to parallel programming, and I'm currently working on optimizing a code that works with electromagnetic calculations. Analyzing how the program works, I realized that 85% of the time spent on execution is about solving a linear equation. I studied a little bit of openmp but I have no idea how to parallelize a nested loop like this. Any idea? Thank you in advance. Follow the code below

    Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
!  Solve sysmmetric linear equation ax=b by using Gauss elimination.
!  If kwji=1, no solution;if kwji=0,has solution
!  Input--n,ma,a(ma,n),b(n),ep,
!  Output--b,kwji
!------------------------------------------------------------------
   

        implicit real*8 (a-h,o-z)
           dimension a(ma,n),b(n),m(n+1)
           do 10 i=1,n
    10     m(i)=i
           do 120 k=1,n
              p=0.0
              do 20 i=k,n
                 do 20 j=k,n
                    if(dabs(a(i,j)).gt.dabs(p)) then
                       p=a(i,j)
                       io=i
                       jo=j
                    endif
    20        continue
              if(dabs(p)-ep) 30,30,35
    30        kwji=1
              return
    35        continue
              if(jo.eq.k) go to 45
              do 40 i=1,n
                 t=a(i,jo)
                 a(i,jo)=a(i,k)
                 a(i,k)=t
    40        continue
              j=m(k)
              m(k)=m(jo)
              m(jo)=j
    45        if(io.eq.k) go to 55
              do 50 j=k,n
                 t=a(io,j)
                 a(io,j)=a(k,j)
                 a(k,j)=t
    50        continue
              t=b(io)
              b(io)=b(k)
              b(k)=t
    55        p=1./p
              in=n-1
              if(k.eq.n) go to 65
              do 60 j=k,in
    60        a(k,j+1)=a(k,j+1)*p
    65        b(k)=b(k)*p
              if(k.eq.n) go to 120
              do 80 i=k,in
                 do 70 j=k,in
    70              a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
    80              b(i+1)=b(i+1)-a(i+1,k)*b(k)
    120       continue
              do 130 i1=2,n
                 i=n+1-i1
                 do 130 j=i,in
    130       b(i)=b(i)-a(i,j+1)*b(j+1)
              do 140 k=1,n
                 i=m(k)
    140       a(1,i)=b(k)
              do 150 k=1,n
    150       b(k)=a(1,k)
              kwji=0
        return
        end

Upvotes: 0

Views: 474

Answers (1)

Ian Bush
Ian Bush

Reputation: 7433

If you are interested in performance you should be using LAPACK. To illustrate this I have written a simple driver program that compares the speed of the code you provided with calling DSYSV, the LAPACK routine that solves a set of linear equations for a symmetric, "double precision" matrix. The code and results are below, but in summary LAPACK varies from being 3.3 times faster than the Fortran, to 725 times faster. Note this is probably not an optimised LAPACK library, it is whatever comes with the Mint Linux I have installed on my laptop. Anyway, details below

ian@eris:~/work/stack$ cat solve.f90
    Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
!  Solve sysmmetric linear equation ax=b by using Gauss elimination.
!  If kwji=1, no solution;if kwji=0,has solution
!  Input--n,ma,a(ma,n),b(n),ep,
!  Output--b,kwji
!------------------------------------------------------------------
   

        implicit real*8 (a-h,o-z)
           dimension a(ma,n),b(n),m(n+1)
           do 10 i=1,n
    10     m(i)=i
           do 120 k=1,n
              p=0.0
              do 20 i=k,n
                 do 20 j=k,n
                    if(dabs(a(i,j)).gt.dabs(p)) then
                       p=a(i,j)
                       io=i
                       jo=j
                    endif
    20        continue
              if(dabs(p)-ep) 30,30,35
    30        kwji=1
              return
    35        continue
              if(jo.eq.k) go to 45
              do 40 i=1,n
                 t=a(i,jo)
                 a(i,jo)=a(i,k)
                 a(i,k)=t
    40        continue
              j=m(k)
              m(k)=m(jo)
              m(jo)=j
    45        if(io.eq.k) go to 55
              do 50 j=k,n
                 t=a(io,j)
                 a(io,j)=a(k,j)
                 a(k,j)=t
    50        continue
              t=b(io)
              b(io)=b(k)
              b(k)=t
    55        p=1./p
              in=n-1
              if(k.eq.n) go to 65
              do 60 j=k,in
    60        a(k,j+1)=a(k,j+1)*p
    65        b(k)=b(k)*p
              if(k.eq.n) go to 120
              do 80 i=k,in
                 do 70 j=k,in
    70              a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
    80              b(i+1)=b(i+1)-a(i+1,k)*b(k)
    120       continue
              do 130 i1=2,n
                 i=n+1-i1
                 do 130 j=i,in
    130       b(i)=b(i)-a(i,j+1)*b(j+1)
              do 140 k=1,n
                 i=m(k)
    140       a(1,i)=b(k)
              do 150 k=1,n
    150       b(k)=a(1,k)
              kwji=0
        return
     end 

Program solve_eqns

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

  Implicit None

  Real( wp ), Dimension( :, : ), Allocatable :: a, a_copy

  Real( wp ), Dimension( : ), Allocatable :: b
  Real( wp ), Dimension( : ), Allocatable :: x_lap, x_for
  Real( wp ), Dimension( : ), Allocatable :: work

  Real( wp ) :: time_lap, time_for
  
  Integer, Dimension( : ), Allocatable :: pivots

  Integer :: i
  Integer :: n, nb = 64 ! hack value for nb - should use ilaenv
  Integer :: error

  Integer( li ) :: start, finish, rate

  Write( *, * ) 'n ?'
  Read ( *, * )  n

  Allocate( a( 1:n, 1:n ) )
  Allocate( b( 1:n ) )
  Allocate( pivots( 1:n ) )

  ! Set up matrix
  Call Random_number( a )
  a = a - 0.5_wp
  ! Make A symmetric
  a = 0.5_wp * ( a + Transpose( a ) )
  ! Add n to diagonal of A to avoid any nasty condition numbers
  Do i = 1, n
     a( i, i ) = a( i, i ) + n
  End Do
  ! And keep a back up of A
  a_copy = a

  ! RHS
  Call Random_number( b )

  ! Solve with LAPACK
  x_lap = b
  Allocate( work( 1:n * nb ) )
  Call system_clock( start, rate )
  Call dsysv( 'U', n, 1, a, Size( a, dim = 1 ), pivots, &
       x_lap, Size( x_lap, Dim = 1 ), work, Size( work ), error )
  Call system_clock( finish, rate )
  time_lap = Real( finish - start, Kind( time_lap ) ) / rate
  ! Restore A
  a = a_copy
  Write( *, * ) 'Errors for LAPACK', error, Maxval( Abs( Matmul( a, x_lap ) - b ) )
  Write( *, * ) 'Time for LAPACK', time_lap

  ! Solve with Fortran  
  x_for = b
  Call system_clock( start, rate )
  Call GaussEqSolver_Sym( Size( a, Dim = 2 ), Size( a, Dim = 1 ), a, x_for, Epsilon( a ), error )
  Call system_clock( finish, rate )
  time_for = Real( finish - start, Kind( time_for ) ) / rate
  ! Restore A
  a = a_copy
  Write( *, * ) 'Errors for Fortran', error, Maxval( Abs( Matmul( a, x_for ) - b ) )
  Write( *, * ) 'Time_For for Fortran', time_for

  Write( *, * ) 'Max difference in solutions', Maxval( Abs( x_lap - x_for ) )

  Write( *, * ) 'LAPACK is ', time_for / time_lap, ' times quicker than the Fortran'
  
       
End Program solve_eqns
ian@eris:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ian@eris:~/work/stack$ gfortran -O3 solve.f90  -llapack
ian@eris:~/work/stack$ ./a.out
 n ?
100
 Errors for LAPACK           0   4.4408920985006262E-016
 Time for LAPACK   1.5952670000000000E-003
 Errors for Fortran           0   9.9920072216264089E-016
 Time_For for Fortran   5.3095140000000004E-003
 Max difference in solutions   8.6736173798840355E-018
 LAPACK is    3.3282917530419676       times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
 n ?
1000
 Errors for LAPACK           0   1.3322676295501878E-015
 Time for LAPACK   3.9014976000000000E-002
 Errors for Fortran           0   4.9960036108132044E-015
 Time_For for Fortran   1.9314730620000000     
 Max difference in solutions   4.7704895589362195E-018
 LAPACK is    49.505940026722044       times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
 n ?
5000 
 Errors for LAPACK           0   4.3298697960381105E-015
 Time for LAPACK   1.2611959250000000     
 Errors for Fortran           0   1.3322676295501878E-014
 Time_For for Fortran   913.76959534100001     
 Max difference in solutions   2.7647155398380363E-018
 LAPACK is    724.52628273517462       times quicker than the Fortran

Upvotes: 2

Related Questions