Herve
Herve

Reputation: 11

Gauss-Legendre and Gauss-Chebyshev quadrature in FORTRAN

Please I wrote a code for 5D integration using Gauss-Legendre and Gauss-Chebyshev in FORTRAN but when I compile it is very slow. Please can someone tell me how to increase the speed?

MODULE GauLegMod
        IMPLICIT NONE
        CONTAINS

SUBROUTINE  GaussLeg(n0, x, w)
Integer::i,j,n,m, dx,l,repeat_state,n0
Real(8)::x_k1,x_k,Pn,Leg_prime,Leg,x_k0,min_root
real(8)::Roots(n0),w(n0),x(n0)
Real(8),Allocatable::Roots_sorted(:),Semi_roots(:)
n = n0
IF (Mod(n,2)==0) THEN
    m = n/2
    dx = m + 1
ELSE

    m = (n+1)/2
    dx = m

END IF

Allocate( Roots_sorted(m),Semi_roots(m) )

Semi_roots = 0.0
x_k0            = 0.0

IF ( Mod(n,2)==0 ) THEN

  i = 0

 ELSE

  i = 1
  Semi_roots(i) = 0.0

END IF
   DO

    x_k0 = x_k0 + (0.001/dx)
     repeat_state = 0

      x_k   = x_k0
      x_k1 = x_k

      DO
          Call Leg_Derivative(n,x_k,Leg_prime)

           x_k = x_k - (Leg(n,x_k)/Leg_prime)

           IF ( ABS(x_k1-x_k)<0.00001 ) EXIT

           x_k1 = x_k

      END DO

      DO j = 1, m

           IF ( ABS(Semi_roots(j)-x_k)<=1.0E-5 ) THEN

              repeat_state = 1

           END IF

      END DO

      IF (  (repeat_state == 0) .AND. (x_k>=0.0) ) THEN

         i = i + 1
         Semi_roots(i) = x_k

      END IF

      IF (i==m) Exit

    END DO

!-------------------Sorting--------------
DO i = 1, m

min_root = Minval(Semi_roots)
Semi_roots(Minloc(Semi_roots)) = Maxval(Semi_roots)
Roots_sorted(i) = min_root

END DO

Semi_roots = Roots_sorted
!-----------------------------------------

IF ( Mod(n,2)==0 ) THEN

  j = 0

   DO i = ((n/2)+1) , n

         j = j + 1
         Roots(i) = Semi_roots(j)

   END DO

   DO i =  1 , (n/2)

         Roots(i) = -Roots(n-i+1)

   END DO

ELSE


  j = 0

   DO i = ((n+1)/2) , n

         j = j + 1
         Roots(i) = Semi_roots(j)

   END DO

   DO i =  1 , (((n+1)/2)-1)

         Roots(i) = -Roots(n-i+1)

   END DO

END IF

x = Roots

!------------------------------Calculating Weights----------------------------
DO i = 1, n

    w(i) = 2.0*( 1.0-(Roots(i)**2.0) ) / ( (Real(n)*Leg(n-1,Roots(i)) )**2.0 )

END DO
!-----------------------------------------------------------------------------------

END Subroutine

Subroutine Chebyshev(n0,x,w)
Implicit None
Integer::i,n0
Real(8)::w(n0),x(n0),PI

PI = 4.0*Atan(1.0)

DO i = 1 , n0

        X(i) = Cos( (2.0*Real(i) - 1.0)*PI/(2.0*Real(n0)) )
        W(i) = PI/Real(n0)

END DO

End Subroutine

end Module

Function Leg(n,x)
   Implicit None
   Integer::i,n
   Real(8)::x,Pn(n+1),Leg

   Pn(1) = 1.0

   IF (n>=1) Pn(2) = x

   DO i = 1, n - 1

        Pn(i+2) = ((2.0*(i-1)+3.0)/((i-1)+2.0))*x*Pn(i+1) - (((i-1)+1.0)/((i-1)+2.0))*Pn(i)

   END DO

   Leg = Pn(n+1)

END Function
Subroutine Leg_Derivative(n,x,Leg_prime)
   Implicit None
   Integer::i,n
   Real(8)::x,Leg,Leg_prime,h,xminus,xplus

    h           = 0.00001
    xplus    = x + h
    xminus = x - h

    Leg_prime = (Leg(n,xplus) -  Leg(n,xminus)) / (2.0*h)

END Subroutine

FUNCTION V(x1, x2, x3, x4, x5)
    Implicit None
 REAL(8), INTENT(IN) :: x1, x2, x3, x4, x5
 
    Real*8         ::facteur,V

    facteur=x1+ x2 + x3 + x4 + x5

             V =facteur! function for integration

End function V

          PROGRAM GaussLeg1
          USE GauLegMod
          IMPLICIT NONE
          REAL(8), EXTERNAL:: V
          REAL(8) :: S, a1, a2, b1, b2, a3, a4, b3, b4,a5,b5,x1c,x2c,x3c,x4c,x5c,fac,fac2
          REAL(8), ALLOCATABLE:: c1(:), x1(:), c2(:), x2(:), c3(:), x3(:), c4(:), x4(:), c5(:), x5(:)
         INTEGER :: i, j, k, l1, m, n,n1, h,i1,i2

          PRINT*, 'Enter the number of points n of the Gauss-Legendre Quadrature'
          READ*, n
          PRINT*, 'Enter the number of points n1 of the Gauss-Chebyshev Quadrature'
          READ*, n1
          Print*
          Print*,' Please Wait. Calculating...'

      ALLOCATE (x1(n1))
      ALLOCATE (c1(n1))
      ALLOCATE (x2(n))
      ALLOCATE (c2(n))
      ALLOCATE (x3(n))
      ALLOCATE (c3(n))
      ALLOCATE (x4(n1))
      ALLOCATE (c4(n1))
      ALLOCATE (x5(n1))
      ALLOCATE (c5(n1))

        a1 = 0.0 ;  b1 = 2.0 
        a2 = 0.0 ;  b2 = 2.0 
        a3 = 0.0 ;  b3 = 2.0 
        a4 = 0.0 ;  b4 = 2.0 
        a5 = 0.0 ;  b5 = 2.0 

         CALL Chebyshev(n1, x1, c1)  
         CALL GaussLeg(n, x2, c2)    
         CALL GaussLeg(n, x3, c3)    
         CALL Chebyshev(n1, x4, c4)    
         CALL Chebyshev(n1, x5, c5)    

         S = 0.0

         DO i = 1, n1
!          
         DO j = 1, n
!          
         DO k = 1, n
!          
         DO l1 = 1, n1
!          
         DO m = 1, n1
!         
          x1c = ( (b1-a1)*x1(i) / 2.0 )    +  (a1+b1)/2.0
          x2c = ( (b2-a2)*x2(j) / 2.0 )    +  (a2+b2)/2.0
          x3c = ( (b3-a3)*x3(k) / 2.0 )   +  (a3+b3)/2.0
          x4c = ( (b4-a4)*x4(l1) / 2.0 )  +  (a4+b4)/2.0
          x5c = ( (b5-a5)*x5(m) / 2.0 )  +  (a5+b5)/2.0

          fac   = (b1-a1)*(b2-a2)*(b3-a3)*(b4-a4)*(b5-a5) / (2.0**5.0)
          fac2 =  (dSQRT(1.0-(X1(i)**2.0)) * dSQRT(1.0-(X4(l1)**2.0)) * dSQRT(1.0-(X5(m)**2.0)))

          S = S + c1(i)*c2(j)*c3(k)*c4(l1)*c5(m)*fac*fac2*V(x1c, x2c, x3c,x4c, x5c)

         END DO
         END DO
         END DO
         END DO
         END DO

        write(*,*) S

         END PROGRAM GaussLeg1  

Analytically I obtain 160 but the code is 160.0787 with many nodes. Please, I need help with this Gauss-Chebyshev quadrature method.

Upvotes: 0

Views: 369

Answers (1)

steve
steve

Reputation: 900

With minimal changes to your code to parse command line arguments and time the computations I get

% gfcx -o z a.f90
% ./z 40 40
  160.12343539957692     
Time:   8.13639 sec

% gfcx -o z -O2 -march=native -mtune=native a.f90
% ./z 40 40
  160.12343539957581     
 Time:   0.52530 sec

with gfortran 12.0.0 20210816. So, simply adding options to turn on optimizations gives a performance boost.

Now, if I make the following sequence of changes

  • Change dsqrt to sqrt. Use generic function instead of specific.

  • Change real literal constants to integer literal where possible. Let the compiler do its job with type conversion. Of particular note, you do not want to do 2.0**5.0. Instead, do 2**5.

  • Change real literal constants to double precision where needed, e.g., h = 0.00001 should be h = 0.00001d0.

  • The value of pi does not change. Compute it once as a parameter and use correct precision. real(8), parameter :: pi = 4 * atan(1.d0)

  • In do-loop where repeat_state=1 is set, you can exit immediately.

  • Remove excessive parentheses. These may inhibit schedule of CPU/FPU instructions.

  • The constants a1 = 0, b1 = 2, etc are used in loops to compute (b1 - a1) / 2 and (b1 + a1) / 2. These always evaluate to 1. This then shows fac = 1, so remove them.

  • Put leg() and leg_derivative() in the gaulegmod module.

Now the times are

% gfcx -o z a.f90
% ./z 40 40
  160.12342053717975     
 Time:   3.68685 sec

% gfcx -o z -O2 -march=native -mtune=native a.f90
% ./z 40 40
  160.12342053718061     
 Time:   0.48633 sec

% gfcx -o z -Ofast -march=native -mtune=native -ftree-vectorize a.f90
% ./z 40 40
  160.12342053715955     
 Time:   0.23689 sec

Upvotes: 2

Related Questions