Reputation: 11
Please I wrote a code for 5D integration using Gauss-Legendre and Gauss-Chebyshev in FORTRAN but when I compile it is very slow. Please can someone tell me how to increase the speed?
MODULE GauLegMod
IMPLICIT NONE
CONTAINS
SUBROUTINE GaussLeg(n0, x, w)
Integer::i,j,n,m, dx,l,repeat_state,n0
Real(8)::x_k1,x_k,Pn,Leg_prime,Leg,x_k0,min_root
real(8)::Roots(n0),w(n0),x(n0)
Real(8),Allocatable::Roots_sorted(:),Semi_roots(:)
n = n0
IF (Mod(n,2)==0) THEN
m = n/2
dx = m + 1
ELSE
m = (n+1)/2
dx = m
END IF
Allocate( Roots_sorted(m),Semi_roots(m) )
Semi_roots = 0.0
x_k0 = 0.0
IF ( Mod(n,2)==0 ) THEN
i = 0
ELSE
i = 1
Semi_roots(i) = 0.0
END IF
DO
x_k0 = x_k0 + (0.001/dx)
repeat_state = 0
x_k = x_k0
x_k1 = x_k
DO
Call Leg_Derivative(n,x_k,Leg_prime)
x_k = x_k - (Leg(n,x_k)/Leg_prime)
IF ( ABS(x_k1-x_k)<0.00001 ) EXIT
x_k1 = x_k
END DO
DO j = 1, m
IF ( ABS(Semi_roots(j)-x_k)<=1.0E-5 ) THEN
repeat_state = 1
END IF
END DO
IF ( (repeat_state == 0) .AND. (x_k>=0.0) ) THEN
i = i + 1
Semi_roots(i) = x_k
END IF
IF (i==m) Exit
END DO
!-------------------Sorting--------------
DO i = 1, m
min_root = Minval(Semi_roots)
Semi_roots(Minloc(Semi_roots)) = Maxval(Semi_roots)
Roots_sorted(i) = min_root
END DO
Semi_roots = Roots_sorted
!-----------------------------------------
IF ( Mod(n,2)==0 ) THEN
j = 0
DO i = ((n/2)+1) , n
j = j + 1
Roots(i) = Semi_roots(j)
END DO
DO i = 1 , (n/2)
Roots(i) = -Roots(n-i+1)
END DO
ELSE
j = 0
DO i = ((n+1)/2) , n
j = j + 1
Roots(i) = Semi_roots(j)
END DO
DO i = 1 , (((n+1)/2)-1)
Roots(i) = -Roots(n-i+1)
END DO
END IF
x = Roots
!------------------------------Calculating Weights----------------------------
DO i = 1, n
w(i) = 2.0*( 1.0-(Roots(i)**2.0) ) / ( (Real(n)*Leg(n-1,Roots(i)) )**2.0 )
END DO
!-----------------------------------------------------------------------------------
END Subroutine
Subroutine Chebyshev(n0,x,w)
Implicit None
Integer::i,n0
Real(8)::w(n0),x(n0),PI
PI = 4.0*Atan(1.0)
DO i = 1 , n0
X(i) = Cos( (2.0*Real(i) - 1.0)*PI/(2.0*Real(n0)) )
W(i) = PI/Real(n0)
END DO
End Subroutine
end Module
Function Leg(n,x)
Implicit None
Integer::i,n
Real(8)::x,Pn(n+1),Leg
Pn(1) = 1.0
IF (n>=1) Pn(2) = x
DO i = 1, n - 1
Pn(i+2) = ((2.0*(i-1)+3.0)/((i-1)+2.0))*x*Pn(i+1) - (((i-1)+1.0)/((i-1)+2.0))*Pn(i)
END DO
Leg = Pn(n+1)
END Function
Subroutine Leg_Derivative(n,x,Leg_prime)
Implicit None
Integer::i,n
Real(8)::x,Leg,Leg_prime,h,xminus,xplus
h = 0.00001
xplus = x + h
xminus = x - h
Leg_prime = (Leg(n,xplus) - Leg(n,xminus)) / (2.0*h)
END Subroutine
FUNCTION V(x1, x2, x3, x4, x5)
Implicit None
REAL(8), INTENT(IN) :: x1, x2, x3, x4, x5
Real*8 ::facteur,V
facteur=x1+ x2 + x3 + x4 + x5
V =facteur! function for integration
End function V
PROGRAM GaussLeg1
USE GauLegMod
IMPLICIT NONE
REAL(8), EXTERNAL:: V
REAL(8) :: S, a1, a2, b1, b2, a3, a4, b3, b4,a5,b5,x1c,x2c,x3c,x4c,x5c,fac,fac2
REAL(8), ALLOCATABLE:: c1(:), x1(:), c2(:), x2(:), c3(:), x3(:), c4(:), x4(:), c5(:), x5(:)
INTEGER :: i, j, k, l1, m, n,n1, h,i1,i2
PRINT*, 'Enter the number of points n of the Gauss-Legendre Quadrature'
READ*, n
PRINT*, 'Enter the number of points n1 of the Gauss-Chebyshev Quadrature'
READ*, n1
Print*
Print*,' Please Wait. Calculating...'
ALLOCATE (x1(n1))
ALLOCATE (c1(n1))
ALLOCATE (x2(n))
ALLOCATE (c2(n))
ALLOCATE (x3(n))
ALLOCATE (c3(n))
ALLOCATE (x4(n1))
ALLOCATE (c4(n1))
ALLOCATE (x5(n1))
ALLOCATE (c5(n1))
a1 = 0.0 ; b1 = 2.0
a2 = 0.0 ; b2 = 2.0
a3 = 0.0 ; b3 = 2.0
a4 = 0.0 ; b4 = 2.0
a5 = 0.0 ; b5 = 2.0
CALL Chebyshev(n1, x1, c1)
CALL GaussLeg(n, x2, c2)
CALL GaussLeg(n, x3, c3)
CALL Chebyshev(n1, x4, c4)
CALL Chebyshev(n1, x5, c5)
S = 0.0
DO i = 1, n1
!
DO j = 1, n
!
DO k = 1, n
!
DO l1 = 1, n1
!
DO m = 1, n1
!
x1c = ( (b1-a1)*x1(i) / 2.0 ) + (a1+b1)/2.0
x2c = ( (b2-a2)*x2(j) / 2.0 ) + (a2+b2)/2.0
x3c = ( (b3-a3)*x3(k) / 2.0 ) + (a3+b3)/2.0
x4c = ( (b4-a4)*x4(l1) / 2.0 ) + (a4+b4)/2.0
x5c = ( (b5-a5)*x5(m) / 2.0 ) + (a5+b5)/2.0
fac = (b1-a1)*(b2-a2)*(b3-a3)*(b4-a4)*(b5-a5) / (2.0**5.0)
fac2 = (dSQRT(1.0-(X1(i)**2.0)) * dSQRT(1.0-(X4(l1)**2.0)) * dSQRT(1.0-(X5(m)**2.0)))
S = S + c1(i)*c2(j)*c3(k)*c4(l1)*c5(m)*fac*fac2*V(x1c, x2c, x3c,x4c, x5c)
END DO
END DO
END DO
END DO
END DO
write(*,*) S
END PROGRAM GaussLeg1
Analytically I obtain 160 but the code is 160.0787 with many nodes. Please, I need help with this Gauss-Chebyshev quadrature method.
Upvotes: 0
Views: 369
Reputation: 900
With minimal changes to your code to parse command line arguments and time the computations I get
% gfcx -o z a.f90
% ./z 40 40
160.12343539957692
Time: 8.13639 sec
% gfcx -o z -O2 -march=native -mtune=native a.f90
% ./z 40 40
160.12343539957581
Time: 0.52530 sec
with gfortran 12.0.0 20210816. So, simply adding options to turn on optimizations gives a performance boost.
Now, if I make the following sequence of changes
Change dsqrt to sqrt. Use generic function instead of specific.
Change real literal constants to integer literal where possible.
Let the compiler do its job with type conversion. Of particular
note, you do not want to do 2.0**5.0
. Instead, do 2**5
.
Change real literal constants to double precision where needed,
e.g., h = 0.00001
should be h = 0.00001d0
.
The value of pi
does not change. Compute it once as a parameter and
use correct precision. real(8), parameter :: pi = 4 * atan(1.d0)
In do-loop where repeat_state=1
is set, you can exit
immediately.
Remove excessive parentheses. These may inhibit schedule of CPU/FPU instructions.
The constants a1 = 0
, b1 = 2
, etc are used in loops to compute
(b1 - a1) / 2
and (b1 + a1) / 2
. These always evaluate to 1. This then shows fac = 1
, so remove them.
Put leg()
and leg_derivative()
in the gaulegmod
module.
Now the times are
% gfcx -o z a.f90
% ./z 40 40
160.12342053717975
Time: 3.68685 sec
% gfcx -o z -O2 -march=native -mtune=native a.f90
% ./z 40 40
160.12342053718061
Time: 0.48633 sec
% gfcx -o z -Ofast -march=native -mtune=native -ftree-vectorize a.f90
% ./z 40 40
160.12342053715955
Time: 0.23689 sec
Upvotes: 2