Reputation: 3
I have a formatted file, and I would like to repeatedly select one row at random and read it. Due to memory constraints, it is not possible to read all the data, store it in arrays, and later select one line at a time.
I solved it this way (only the relevant code is shown), but it is quite slow, and I'm wondering if someone could help me find a faster way to do it (I'm not a Fortran expert).
Edit: Yes, I would like to call this routine many times (~1kk, i.e. about one million); I'm defining the starting parameters for further analysis.
! Pick one record of gene_rid.txt at random and read its seven fields
! (ID, E, COSX, COSY, X, Y, Z) with list-directed input.
!
! The file is opened on the first pass only (guarded by LFIRST) and is
! rewound after every read, so each pass starts scanning from record 1.
! NOTE(review): LFIRST is assumed to be declared LOGICAL outside this
! fragment -- TODO confirm.
! NOTE(review): FLRNDM is assumed to return a uniform deviate in
! [0,1) -- TODO confirm against its documentation.
      PARAMETER(NLINES=10000000)
      REAL ID,E,X,Y,Z,COSX,COSY
      SAVE LOO
      DATA LFIRST / .TRUE. /
      IF ( LFIRST ) THEN
         LFIRST = .FALSE.
         OPEN(UNIT=88,FILE="../../../gene_rid.txt",STATUS="OLD")
      END IF
      XI = FLRNDM(XDUMMY)
! Target record number in 1..NLINES.  The MIN() clamp keeps the index
! inside the file even if the generator were ever to return exactly 1.
      LINE = MIN(INT(XI * DBLE(NLINES)) + 1, NLINES)
! Skip the LINE-1 records that precede the target.  (Skipping LINE
! records, as before, returned record LINE+1: record 1 could never be
! chosen and LINE = NLINES ran past the end of the file.)  An empty
! input list advances one record without parsing any field.
      DO LOO=1,LINE-1
         READ(88,*,IOSTAT=iostat)
      END DO
! Read the selected record; iostat is non-zero if the read failed.
      READ(88,*,IOSTAT=iostat) ID,E,COSX,COSY,X,Y,Z
! Reposition at the start of the file for the next selection.
      REWIND(88)
This is how the input file is formatted
head gene_rid.txt
7 0.933549E-03 -.162537E+00 0.136150E-01 -.4791E+01 0.3356E+00 0.2900E+02
7 0.203748E-02 -.115359E+00 -.217682E+00 -.3453E+01 -.6606E+01 0.2900E+02
7 0.289498E-02 0.159572E+00 -.954033E-01 0.4767E+01 -.2730E+01 0.2900E+02
Upvotes: 0
Views: 85
Reputation: 7432
Given that the comments say all the lines are the same length, I would use a direct access file, something like the program below. Take the timings with a pinch of salt, because who knows what caching the OS is doing.
ijb@ijb-Latitude-5410:~/work/stack$ cat rl.f90
! Benchmark for random access to fixed-length text records.
!
! 1. Writes n_lines records, each formatted with fmt, to 'test.dat' using
!    ordinary sequential formatted output, and reports the write time.
! 2. Re-opens the same file for formatted direct access, with a record
!    length equal to the formatted text plus the end-of-record marker.
! 3. Reads n_tests randomly chosen records by record number, checks that
!    the integer stored in each record equals the record number requested,
!    and reports the read time.
Program testit

  Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64

  Implicit None

  Integer, Parameter :: max_file_length = 128
  Integer, Parameter :: max_line_length = 8192
  Integer, Parameter :: n_lines = 10000000
  Integer, Parameter :: n_tests = 1000000

  ! Every record is written with this format, so all records have the
  ! same length - the property direct access relies on.
  Character( Len = * ), Parameter :: fmt = '( i8, 1x, f14.2 )'

  Character( Len = max_file_length ) :: file
  Character( Len = max_line_length ) :: line

  Real( wp ) :: rand
  Real( wp ) :: a

  ! 64 bit clock variables: the li kind was imported but never used, and
  ! default integer counters give a coarser clock on some compilers
  Integer( li ) :: start, finish, rate

  Integer :: recl
  Integer :: unit
  Integer :: line_number
  Integer :: d
  Integer :: i
  Integer :: io_stat

  Logical :: worked

  file = 'test.dat'

  ! Write some test data
  ! 'replace' guarantees a fresh file whatever a previous run left behind
  Open( newunit = unit, file = file, form = 'formatted', &
       status = 'replace', action = 'write' )
  Call system_clock( start, rate )
  Do i = 1, n_lines
     Write( unit, fmt ) i, Real( i, wp )
  End Do
  Call system_clock( finish, rate )
  Write( *, * ) 'Write time ', Real( finish - start, wp ) / rate
  Write( *, * ) 'Av. time per write = ', &
       Real( finish - start, wp ) / rate / n_lines
  Close( unit )

  ! Read it via direct access

  ! Work out the record length including the end of record marker
  ! NOTE(review): new_line( 'a' ) is a single character, so this assumes
  ! a one character record terminator in the file - confirm on platforms
  ! that terminate text lines with two characters.
  Write( line, fmt ) 1, 1.0_wp
  recl = Len_trim( line ) + Len( new_line( 'a' ) )

  ! Now read the file
  Open( newunit = unit, file = file, &
       access = 'direct', recl = recl, form = 'formatted', &
       status = 'old', action = 'read' )
  worked = .True.
  Call system_clock( start, rate )
  Do i = 1, n_tests
     Call Random_number( rand )
     ! rand is in [0,1), so line_number is in 1..n_lines
     line_number = Int( rand * n_lines ) + 1
     Read( unit, fmt, rec = line_number, iostat = io_stat ) d, a
     ! A failed read and a mismatched record are both reported as a
     ! failure; d is only inspected when the read itself succeeded
     If( io_stat /= 0 ) Then
        worked = .False.
     Else
        worked = ( d == line_number )
     End If
     If( .Not. worked ) Then
        Write( *, * ) 'Failed to read right line'
        Exit
     End If
  End Do
  Call system_clock( finish, rate )
  Close( unit )

  If( worked ) Then
     Write( *, * )
     Write( *, * ) 'Read successful'
     Write( *, * ) 'Read time ', Real( finish - start, wp ) / rate
     Write( *, * ) 'Av. time per read = ', &
          Real( finish - start, wp ) / rate / n_tests
  End If

End Program testit
ijb@ijb-Latitude-5410:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ijb-Latitude-5410:~/work/stack$ gfortran -std=f2008 -fcheck=all -Wall -Wextra -g -O rl.f90
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Write time 8.8620000000000001
Av. time per write = 8.8619999999999998E-007
Read successful
Read time 2.3670000000000000
Av. time per read = 2.3670000000000000E-006
ijb@ijb-Latitude-5410:~/work/stack$
Upvotes: 1
Reputation: 2981
It might help to read the file once, and write it to lots of one-line files, e.g.
! Split gene_rid.txt into one file per record: record i of the input is
! copied to a file named "gene_rid<i>" in the current directory.
! At most nlines records are copied; the loop stops early at end of file.
! Input records longer than 100 characters are truncated by the buffer.
integer, parameter :: nlines = 10000000
character(100) :: line
character(100) :: filename
integer :: i
integer :: in_unit, out_unit, ios

open(newunit=in_unit, file="../../../gene_rid.txt", status="old", action="read")
do i = 1, nlines
  read(in_unit, '(a)', iostat=ios) line
  ! a short input file ends the split cleanly; any other error is fatal
  if (is_iostat_end(ios)) exit
  if (ios /= 0) error stop "error while reading gene_rid.txt"
  write(filename, '(a,i0)') "gene_rid", i
  ! "replace" lets the split be rerun over files left by a previous run
  open(newunit=out_unit, file=filename, status="replace", action="write")
  ! '(a)' with trim copies the record verbatim; list-directed output
  ! would prepend a blank and write the padded 100-character buffer
  write(out_unit, '(a)') trim(line)
  close(out_unit)
enddo
close(in_unit)
Then in your main program you only have to open the right one-line file and read one line in each time, rather than scrolling through the entire large file.
If writing 1e7 one-line files is too much for your file system, it might instead be worth writing 1e4 thousand-line files or some such.
Upvotes: 0