Reputation: 1079
My Python code runs extremely slowly: it starts fast, then slows to a crawl. Is there anything I can do to speed things up? I'm opening a text file, reading its contents, filtering them, and writing the result to a CSV for later use by someone else for JSON.
I just started doing this, as I am sure you can tell. Any help would be greatly appreciated.
import glob
from pathlib import Path
import datetime
import re
import csv
# Module-level list buffers. Because they are globals, anything appended to
# them stays there for the life of the process unless it is explicitly cleared.
# NOTE(review): presumably get_this holds matched report lines and thislist
# holds matched file paths -- confirm against the functions that use them.
get_this = []
thislist = []
def timeteller():
    """Return yesterday's date as a ``YYYYMMDD`` string.

    The date is computed with ``timedelta`` arithmetic so that month and
    year boundaries roll over correctly. Subtracting 1 from the day number
    directly would produce the non-existent day ``00`` on the first of a
    month.

    Returns:
        str: Eight-digit date stamp, e.g. ``'20240229'`` when called on
        1 March 2024.
    """
    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
    return yesterday.strftime('%Y%m%d')
def these_files(x, y, root='O:/Unit Management/Reports/G4S/'):
    """Return the transaction report files matching a directory/file prefix.

    A fresh list is built on every call. Accumulating matches in a shared
    module-level list would make each call return every file found by all
    previous calls as well, so later processing grows with each call.

    Args:
        x: Prefix of the sub-directory name directly under ``root``.
        y: Prefix of the file name; the name must end in
            ``Transaction.txt``.
        root: Directory to search. Defaults to the original hard-coded
            report location.

    Returns:
        list[pathlib.Path]: Matching paths, in the order ``glob`` yields
        them. Empty when nothing matches.
    """
    pattern = '{}*/{}*Transaction.txt'.format(x, y)
    return list(Path(root).glob(pattern))
def hasNumbers(inputString):
    """Extract every run of decimal digits from ``inputString``.

    Args:
        inputString: Text to scan.

    Returns:
        A list of digit strings in order of appearance, or the string
        ``'1'`` when the text contains no digits. Either way, indexing
        the result with ``[0]`` yields a string.
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    return re.findall(r"\d+", inputString) or '1'
def get_odometers(thesepath):
    """Collect the lines mentioning odometers or lifetime counters.

    Each file is streamed line by line and the matches go into a list that
    is local to this call. Appending to a shared module-level list would
    make the result grow with every call, so later callers would re-scan
    lines belonging to earlier inputs.

    Args:
        thesepath: Iterable of file paths to read.

    Returns:
        list[str]: Every line (trailing newline included) containing
        ``"Odometers"`` or ``"Lifetime"``, in file order.
    """
    matches = []
    for report in thesepath:
        with open(report, "r") as handle:
            matches.extend(
                line for line in handle
                if "Odometers" in line or "Lifetime" in line
            )
    return matches
def make_pretty(checkthis):
    """Build a dict of odometer readings from filtered report lines.

    For every line containing one of the known odometer markers, the
    reading is taken from the first 20 characters of the *following* line
    (or of the marker line itself when it is the last one). The first run
    of digits found there is stored under the marker's key; ``'1'`` is
    stored when no digits are present. A marker seen again later
    overwrites the earlier value.

    Args:
        checkthis: Sequence of report lines, as returned by
            ``get_odometers``.

    Returns:
        dict: ``'Serial'`` (taken from the module-level ``banumber``) plus
        one entry per marker encountered: ``'BNR'``, ``'BNR 2'``,
        ``'BCR'``, ``'Hopper1'`` ... ``'Hopper8'``.
    """
    # Ordered (marker text, output key) pairs. Order matters: only the first
    # marker found in a line is used, as in an if/elif chain.
    fields = (
        ('(BNR) Odometers Accept', 'BNR'),
        ('(BNR 2) Odometers Accept', 'BNR 2'),
        ('(BCR) Odometers Accept', 'BCR'),
    ) + tuple(
        ('(BCR) Odometers Hopper{}'.format(n), 'Hopper{}'.format(n))
        for n in range(1, 9)
    )

    the_numbers = {'Serial': banumber}
    for i, line in enumerate(checkthis):
        for marker, key in fields:
            if marker in line:
                # Last element of the two-line window: the next line when
                # there is one, otherwise the marker line itself.
                value_line = checkthis[i:i + 2][-1]
                the_numbers[key] = hasNumbers(value_line[0:20])[0]
                break
    return the_numbers
# Date stamp embedded in the report file names. The original script took
# yesterday's stamp from timeteller() and then subtracted 1 from its integer
# form, i.e. it targeted the day before yesterday. The same two-day offset is
# computed here with real date arithmetic so it stays a valid date across
# month and year boundaries.
now = (datetime.date.today() - datetime.timedelta(days=2)).strftime('%Y%m%d')

finallist = []
with open('banumberlist1.txt') as thispath:
    for files in thispath:
        # Kept at module level on purpose: make_pretty() reads `banumber`.
        banumber = files.rstrip('\r\n')[0:8]
        if not banumber.strip():
            continue  # ignore blank lines in the serial-number list
        print(banumber)
        combined = '{}_{}'.format(banumber, now)
        # The helpers may accumulate into these shared module-level lists.
        # Emptying them first guarantees each serial number only processes
        # its own files instead of re-reading everything seen so far, which
        # is what makes a run get progressively slower.
        thislist.clear()
        get_this.clear()
        thepaths = these_files(banumber, combined)
        needtomakepretty = get_odometers(thepaths)
        goeslast = make_pretty(needtomakepretty)
        finallist.append(goeslast)

# `with` closes (and therefore flushes) the output file deterministically.
# Records are written back to back with no separator, as before.
with open('odom01.txt', 'w') as f:
    f.writelines(str(ba) for ba in finallist)
Upvotes: 0
Views: 1457
Reputation: 43533
If you want to know which part of your code is slow, you will need to use a profiler.
You could use `cProfile`, which is part of the standard library:
python -m cProfile -o profile.txt -s cumtime myscript.py
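This will write the profiling results to `profile.txt`. Note that the file is in the binary `pstats` format rather than plain text, and that the `-s cumtime` sort option only applies when `-o` is not given; to view the saved profile sorted by cumulative time, load it with the `pstats` module and call `sort_stats('cumtime')`.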
This will give you an overview of where your program is spending its time.
For a more detailed view, you could use the line_profiler module. This can even show you how much time every line of code uses.
After installing `line_profiler`, add the following to the beginning of your script:
import line_profiler
import atexit
# Create one LineProfiler instance; it is bound to the name `profile` so it
# can be applied to functions as the @profile decorator.
profile = line_profiler.LineProfiler()
# Register print_stats to run when the interpreter exits, so the collected
# statistics are printed at the end of the run.
atexit.register(profile.print_stats)
Now add the `@profile` decorator to all your functions, like so:
@profile
def timeteller():
    """Return yesterday's date as a ``YYYYMMDD`` string.

    Decorated with ``@profile`` so the line profiler records timings for
    it. The date is computed with ``timedelta`` arithmetic; subtracting 1
    from the day number directly would yield the non-existent day ``00``
    on the first of a month.

    Returns:
        str: Eight-digit date stamp such as ``'20240229'``.
    """
    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
    return yesterday.strftime('%Y%m%d')
You should put the code at the end of your script into a function (e.g. `main`) and add `@profile` to that as well.
Now run your script, and you will see a profile.
Upvotes: 5