Reputation: 735
I wrote a small program that searches for .mat files on a network drive. I am using Python 3.6, so I have access to the os.scandir()
function, which is said to be faster than os.walk().
However, I am facing a strange problem: the first time I run the program, it takes a long time to collect the data, but when I run the same program again a few hours later, it is very fast.
Can anyone explain what causes this? My code is shown below.
Note: I have a very fast internet connection, so mapping the network drive is seamless.
class WorkThread(QObject):
    """Qt worker that searches the measurement network drive for COMPARE files.

    All results are published through the module-level ``ui`` object: rows are
    appended to ``ui.tableView``'s model and the lookup containers
    (``ui.files``, ``ui.file_loc_name``, ``ui.clusterlist``, ``ui.enginedict``,
    ``ui.measurementdict``) are filled while the scan is running.
    """

    def scantree(self, path):
        """Recursively yield an ``os.DirEntry`` for every non-directory below *path*.

        Symbolic links are not followed: a link to a directory is yielded like
        a file instead of being descended into.

        Directories that disappear during the scan or that may not be read are
        reported on stdout and skipped, so a single bad folder on the share
        does not abort the whole walk.
        """
        try:
            # The context manager releases the directory handle as soon as the
            # listing is exhausted (or the generator is closed) instead of
            # waiting for garbage collection.
            with scandir(path) as entries:
                for entry in entries:
                    if entry.is_dir(follow_symlinks=False):
                        yield from self.scantree(entry.path)
                    else:
                        yield entry
        except (FileNotFoundError, PermissionError):
            print("Excluded file path")

    def searchFiles(self):
        """Scan the network drive and load every matching COMPARE file into the UI.

        A file matches when its name ends with ``COMPARE.mat`` and its full
        path contains none of the excluded markers. For each match a table row
        is appended and path components 2, 3 and 4 are recorded in
        ``ui.clusterlist``, ``ui.enginedict`` and ``ui.measurementdict``.
        When the scan is done the elapsed time and the number of files are
        printed and the widgets are switched to their "update complete" state.
        """
        start = time.time()
        ui.progressBar.setValue(0)
        # Raw string: '\M' is not a valid escape sequence and only works by
        # accident in a normal string literal.
        usePATH = r'V:\Messdatenbank_Powertrain'  # Location of the network drive
        os.chdir(usePATH)

        # Path fragments that disqualify a file. 'old' also covers 'old_'.
        excluded_markers = ('MATLAB_NVH_TOOL', 'old', 'MESSDATENBANK')
        # drive, root folder, levels 2-4 and the file name itself.
        min_components = 6

        i = 0  # number of matching files found so far
        tableSize = ui.tableView.width()
        ui.tableView.setColumnWidth(4, int(tableSize / 4) + 30)
        ui.tableView.setColumnWidth(3, int(tableSize / 4) + 300)

        for entry in self.scantree(usePATH):
            wanted = entry.name.endswith('COMPARE.mat') and not any(
                marker in entry.path for marker in excluded_markers
            )
            if not wanted:
                # NOTE(review): the running count is refreshed on non-matching
                # entries only; confirm this is the intended place for it.
                ui.label_7.setText(str(i))
                continue

            fileLevels = entry.path.split('\\')  # split the path at every backslash
            if len(fileLevels) < min_components:
                # Levels 2-4 are indexed below; a shallower path would raise
                # IndexError after the row had already been added.
                print(f"Skipped (path too shallow): {entry.path}")
                continue

            ui.progressBar.setValue(0)
            i += 1
            cluster, engine, measurement = fileLevels[2:5]

            t_row = [
                QtGui.QStandardItem(text)
                for text in (cluster, engine, measurement, fileLevels[-1], entry.path)
            ]
            ui.tableView.model().appendRow(t_row)
            ui.tableView.model().layoutChanged.emit()

            # Slice off the file name; list.remove() would delete the *first*
            # component equal to the file name, not necessarily the last one.
            ui.files.append('\\'.join(fileLevels[:-1]))  # containing folder of every match
            ui.file_loc_name.append(entry.path)
            ui.progressBar.setValue(50)

            if cluster not in ui.clusterlist:
                ui.clusterlist.append(cluster)

            engines = ui.enginedict.setdefault(cluster, [])
            if engine not in engines:
                engines.append(engine)

            measurements = ui.measurementdict.setdefault(engine, [])
            if measurement not in measurements:
                measurements.append(measurement)

            ui.progressBar.setValue(100)
            # Keep the GUI responsive while the (slow) network scan continues.
            QApplication.processEvents()

        ui.tableView.model().layoutChanged.emit()
        ui.progressBar.setValue(0)
        end = time.time()
        print(end - start)

        ui.label_2.setText('Update Complete')
        ui.pushButton.setEnabled(False)
        print(str(len(ui.files)))
        ui.tableView.resizeColumnToContents(2)
        ui.comboBox.setEnabled(True)
        ui.label_7.setText(str(len(ui.files)))
        ui.comboBox.clear()
        ui.comboBox.addItems(["--Select Cluster--"])
        ui.comboBox.addItems(ui.clusterlist)
        ui.progressBar.setValue(100)
        QApplication.processEvents()
        ui.pushButton_2.setEnabled(True)
        ui.pushButton_24.setEnabled(True)
Upvotes: 2
Views: 1514
Reputation: 10450
python.org PEP 471 -- os.scandir() describes the implementation of os.scandir
os.scandir
- This new function adds useful functionality and increases the speed of os.walk() by 2-20 times
The difference between the first execution and subsequent executions is caused by data being cached during the first execution.
Notes on caching
The DirEntry objects are relatively dumb -- the name and path attributes are obviously always cached, and the is_X and stat methods cache their values (immediately on Windows via FindNextFile, and on first use on POSIX systems via a stat system call) and never refetch from the system.
For this reason, DirEntry objects are intended to be used and thrown away after iteration, not stored in long-lived data structures with the methods called again and again.
If developers want "refresh" behaviour (for example, for watching a file's size change), they can simply use pathlib.Path objects, or call the regular os.stat() or os.path.getsize() functions which get fresh data from the operating system every call.
Upvotes: 3