Reputation: 97
In a Python script I am watching a directory for new files coming from a scanner. Currently my code only reacts to the IN_CLOSE_WRITE event. I am aware that the right way would be to watch for an IN_CREATE event followed by an IN_CLOSE_WRITE event.
My current code looks like this:
import os
import sys
import logging
import inotify.adapters
import ocrmypdf
def DoOCR(filePath, fileName):
    """Run OCR in place on the file ``fileName`` inside ``filePath``.

    The joined path is handed to ``ocrmypdf.ocr`` as both input and output,
    with deskew and clean enabled and the language set to ``"deu"``.

    A ``PriorOcrFoundError`` is reported on stdout and swallowed; any other
    exception is reported on stdout and then re-raised to the caller.
    """
    print("Processing {}".format(fileName))
    ocr_options = dict(deskew=True, clean=True, language="deu")
    try:
        target = os.path.join(filePath, fileName)
        # Same path for input and output: the file is overwritten in place.
        ocrmypdf.ocr(target, target, **ocr_options)
    except ocrmypdf.exceptions.PriorOcrFoundError as err:
        print("Already processed: {0}".format(err))
    except:
        # Catches everything, prints the exception type, then re-raises.
        print("Unexpected error:", sys.exc_info()[0])
        raise
if __name__ == '__main__':
    # Set up logging at DEBUG level (use logging.INFO for less output).
    logging.basicConfig(level=logging.DEBUG)

    i = inotify.adapters.Inotify()

    pathToWatch = '/srv/smb/scanneddocs'
    # Create the watched directory, including any missing parents. Using
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(pathToWatch, exist_ok=True)

    # Subscribe to file creation and to "closed after writing" events.
    watchMask = inotify.constants.IN_CREATE | inotify.constants.IN_CLOSE_WRITE
    i.add_watch(pathToWatch, watchMask)

    for event in i.event_gen(yield_nones=False):
        (_, type_names, path, filename) = event

        # TODO: Check that an IN_CREATE is followed by an IN_CLOSE_WRITE
        if "IN_CLOSE_WRITE" not in type_names:
            continue

        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(path, filename, type_names))

        # Only the extension is needed to decide whether to OCR the file.
        extension = os.path.splitext(filename)[1]
        print(extension)

        if extension.lower() == ".pdf":
            DoOCR(pathToWatch, filename)
        else:
            print("{} is no PDF file. Skipping...".format(filename))
What would be the best way to ensure that both events occurred one after the other?
Upvotes: 0
Views: 2522
Reputation: 782508
Add the created files to a set, then check the set when you get the IN_CLOSE_WRITE event.
# Names of files for which an IN_CREATE event has been seen but no
# matching IN_CLOSE_WRITE has been handled yet.
created_files = set()

for _, type_names, path, filename in i.event_gen(yield_nones=False):
    if "IN_CREATE" in type_names:
        created_files.add(filename)

    # Nothing more to do unless the file was just closed after writing.
    if "IN_CLOSE_WRITE" not in type_names:
        continue

    # Ignore close-write events for files whose creation was not observed.
    if filename not in created_files:
        continue

    # The create/close pair is complete; forget the file again.
    created_files.remove(filename)

    print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(path, filename, type_names))

    extension = os.path.splitext(filename)[1]
    print(extension)

    if extension.lower() != ".pdf":
        print("{} is no PDF file. Skipping...".format(filename))
    else:
        DoOCR(pathToWatch, filename)
Upvotes: 1