Ronzo
Ronzo

Reputation: 97

Python inotify - Execute function upon new file creation

In a Python script I am watching a directory for new files coming from a scanner. Currently my code only reacts to the IN_CLOSE_WRITE event. I am aware that the right way would be to watch for an IN_CREATE event followed by an IN_CLOSE_WRITE event.

My current code looks like this:

import os
import sys
import logging
import inotify.adapters
import ocrmypdf

def DoOCR(filePath, fileName):
    """Run OCR on a single file, overwriting it in place.

    The file at ``filePath/fileName`` is passed to ``ocrmypdf.ocr`` as both
    input and output, with ``deskew`` and ``clean`` enabled and the ``"deu"``
    language setting.

    Args:
        filePath: Directory that contains the file.
        fileName: Name of the file inside ``filePath``.

    Raises:
        Any exception other than ``PriorOcrFoundError`` is reported on
        stdout and then re-raised unchanged.
    """
    print("Processing {}".format(fileName))
    try:
        target = os.path.join(filePath, fileName)
        ocrmypdf.ocr(
            target,
            target,
            deskew=True,
            clean=True,
            language="deu",
        )
    except ocrmypdf.exceptions.PriorOcrFoundError as already_done:
        # The document already carries OCR output; report it and carry on.
        print("Already processed: {0}".format(already_done))
    except BaseException as unexpected:
        # Report the exception class, then let the caller see the failure.
        print("Unexpected error:", type(unexpected))
        raise

if __name__ == '__main__':
    # Script entry point: watch the scanner drop directory and run OCR on
    # every PDF once the process writing it has closed the file.

    # Log at DEBUG level; use logging.INFO for less verbose output.
    logging.basicConfig(level=logging.DEBUG)

    i = inotify.adapters.Inotify()

    pathToWatch = '/srv/smb/scanneddocs'
    # makedirs(exist_ok=True) also creates missing parent directories and
    # does not fail when the directory already exists, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(pathToWatch, exist_ok=True)

    watchMask = inotify.constants.IN_CREATE | inotify.constants.IN_CLOSE_WRITE

    i.add_watch(pathToWatch, watchMask)

    for event in i.event_gen(yield_nones=False):
        (_, type_names, path, filename) = event

        # TODO: check that an IN_CREATE is followed by an IN_CLOSE_WRITE.
        # IN_CREATE events are requested in watchMask but ignored for now.
        if "IN_CLOSE_WRITE" not in type_names:
            continue

        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(path, filename, type_names))
        # Only the extension matters; the base name is not used.
        extension = os.path.splitext(filename)[1]
        print(extension)
        if extension.lower() == ".pdf":
            DoOCR(pathToWatch, filename)
        else:
            print("{} is no PDF file. Skipping...".format(filename))

What would be the best way to ensure that both events occurred one after the other?

Upvotes: 0

Views: 2522

Answers (1)

Barmar
Barmar

Reputation: 782508

Add the created files to a set, then check the set when you get the IN_CLOSE_WRITE event.

    # Names of files for which an IN_CREATE has been seen but no
    # IN_CLOSE_WRITE yet.
    created_files = set()
    for _, type_names, path, filename in i.event_gen(yield_nones=False):
        if "IN_CREATE" in type_names:
            created_files.add(filename)

        # Only act on a close-after-write for a file whose creation was seen.
        if "IN_CLOSE_WRITE" not in type_names or filename not in created_files:
            continue

        # The create/close pair is complete; forget the file again.
        created_files.remove(filename)
        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(path, filename, type_names))
        extension = os.path.splitext(filename)[1]
        print(extension)
        if extension.lower() != ".pdf":
            print("{} is no PDF file. Skipping...".format(filename))
        else:
            DoOCR(pathToWatch, filename)

Upvotes: 1

Related Questions