Reputation: 303
I'm trying to improve the speed of my object detection. I'm using OpenCV with ORB keypoint matching (matched through a FLANN-based matcher, as shown in the code below).
I have a budget of 30 seconds for keypoint_detection to run, and at the moment it takes about 23 seconds; a rough timing sketch follows the question. That's fine for now with 74 images, but the number of images is likely to increase.
So far, I've tried pre-processing to remove some of the clutter before matching, but it dramatically reduced my ability to find a successful match.
I've also tried:
This is my exact working code, not pseudo-code and not a minimal example, since I didn't think it would make sense to present a cut-down version of the code I'm trying to speed up.
Is there any way to improve the efficiency of my code?
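For reference, the ~23-second figure could be reproduced by timing one full pass over the images, roughly as in this minimal sketch (loadImages, keypoint_detection, and avoid are defined in the script below):

from time import perf_counter

# time a single pass of keypoint_detection over every needle image
images = loadImages(avoid)
start = perf_counter()
keypoint_detection(images)
print('pass over %d images took %.1f s' % (len(images), perf_counter() - start))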
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep
from windowcapture import WindowCapture
from vision import Vision
# Change the working directory to the folder this script is in.
os.chdir(r'C:\test')
avoid = glob.glob(r"C:\Users\test\*.png")
def loadImages(directory):
    # Initialise an empty list
    image_list = []
    # Add each image to the list, keeping its file path alongside it
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list
# initialize the WindowCapture class
wincap = WindowCapture()
def keypoint_detection(image_list):
    counter = 0
    for needle_img, needle_name in image_list:
        counter += 1
        # load image to find
        objectToFind = Vision(needle_img)
        # get an updated image of the screen
        keypoint_haystack = wincap.get_haystack()
        # crop the screenshot to a fixed region of interest
        x, w, y, h = [600, 700, 20, 50]
        keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]

        kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, needle_name, min_match_count=30)
        match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)

        if match_points:
            # find the center point of all the matched features
            center_point = objectToFind.centeroid(match_points)
            # account for the width of the needle image that appears on the left
            center_point[0] += objectToFind.needle_w
            # draw the found center point on the output image
            match_image = objectToFind.draw_crosshairs(match_image, [center_point])
            sleep(3)
            break
while True:
    ships_to_avoid = loadImages(avoid)
    keypoint_detection(ships_to_avoid)
WindowCapture Class
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:

    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 5
        self.w = self.w - border_pixels
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y
    def get_haystack(self):
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel and make the array contiguous so OpenCV accepts it
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img
    @staticmethod
    def list_window_names():
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    def get_screen_position(self, pos):
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
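For context, a short usage sketch of the class; the window title below is only a placeholder, and calling the constructor with no title captures the whole desktop, as the main script above does:

# print visible window titles to find the one to capture
WindowCapture.list_window_names()

# capture a specific window by title (placeholder title), or the whole desktop with WindowCapture()
wincap = WindowCapture('My Window Title')
frame = wincap.get_haystack()
print(frame.shape)  # (h, w, 3) BGR array ready for OpenCV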
Vision Class
import cv2 as cv
import numpy as np
class Vision:

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0

    # constructor
    def __init__(self, needle_img):
        # the needle is passed in as an already-loaded image, not a file path
        self.needle_img = needle_img
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
    def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
        # detect and describe ORB keypoints in the needle and the haystack
        orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
        keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
        orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)

        # FLANN matcher with LSH index parameters suited to binary ORB descriptors
        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
        search_params = dict(checks=50)

        try:
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
        except cv.error:
            return None, None, [], []

        # store all the good matches as per Lowe's ratio test.
        good = []
        points = []
        for pair in matches:
            if len(pair) == 2:
                if pair[0].distance < 0.7 * pair[1].distance:
                    good.append(pair[0])

        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
            for match in good:
                points.append(keypoints_haystack[match.trainIdx].pt)

        return keypoints_needle, keypoints_haystack, good, points
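A minimal usage sketch of the class on its own (the needle path is a placeholder; the haystack screenshot comes from the WindowCapture class above):

import cv2 as cv
from windowcapture import WindowCapture
from vision import Vision

needle = cv.imread('needle.png', cv.IMREAD_UNCHANGED)  # placeholder path
vision = Vision(needle)
haystack = WindowCapture().get_haystack()
kp1, kp2, good, points = vision.match_keypoints(haystack, 'needle.png', min_match_count=30)
print('%d good matches' % len(good))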
Upvotes: 0
Views: 236