Reputation: 303
I'm trying to improve the speed of my object detection. I'm using OpenCV with ORB keypoint matching (matched through a FLANN-based matcher, as shown in the code below).
I have a budget of 30 seconds for keypoint_detection to run, and at the moment it takes about 23 seconds; a rough timing sketch follows the question. That's fine for now with 74 images, but the number of images is likely to increase.
So far, I've tried pre-processing to remove some of the clutter before matching, but it dramatically reduced my ability to find a successful match.
I've also tried:
This is my exact working code, not pseudo-code and not a minimal example, since I didn't think it would make sense to present a cut-down version of the code I'm trying to speed up.
Is there any way to improve the efficiency of my code?
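For reference, the ~23-second figure could be reproduced by timing one full pass over the images, roughly as in this minimal sketch (loadImages, keypoint_detection, and avoid are defined in the script below):

from time import perf_counter

# time a single pass of keypoint_detection over every needle image
images = loadImages(avoid)
start = perf_counter()
keypoint_detection(images)
print('pass over %d images took %.1f s' % (len(images), perf_counter() - start))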
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep
from windowcapture import WindowCapture
from vision import Vision
# Change the working directory to the folder this script is in.
os.chdir(r'C:\test')
avoid = glob.glob(r"C:\Users\test\*.png")
def loadImages(directory):
    # Initialise an empty list
    image_list = []
    # Add each image to the list, keeping its file path alongside it
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list
# initialize the WindowCapture class
wincap = WindowCapture()
def keypoint_detection(image_list):
    counter = 0
    for needle_img, needle_name in image_list:
        counter += 1
        # load image to find
        objectToFind = Vision(needle_img)
        # get an updated image of the screen
        keypoint_haystack = wincap.get_haystack()
        # crop the screenshot to a fixed region of interest
        x, w, y, h = [600, 700, 20, 50]
        keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]

        kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, needle_name, min_match_count=30)
        match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)

        if match_points:
            # find the center point of all the matched features
            center_point = objectToFind.centeroid(match_points)
            # account for the width of the needle image that appears on the left
            center_point[0] += objectToFind.needle_w
            # draw the found center point on the output image
            match_image = objectToFind.draw_crosshairs(match_image, [center_point])
            sleep(3)
            break
while True:
    ships_to_avoid = loadImages(avoid)
    keypoint_detection(ships_to_avoid)
WindowCapture Class
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:

    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 5
        self.w = self.w - border_pixels
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y
    def get_haystack(self):
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel and make the array contiguous so OpenCV accepts it
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img
    @staticmethod
    def list_window_names():
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    def get_screen_position(self, pos):
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
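For context, a short usage sketch of the class; the window title below is only a placeholder, and calling the constructor with no title captures the whole desktop, as the main script above does:

# print visible window titles to find the one to capture
WindowCapture.list_window_names()

# capture a specific window by title (placeholder title), or the whole desktop with WindowCapture()
wincap = WindowCapture('My Window Title')
frame = wincap.get_haystack()
print(frame.shape)  # (h, w, 3) BGR array ready for OpenCV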
Vision Class
import cv2 as cv
import numpy as np
class Vision:

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0

    # constructor
    def __init__(self, needle_img):
        # the needle is passed in as an already-loaded image, not a file path
        self.needle_img = needle_img
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
    def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
        # detect and describe ORB keypoints in the needle and the haystack
        orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
        keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
        orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)

        # FLANN matcher with LSH index parameters suited to binary ORB descriptors
        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
        search_params = dict(checks=50)

        try:
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
        except cv.error:
            return None, None, [], []

        # store all the good matches as per Lowe's ratio test.
        good = []
        points = []
        for pair in matches:
            if len(pair) == 2:
                if pair[0].distance < 0.7 * pair[1].distance:
                    good.append(pair[0])

        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
            for match in good:
                points.append(keypoints_haystack[match.trainIdx].pt)

        return keypoints_needle, keypoints_haystack, good, points
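A minimal usage sketch of the class on its own (the needle path is a placeholder; the haystack screenshot comes from the WindowCapture class above):

import cv2 as cv
from windowcapture import WindowCapture
from vision import Vision

needle = cv.imread('needle.png', cv.IMREAD_UNCHANGED)  # placeholder path
vision = Vision(needle)
haystack = WindowCapture().get_haystack()
kp1, kp2, good, points = vision.match_keypoints(haystack, 'needle.png', min_match_count=30)
print('%d good matches' % len(good))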
Upvotes: 0
Views: 236