Reputation: 672
I'm trying to defeat an anti-bot feature in a program where a user has to click on letters within an image in alphanumerical order. I have managed to somewhat extract the text from the background using preprocessing, but still need to deskew each individual character to get optimal results using Tesseract.
Image before using Hough Lines, just preprocessing
Original images with lines detected by HoughLinesP drawn
I have tried to use the Canny edge detector + Hough lines to try to find the line below each character. However, it proved inconsistent and too reliant on the quality of the line, and I can't differentiate the bottom line from lines detected on the characters themselves.
Here is the code I have tried:
# -*- coding:utf-8 -*-
"""Extract captcha-style characters from a constant, known background.

Pipeline: HSV background subtraction -> Otsu threshold -> morphological
clean-up -> HoughLinesP, with detected segments drawn on the source image
for visual inspection.
"""
import time

import cv2
import numpy as np

# Region of interest shared by the background and the source image: x1, y1, x2, y2.
IMG_ROI = [48, 191, 980, 656]
SRC_IMG_DIR = "images/source/9.png"

# Proper structuring elements. NOTE: the original code passed bare tuples
# such as (3, 3) as the kernel argument; OpenCV converts a tuple to a
# 2-element array, which is NOT a 3x3 kernel. An explicit ndarray kernel
# is required for the morphology to do what was intended.
KERNEL3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
KERNEL5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))


def crop_roi(img, roi):
    """Return the [x1, y1, x2, y2] region *roi* of *img* as a view."""
    return img[roi[1]:roi[3], roi[0]:roi[2]]


def build_foreground_mask(src_img, src_hsv, bg_hsv, offset=3):
    """Vectorized replacement for the original per-pixel Python loop.

    The background of the ROI is constant, so pixels whose hue is within
    ``offset`` of the reference background hue are kept only when strongly
    saturated; otherwise pixels survive additional S/V tests.

    NOTE(review): the indentation of the original loop was lost in the
    paste, so the exact if/elif nesting is a reconstruction — confirm
    against the poster's working copy. Reconstructed rule:
      * hue within ``offset`` of background hue: keep iff S >= 109
      * else if S <= 90: keep iff H >= 67 or (V >= 125 and S >= 20)
      * else (S in 91..255, hue far from background): keep

    Returns a BGR image of the same shape as *src_img* with non-kept
    pixels zeroed.
    """
    # Signed hue so the |difference| comparison cannot wrap on uint8.
    src_h = src_hsv[..., 0].astype(np.int16)
    src_s = src_hsv[..., 1]
    src_v = src_hsv[..., 2]
    bg_h = bg_hsv[..., 0].astype(np.int16)

    hue_matches_bg = np.abs(src_h - bg_h) <= offset
    keep = np.where(
        hue_matches_bg,
        src_s >= 109,  # background-hued pixels: keep only if strongly saturated
        np.where(
            src_s <= 90,
            (src_h >= 67) | ((src_v >= 125) & (src_s >= 20)),
            True,  # mid/high saturation and hue far from background: keep
        ),
    )
    mask = np.zeros_like(src_img)
    mask[keep] = src_img[keep]
    return mask


# --- load inputs -----------------------------------------------------------
bg_img = cv2.imread("images/background.png", cv2.IMREAD_COLOR)
src_img = cv2.imread(SRC_IMG_DIR, cv2.IMREAD_COLOR)
# imread returns None (not an exception) on a missing/unreadable file;
# fail loudly instead of crashing later on the slice.
if bg_img is None or src_img is None:
    raise FileNotFoundError("background or source image could not be read")
bg_img = crop_roi(bg_img, IMG_ROI)
src_img = crop_roi(src_img, IMG_ROI)

bg_hsv = cv2.cvtColor(bg_img, cv2.COLOR_BGR2HSV)
src_hsv = cv2.cvtColor(src_img, cv2.COLOR_BGR2HSV)

# --- background subtraction ------------------------------------------------
start_time = time.time()
mask = build_foreground_mask(src_img, src_hsv, bg_hsv, offset=3)
print("duration", time.time() - start_time)
# (The original BGR->HSV->BGR round-trip on the mask was a no-op — the
# value-channel tweak it supported was commented out — and is dropped.)

# --- threshold + morphological clean-up -------------------------------------
mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
# Otsu picks the threshold automatically; the 0 is ignored.
ret, thresh = cv2.threshold(mask_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
opened = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, KERNEL3)
opened = cv2.morphologyEx(opened, cv2.MORPH_OPEN, KERNEL3)
opened = cv2.erode(opened, KERNEL3)
opened = cv2.dilate(opened, KERNEL3)
opened = cv2.dilate(opened, KERNEL5)
opened = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, KERNEL3)
opened = cv2.erode(opened, KERNEL3)
opened = cv2.erode(opened, KERNEL3)
final_img = opened

# --- line detection ----------------------------------------------------------
lines = cv2.HoughLinesP(final_img, 1, np.pi / 180, 20, minLineLength=10, maxLineGap=3)
# HoughLinesP returns None (not an empty array) when nothing is found.
if lines is not None:
    for line in lines:
        x1, y1, x2, y2 = line[0]
        cv2.line(src_img, (x1, y1), (x2, y2), [255, 255, 255], 2)

cv2.imshow("src", src_img)
cv2.imshow("", final_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Upvotes: 1
Views: 590
Reputation: 1447
From first glance, it appears the skew is not very strong, and the characters are far apart.
I'd do a multi-step approach on your filtered image (pretty good already)
Upvotes: 1