Reputation: 11
The provided image shows various elements of a monitor display. I want to extract the text from each of them. I have tried various approaches, but there is still a lot of text that I cannot recognize.
[Image: enter image description here]
Below is the logic I used:
import cv2
import numpy as np
from PIL import Image
def integrated_preprocess_image_with_size_filter(input_file_path, output_file_path):
    """Outline large, roughly rectangular regions of an image and save the result.

    The image is converted to grayscale, smoothed with a bilateral filter,
    sharpened, binarised with Otsu's threshold and searched for contours.
    Every contour that simplifies to a four-vertex polygon, has an area
    strictly between 100 and 50000 pixels and a bounding box more than
    90 pixels wide and high is outlined in green on a colour copy of the
    input. That annotated copy is written to ``output_file_path``.

    Args:
        input_file_path: Path of the image to analyse. The file is only
            read; it is never modified.
        output_file_path: Path the annotated image is written to.

    Raises:
        OSError: If OpenCV reports that the annotated image was not written.
    """
    # Normalise every input mode (RGBA, grayscale, palette, ...) to RGB so the
    # array always has three channels for the colour conversions below.
    # Rewriting the file with a new DPI tag is deliberately not done: the tag
    # does not change the pixel data, and saving over the input re-encodes it.
    with Image.open(input_file_path) as source_image:
        rgb_image = source_image.convert('RGB')

    img_cv = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    gray_image = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)

    # Edge-preserving smoothing followed by a 3x3 sharpening kernel.
    blur_diameter = 3
    blurred_image = cv2.bilateralFilter(gray_image, blur_diameter, 75, 75)
    sharpening_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened_image = cv2.filter2D(blurred_image, -1, sharpening_kernel)

    _, binary_image = cv2.threshold(
        sharpened_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # A 1x1 structuring element leaves the mask unchanged. The size is spelled
    # out as a constant so it is obvious where to increase it if speckle noise
    # has to be removed before contour detection.
    open_kernel_size = 1
    kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (open_kernel_size, open_kernel_size)
    )
    opened_image = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(
        opened_image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    for cnt in contours:
        # Simplify the outline; a quadrilateral is treated as a candidate box.
        epsilon = 0.02 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        if len(approx) != 4:
            continue
        if not 100 < cv2.contourArea(cnt) < 50000:
            continue
        x, y, w, h = cv2.boundingRect(approx)
        if w > 90 and h > 90:
            cv2.rectangle(img_cv, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # cv2.imwrite signals failure through its return value, so check it
    # instead of silently producing no output file.
    if not cv2.imwrite(output_file_path, img_cv):
        raise OSError(f"Could not write annotated image to {output_file_path!r}")
# Run the pipeline on the sample image and write the annotated copy.
integrated_preprocess_image_with_size_filter(
    input_file_path='./test_img.png',
    output_file_path='./path_to_output.jpg',
)
As an alternative approach, I tried extracting the rectangular boxes first and then running OCR on each of them.
I want to know how to extract text from that image in the best way possible.
Thank you.
Upvotes: 0
Views: 112