오정현
오정현

Reputation: 11

Text extraction with OCR

The attached image is a screenshot of a monitor containing various elements, and I want to extract the text from each of them. I have tried several approaches, but there is still a lot of text that is not recognized.

[enter image description here]

So far I have tried the following:

  1. Using EasyOCR.
  2. Using Tesseract.
  3. Since the image contains rectangular boxes of different sizes and shapes, I also tried detecting the rectangles first and then cropping each detected rectangle out of the image for OCR.

Below is the code I used to detect the rectangles:

import cv2
import numpy as np
from PIL import Image

def integrated_preprocess_image_with_size_filter(
    input_file_path,
    output_file_path,
    *,
    min_area=100,
    max_area=50000,
    min_side=90,
):
    """Outline large four-sided regions of an image and save the annotated copy.

    The image is converted to grayscale, smoothed with a bilateral filter,
    sharpened, and binarized with Otsu's threshold. Every contour of the
    binary mask that simplifies to four vertices and passes the size filters
    gets its bounding box drawn in green on a colour copy of the input, which
    is then written to ``output_file_path``.

    Unlike the earlier version, the input file is never modified: it used to
    be re-saved in place with a 300 DPI tag, which changed only metadata that
    nothing downstream read (and re-encoded lossy formats).

    Args:
        input_file_path: Path of the image to analyse. Any mode Pillow can
            convert to RGB is accepted (RGBA, grayscale, palette, ...).
        output_file_path: Path the annotated image is written to; the format
            is chosen by OpenCV from the file extension.
        min_area: Contours must have an area strictly greater than this.
        max_area: Contours must have an area strictly smaller than this.
        min_side: The bounding box width and height must both exceed this.

    Raises:
        OSError: If OpenCV reports that the output image could not be written.
    """
    # Normalise every input mode to 3-channel RGB so the colour conversions
    # below always receive an H x W x 3 array, and close the file promptly.
    with Image.open(input_file_path) as source:
        rgb_pixels = np.array(source.convert('RGB'))

    # OpenCV works in BGR order; keep the colour copy for drawing on later.
    img_cv = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    gray_image = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)

    # Edge-preserving smoothing with a pixel neighbourhood diameter of 3.
    blurred_image = cv2.bilateralFilter(gray_image, 3, 75, 75)

    # 3x3 sharpening kernel: boosts the centre pixel against its 4 neighbours.
    sharpening_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened_image = cv2.filter2D(blurred_image, -1, sharpening_kernel)

    # Threshold value 0 is ignored; Otsu's method picks the threshold itself.
    _, binary_image = cv2.threshold(
        sharpened_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # The previous size formula, int(mean(gray) // 50000) + 1, always gave 1
    # for 8-bit images (mean <= 255), so this opening is effectively a no-op.
    # It is kept as a hook: enlarge the kernel to remove small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    opened_image = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(
        opened_image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    for cnt in contours:
        # Simplify the outline, allowing a deviation of 2% of its perimeter.
        epsilon = 0.02 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)

        # Only four-cornered outlines are treated as candidate boxes.
        if len(approx) != 4:
            continue
        if not min_area < cv2.contourArea(cnt) < max_area:
            continue

        x, y, w, h = cv2.boundingRect(approx)
        if w > min_side and h > min_side:
            # Green (BGR) outline, 2 px thick.
            cv2.rectangle(img_cv, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(output_file_path, img_cv):
        raise OSError(f"Could not write output image to {output_file_path!r}")


# Run the example only when executed as a script, so importing this module
# does not read or write any image files as a side effect.
if __name__ == "__main__":
    integrated_preprocess_image_with_size_filter('./test_img.png', './path_to_output.jpg')


As an alternative to running OCR on the whole image, my idea was to detect the rectangular boxes first and then run OCR on each extracted box.

I want to know how to extract text from that image in the best way possible.

Thank you.

Upvotes: 0

Views: 112

Answers (0)

Related Questions