Agura

Reputation: 11

Accurately detecting randomly rotated text in images

I'm trying to detect text on items that may be rotated in various directions. I've tried Tesseract, EasyOCR, and EAST for text detection and extraction, but all of them run into trouble with rotated text. Tesseract has given me the closest results, but it still extracts the text incorrectly when it is rotated.

Is there any possible way to extract text correctly, regardless of its rotation? I've included some sample images for better understanding.

Rotated Text Image

More Samples

Someone suggested rotating the images and detecting text each time, but this solution is too time-consuming in my case (70 hours per run). Here is the code I used:

import os
import cv2
import pytesseract
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd

# Directory containing the images
directory = 'Camera2/front'

# Ensure pytesseract can find the tesseract executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Adjust path as necessary

# Initialize an empty list to store results
results = []

# Get the list of image files in the directory
image_files = [f for f in os.listdir(directory) if f.endswith(('.jpeg', '.jpg'))]

def preprocess_image(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to preprocess the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    
    return gray, binary

def detect_text(image):
    # Preprocess the image
    gray, binary = preprocess_image(image)
    
    # Perform OCR on the preprocessed image
    text = pytesseract.image_to_string(binary, config='--psm 3 -l eng --oem 3')  # PSM 3: fully automatic page segmentation (no OSD), OEM 3: default engine

    # Check if any text is detected
    return bool(text.strip()), text, gray

def rotate_image(image, angle):
    # Get the image dimensions and center
    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Expand the canvas so the rotated corners are not clipped,
    # otherwise text near the edges can be cut off
    cos, sin = abs(matrix[0, 0]), abs(matrix[0, 1])
    new_w = int(h * sin + w * cos)
    new_h = int(h * cos + w * sin)
    matrix[0, 2] += new_w / 2 - center[0]
    matrix[1, 2] += new_h / 2 - center[1]
    return cv2.warpAffine(image, matrix, (new_w, new_h))

# Iterate through each file in the directory with tqdm for progress visualization
for filename in tqdm(image_files, desc="Processing images"):
    filepath = os.path.join(directory, filename)
    
    # Load the current image, skipping files OpenCV cannot read
    original_image = cv2.imread(filepath)
    if original_image is None:
        continue
    
    # Initialize text detection result
    has_text = False
    detected_text = ""
    gray_image = None
    
    # Try every rotation angle from 0 to 359 degrees until text is found
    for angle in tqdm(range(360), desc="Rotating", leave=False):
        rotated_image = rotate_image(original_image, angle)
        has_text, detected_text, gray_image = detect_text(rotated_image)
        
        if has_text:
            break
    
    # Plotting the original and preprocessed images
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    
    # Original image
    axes[0].imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    axes[0].set_title('Original Image')
    axes[0].axis('off')
    
    # Gray scale image
    if gray_image is not None:
        axes[1].imshow(gray_image, cmap='gray')
        axes[1].set_title('Grayscale Image with Adjusted Thresholding')
        axes[1].axis('off')
    
    plt.tight_layout()
    plt.show()
    
    if has_text:
        print(f"Text detected in {filename}:")
        print(detected_text)
        # Store the result only if the stripped text is longer than 3 characters
        if len(detected_text.strip()) > 3:
            image_id = os.path.splitext(filename)[0]
            results.append({'ID': image_id, 'text': detected_text})
    else:
        print(f"No text detected in {filename}.")

results_df = pd.DataFrame(results)


Upvotes: 1

Views: 170

Answers (1)

T A

Reputation: 1756

You don't need to rotate the image one degree at a time if you can figure out the alignment of the text. Since your text is generally longer than a single letter, its bounding box should be a rectangle with width > height.

You could then:

  1. Calculate the longest side of your bounding box from its corner coordinates (x1, y1) and (x2, y2)
  2. Calculate the angle between this side and the x-axis of the image using the arctangent, e.g. atan2(y2 - y1, x2 - x1)
  3. Rotate the image twice: once by the angle you calculated in the previous step, and once by that angle + 180 degrees
  4. You should now have one image with correctly aligned horizontal text and one with the text upside down
  5. Run OCR on both images and keep the more coherent result

You will have to calculate the bounding box first, but you only need to do this once per image. Since that replaces up to 360 OCR passes with just two, it should be around 180 times faster than your current approach; a minimal sketch follows below.
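Here is a minimal sketch of this approach in Python. A few things are assumptions on top of the steps above: the bounding box comes from cv2.minAreaRect over the thresholded text pixels (any detector that returns a rotated box, such as EAST, would work too), the Otsu threshold assumes dark text on a light background, and "most coherent" is scored simply as the candidate with the most alphanumeric characters. The helper name ocr_with_deskew is made up for illustration.

import cv2
import numpy as np
import pytesseract

def ocr_with_deskew(image):
    # Assumes dark text on a light background; invert so text pixels
    # become the white foreground
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    coords = cv2.findNonZero(binary)
    if coords is None:
        return ""  # no foreground pixels, nothing to OCR

    # Step 1: minimum-area rectangle around the text pixels
    rect = cv2.minAreaRect(coords)  # ((cx, cy), (w, h), angle)
    box = cv2.boxPoints(rect)       # the four corner points

    # Step 2: angle of the longest side to the x-axis via arctan
    sides = [(box[i], box[(i + 1) % 4]) for i in range(4)]
    p1, p2 = max(sides, key=lambda s: np.linalg.norm(s[1] - s[0]))
    angle = np.degrees(np.arctan2(p2[1] - p1[1], p2[0] - p1[0]))

    # Steps 3-4: rotate once by the angle and once by the angle + 180,
    # giving one upright and one upside-down candidate (the original
    # canvas size is kept, so corners may clip on elongated images)
    candidates = []
    for extra in (0, 180):
        m = cv2.getRotationMatrix2D(rect[0], angle + extra, 1.0)
        rotated = cv2.warpAffine(image, m, image.shape[1::-1],
                                 flags=cv2.INTER_CUBIC,
                                 borderMode=cv2.BORDER_REPLICATE)
        candidates.append(pytesseract.image_to_string(rotated))

    # Step 5: keep the "more coherent" result -- scored here, as an
    # assumption, by the count of alphanumeric characters
    return max(candidates, key=lambda t: sum(c.isalnum() for c in t))

Scoring by alphanumeric count is crude; pytesseract.image_to_data also returns per-word confidences that could be averaged and compared instead.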

Upvotes: 0
