Reputation: 2410
I am working on a project where I need to extract text from frames of an Instagram Reels video. I used yt-dlp to download the video, extracted frames with ffmpeg, and attempted to read the text from the frames using Tesseract OCR.
However, I'm unable to extract text from the frames. Below is the code snippet I'm using:
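For context, this is roughly how I invoke the two tools from Python (the URL, output paths, and the fps=1 sampling rate here are placeholders, not my exact values):

import subprocess

# Download the reel with yt-dlp (URL is a placeholder)
subprocess.run(
    ["yt-dlp", "-o", "Insta Reels/video.mp4",
     "https://www.instagram.com/reel/EXAMPLE/"],
    check=True,
)

# Extract one frame per second with ffmpeg into numbered PNGs
subprocess.run(
    ["ffmpeg", "-i", "Insta Reels/video.mp4",
     "-vf", "fps=1", "Insta Reels/frame_%04d.png"],
    check=True,
)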
from PIL import Image
import pytesseract
import os

# Point pytesseract at the local Tesseract installation
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

image_path = r"Insta Reels\frame_0077.png"

try:
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"The file {image_path} does not exist.")

    image = Image.open(image_path)
    text = pytesseract.image_to_string(image, lang='eng')

    if text.strip():
        print("Extracted text:")
        print(text)
    else:
        print("No text was extracted from the image.")
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"An error occurred: {e}")
The problem is that the text is either extracted incompletely or not detected at all.
What preprocessing steps should I apply to these frames to improve the accuracy of Tesseract OCR?
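To make the question concrete, this is the kind of preprocessing I have been experimenting with (upscaling, grayscale conversion, Otsu binarization, and a page-segmentation hint); I am not sure which of these steps actually help for frames like mine:

import cv2
import pytesseract

img = cv2.imread(r"Insta Reels\frame_0077.png")

# Upscale: Tesseract tends to do better when text is reasonably large
img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

# Grayscale + Otsu binarization to get high-contrast text
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# --psm 6 assumes a single uniform block of text (a guess for caption overlays)
text = pytesseract.image_to_string(binary, lang='eng', config='--psm 6')
print(text)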
Edit:
import cv2
import pytesseract

# Mention the installed location of Tesseract-OCR in your system
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

image_path = r"Insta Reels\Video by mydarlingfood\frame_0077.png"

# Read image from which text needs to be extracted
img = cv2.imread(image_path)

# Preprocessing: convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Perform Otsu thresholding (inverted, so text regions become white)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

# Dilate with a rectangular kernel to merge characters into text blocks
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)

# Find the outer contours of the dilated text blocks
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Create a copy of the image to draw on
im2 = img.copy()

# Loop through the identified contours
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)

    # Draw a rectangle around the detected block on the copy
    cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Crop the text block and pass it to the OCR
    cropped = im2[y:y + h, x:x + w]
    text = pytesseract.image_to_string(cropped, lang='eng')
    print(text)

    # Display the cropped image; press any key to continue to the next one
    cv2.imshow("Cropped Image", cropped)
    cv2.waitKey(0)

cv2.destroyAllWindows()
It seems that it is not able to detect text rendered in a different color.
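In case it helps, one workaround I am experimenting with is running OCR on both the binarized image and its inverse, so that dark-on-light and light-on-dark text each get a pass (this is just my guess at the cause, not a confirmed fix):

import cv2
import pytesseract

img = cv2.imread(r"Insta Reels\Video by mydarlingfood\frame_0077.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Otsu picks one polarity; text in the "wrong" color ends up as background,
# so try both the thresholded image and its inverse
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
inverted = cv2.bitwise_not(binary)

for name, candidate in (("binary", binary), ("inverted", inverted)):
    text = pytesseract.image_to_string(candidate, lang='eng')
    if text.strip():
        print(f"[{name}]")
        print(text)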
Upvotes: 0
Views: 83