Reputation: 13
This is my image:
I can recognize the words:
I need to check whether the text in the image contains any misspelled words, draw a red rectangle around each misspelled word, and display an "x" above it to indicate that it is incorrect. For example, this image contains "wort", a misspelling of "worst". This is the result I expect to see:
How can I achieve this?
My code:
import cv2
from pytesseract import pytesseract
from pytesseract import Output
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

# cv2.imread returns None (it does not raise) when the image cannot be
# read, so fail here with a clear message instead of deep inside the OCR call.
img = cv2.imread('test2.png')
if img is None:
    raise FileNotFoundError("could not read image 'test2.png'")

# Print the plain recognized text.
print(pytesseract.image_to_string(img))

# With Output.DICT the result holds parallel lists: entry i of 'text',
# 'left', 'top', 'width' and 'height' all describe the same token.
data = pytesseract.image_to_data(img, output_type=Output.DICT)
boxes = zip(data['text'], data['left'], data['top'], data['width'], data['height'])
for word, x, y, w, h in boxes:
    # Skip empty and whitespace-only entries; they carry no word to label.
    if not word.strip():
        continue
    # Green box around the word, with the recognized text in red just above it.
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, word, (x, y - 4), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 255), 1)

# Show the annotated image until a key is pressed.
cv2.imshow('image', img)
cv2.waitKey(0)
UPDATE:
Updated code that prints the number of spelling errors in the sentence above:
import cv2
from pytesseract import pytesseract
from pytesseract import Output
from textblob import TextBlob
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
def convert(lst):
    """Split text into a list of whitespace-separated tokens.

    Args:
        lst: The text to split (a string, despite the parameter name).

    Returns:
        A list of the tokens in ``lst``. Spaces, newlines and tabs all act
        as separators, and runs of whitespace never produce empty tokens;
        an empty or whitespace-only string yields ``[]``. Punctuation is
        left attached to its word.
    """
    # split() with no separator splits on any run of whitespace. The previous
    # split(" ") left line breaks inside tokens (e.g. 'of\ntimes,').
    return lst.split()
# cv2.imread returns None (it does not raise) when the image cannot be
# read, so fail here with a clear message instead of deep inside the OCR call.
img = cv2.imread('test2.png')
if img is None:
    raise FileNotFoundError("could not read image 'test2.png'")

# Recognize the text and break it into tokens for spell checking.
lst = convert(pytesseract.image_to_string(img))
print(lst)

# Count the tokens that TextBlob's spelling corrector would change.
mistakes = 0
for token in lst:
    # Run the correction once per token and compare it with the original.
    corrected = TextBlob(token).correct()
    if corrected != token:
        mistakes += 1
print(mistakes)

# With Output.DICT the result holds parallel lists: entry i of 'text',
# 'left', 'top', 'width' and 'height' all describe the same token.
data = pytesseract.image_to_data(img, output_type=Output.DICT)
for i, word in enumerate(data['text']):
    # Skip empty and whitespace-only entries; they carry no word to label.
    if not word.strip():
        continue
    x, y, w, h = (data['left'][i], data['top'][i], data['width'][i], data['height'][i])
    # Green box around the word, with the recognized text in red just above it.
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, word, (x, y - 4), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 255), 1)

# Show the annotated image until a key is pressed.
cv2.imshow('image', img)
cv2.waitKey(0)
Output:
['It', 'was', 'the', 'best', 'of\ntimes,', 'it', 'was', 'the', 'wort\nof', 'times,', 'it', 'was', 'the', 'age\nof', 'wisdom,', 'it', 'was', 'the\n\nage', 'of', 'foolishness...\n']
1
Upvotes: 0
Views: 112