Reputation: 1
For a text image input, I need to break the text into segments using the OpenCV library. Let's say the image has 4 lines of text: I need to write a function that cuts out the lines and creates 4 new images, one for each line.
Then a function that receives a line/sentence that consists of several words and cuts images of the words separately.
Then a function that receives an image of a word and decomposes it into letters, for each letter its own image will be created.
The function below only cuts a single word into letters:
# Cutting one word to letters
def image_to_text(image_file_path):
    """Split an image of a single word into letters and classify each one.

    The image is binarised with Otsu's threshold (inverted, so ink becomes
    white), every external contour is treated as one letter, and each letter
    crop is sharpened, padded, resized to 28x28 and passed to the classifier.

    NOTE(review): relies on the module-level names ``cv2``, ``np``, ``plt``,
    ``loaded_model`` and ``class_labels``, none of which are defined in this
    snippet -- confirm they exist where this function is used.

    Args:
        image_file_path: Path of the word image to read.

    Returns:
        The predicted labels concatenated in left-to-right order, or an empty
        string when the image could not be read.
    """
    img = cv2.imread(image_file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        return ""

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # The order in which findContours reports contours is not a reading order,
    # so sort the letter boxes explicitly by their left edge instead of
    # reversing the final string.
    boxes = sorted((cv2.boundingRect(contour) for contour in contours),
                   key=lambda box: box[0])

    # Sharpening kernel; it is the same for every letter, so build it once.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], np.float32)

    labels = []
    for x, y, w, h in boxes:
        letter_img = thresh[y:y + h, x:x + w]
        letter_img = cv2.filter2D(letter_img, -1, kernel)
        # Black margin so the glyph does not touch the edge after resizing.
        letter_img = cv2.copyMakeBorder(letter_img, 4, 4, 4, 4, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        letter_img = cv2.resize(letter_img, (28, 28), interpolation=cv2.INTER_AREA)

        # Display the image
        plt.imshow(letter_img, cmap='gray')
        plt.axis('off')
        plt.show()

        # Batch of one 28x28 single-channel image, scaled to [0, 1].
        letter_img = letter_img.reshape(1, 28, 28, 1).astype('float32') / 255
        prediction = loaded_model.predict(letter_img)

        # Print the predicted class and its corresponding label
        predicted_class = np.argmax(prediction)
        predicted_label = class_labels[predicted_class]
        print(f"Predicted class for the letter: {predicted_class} ({predicted_label})")
        labels.append(predicted_label)

    extracted_text = "".join(labels)
    print('Extracted text:', extracted_text)
    return extracted_text
# Example call: run the letter recogniser on a sample image (hard-coded local path).
text = image_to_text(r"C:\Users\student\Desktop\FinalProject\Flask\image\test3IMG.jpg")
input:
output:
I am limited in the pictures I can upload to the forum, so I will only upload two examples of the letters B and E
The rest of the letters were extracted in the same way..
The question is: how can I handle an input like this?
I've been trying for days and I'm getting frustrated. I would be glad for any help.
Upvotes: 0
Views: 159
Reputation: 426
hope this helps.
To find each line:
This is the thresholded image:
This is the closed image with 17 kernel size, now all lines are connected as one big blob:
These are the bounding boxes for each contour, which are all the detected lines:
Now repeating the same procedure for detecting each word in one line:
This is the thresholded line image:
This is the closed image with 7 kernel size, smaller kernel because we just want to connect the letters of a word instead of connecting each word together:
These are the bounding boxes for each contour, which are all the detected words:
This is the final result of word detection:
From this point you can use your own code that detects each letter of a word for every detected word.
This is the complete code:
import cv2
import numpy
#Function to create a custom kernel
def xAxisKernel(size):
    """Build a square structuring element shaped like a horizontal bar.

    The kernel is ``size`` x ``size``; an even ``size`` is rounded up to the
    next odd number so that there is a single centre row. The centre row and
    the rows directly above and below it are filled with ones, every other
    cell is zero.

    Args:
        size: Requested side length of the kernel. Must not be negative.

    Returns:
        A ``numpy.uint8`` array of shape ``(size, size)`` after rounding.

    Raises:
        ValueError: If ``size`` is negative.
    """
    if size < 0:
        raise ValueError("size must be non-negative")
    size = size if size % 2 else size + 1
    xkernel = numpy.zeros((size, size), dtype=numpy.uint8)
    center = size // 2
    # Slice instead of indexing rows one by one: the slice is clamped to the
    # array, so a 1x1 kernel no longer raises IndexError on row center+1 or
    # wraps around through row -1.
    xkernel[max(center - 1, 0):center + 2, :] = 1
    return xkernel
#Give filtered image, get bounding boxes for the lines
def findLines(full_image, thresh_value=200, kernel_size=17, min_area=100):
    """Find the bounding boxes of the text lines in a BGR image.

    The image is converted to grayscale and inverse-thresholded so that dark
    text becomes white, then closed with the horizontal bar kernel from
    ``xAxisKernel`` so the words of one line merge into a single blob. Each
    sufficiently large external contour of the closed image is reported as
    one line.

    Args:
        full_image: BGR image containing the text.
        thresh_value: Gray level above which a pixel counts as background.
        kernel_size: Side length passed to ``xAxisKernel``; it has to be wide
            enough to bridge the gaps between words.
        min_area: Contours whose area is not larger than this are dropped
            (removes dots and specks).

    Returns:
        A list of ``[x, y, w, h]`` boxes ordered from top to bottom.
    """
    # Threshold the image, then close with the horizontal kernel so that
    # neighbouring letters and words on the same line are connected.
    gray = cv2.cvtColor(full_image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY_INV)
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, xAxisKernel(kernel_size))

    # Find line contours in the closed image
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    lines = [
        list(cv2.boundingRect(cnt))
        for cnt in contours
        if cv2.contourArea(cnt) > min_area  # to eliminate the dots
    ]

    # findContours does not report contours in reading order; sort by the top
    # edge (then the left edge) so callers get the lines top to bottom.
    lines.sort(key=lambda box: (box[1], box[0]))
    return lines
#Give the cropped image of a line, get bounding boxes for the words
def findWords(line_image, thresh_value=200, kernel_size=7, min_area=100, border=10):
    """Find the bounding boxes of the words in the image of one text line.

    A white border is added around the crop first, then the same
    threshold-and-close procedure as for line detection is applied with a
    smaller kernel, so that only the letters of one word are merged and
    separate words stay apart.

    Args:
        line_image: BGR crop containing a single line of text.
        thresh_value: Gray level above which a pixel counts as background.
        kernel_size: Side length passed to ``xAxisKernel``.
        min_area: Contours whose area is not larger than this are dropped
            (removes dots and specks).
        border: Width in pixels of the white border added on every side.

    Returns:
        A list of ``[x, y, w, h]`` boxes ordered from left to right, with
        coordinates relative to the ``line_image`` that was passed in.
    """
    # Add white border around the line for better filtering
    padded = cv2.copyMakeBorder(line_image, top=border, bottom=border, left=border,
                                right=border, borderType=cv2.BORDER_CONSTANT,
                                value=[255, 255, 255])

    # Threshold the image, then close with the horizontal kernel so that the
    # letters of a word are connected.
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY_INV)
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, xAxisKernel(kernel_size))

    # Find word contours in the closed image
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    words = []
    for cnt in contours:
        if cv2.contourArea(cnt) > min_area:  # to eliminate the dots
            x, y, w, h = cv2.boundingRect(cnt)
            # Subtract the added border so the box refers to the original crop.
            # (The former cv2.rectangle call drew on the padded copy, which is
            # never returned or displayed, so it has been removed.)
            words.append([x - border, y - border, w, h])

    # findContours does not report contours in reading order; sort by the left edge.
    words.sort(key=lambda box: box[0])
    return words
#Read the image
image = cv2.imread('ph1.png')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read;
    # fail here with a clear message rather than on image.shape below.
    raise FileNotFoundError("Could not read image 'ph1.png'")
height, width, channel = image.shape

# Draw on a copy: the words of later lines are still cropped from `image`,
# and red boxes drawn into it would be dark enough to pass the threshold.
annotated = image.copy()

# Find all lines in the image
al_lines = findLines(image)

# Iterate on each line and find all words
for line in al_lines:
    lineX, lineY, lineW, lineH = line  # Line bounding box coordinates
    words = findWords(image[lineY:lineY + lineH, lineX:lineX + lineW])
    for word in words:
        x, y, w, h = word  # Word bounding box coordinates
        # Add line start position as offset to word start position for correction
        x = x + lineX
        y = y + lineY
        # Draw bounding box around each word
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 0, 255), 2)

cv2.imshow('Original', annotated)
cv2.waitKey(0)
# Close the preview window once a key has been pressed.
cv2.destroyAllWindows()
Upvotes: 1