How to do image segmentation with Python and OpenCV

Question

I have an image of an invoice. I want to split that image into pieces and to get smaller images. I tried to do OpenCV Kmeans but as an output i get just one small black window.

This is the code that I have:

import numpy as np
import cv2

#read the image
img = cv2.imread("image1.jpg")

#reshape the image
img = img.reshape((-1,3))
img = np.float32(img)

#criteria for clustering
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER , 10, 1)

#defining number of clusters and iteration number
nubmer_of_clusters = 6
attempts = 50

#doing the clustering
ret, label, center = cv2.kmeans(img, nubmer_of_clusters, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)

center = np.uint8(center)

res = center[label.flatten()]
res = res.reshape((img.shape))
cv2.imshow("starting_image", res)
cv2.waitKey(2)

This is the example of input image:

With red colour are marked parts of the image that I want to extract.

I do not know know if i used the right model, or even if i used the right approach. But I need segments of an image that have text on them.

I have tried with contours, but Im getting contours of each letter, and I want contours for each segment of text:

img = cv2.imread("image1.jpg")
img=cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

ret, thresh=cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

for c in contours:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(img,(x,y),(x+w,y+h),(0,0,255),2)
    cv2.imshow('Bounding rect',img)

Red · Accepted Answer

The key is to dilate (expand) the contours of the letters to form chunks. Here is how:

import cv2
import numpy as np

def process(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_canny = cv2.Canny(img_gray, 0, 0)
    return cv2.dilate(img_canny, np.ones((5, 5)), iterations=20)

def draw_segments(img):
    contours, hierarchies = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if w * h > 70000:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)

img = cv2.imread("document.jpg")
draw_segments(img)
cv2.imshow("Image", img)
cv2.waitKey(0)

Output:

Explanation:

Import the necessary libraries:

import cv2
import numpy as np

Define a function to process the image, see the comments in the code for details:

def process(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Convert to grayscale
    img_canny = cv2.Canny(img_gray, 0, 0) # Detect edges with canny edge detector
    return cv2.dilate(img_canny, np.ones((5, 5)), iterations=20) # Dilate edges to convert scattered contours that are close to each others into chunks

Define a function that will take in an image, and utilize the process function defined earlier to process the image, and find its contours. It will then loop through each contour, and if the contour's bounding rectangle has an area greater than, for example, 70000 (to eliminate the stay text), draw the bounding rectangle on the image:

def draw_segments(img):
    contours, hierarchies = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if w * h > 70000:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)

Finally, read in the image, call the draw_segments function and display the image:

img = cv2.imread("document.jpg")
draw_segments(img)
cv2.imshow("Image", img)
cv2.waitKey(0)

How to do image segmentation with Python and OpenCV

Answers (1)

Related Questions