A14
A14

Reputation: 111

How to merge neighboring bounding boxes

I would like to join nearby bounding boxes. Right now I am able to detect a bounding box for each word.

Currently the code gives a bounding box for each letter. How do I modify it so that it gives one merged bounding box, depending on a tolerance?

# Detect small contours in the thresholded image, draw a padded rectangle
# around each one and remember the padded rectangles in rect_box.

# Each entry appended below is a pair of corner tuples: ((x1, y1), (x2, y2)).
rect_box = []
im = cv2.imread('input.jpg')
grayImage = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)

# NOTE(review): THRESH_OTSU is set, so the explicit 150 is presumably ignored
# in favour of the automatically chosen level - confirm against OpenCV docs.
_,thresh = cv2.threshold(grayImage, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# NOTE(review): ksize is presumably (width, height), i.e. a 1-wide, 31-tall
# cross, which would spread pixels vertically only - confirm.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(1,31))
dilated = cv2.dilate(thresh, kernel, iterations = 15) # dilate
contours0,_ = cv2.findContours(dilated,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) # get contours
contours = [cv2.approxPolyDP(cnt, 50, True) for cnt in contours0]
# NOTE(review): sort_contours is defined further down in this listing; run
# top-to-bottom as a single script, this call would hit a NameError.
(contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")


# `contours` is rebound here from the un-dilated `thresh` image, so the
# dilated/approximated/sorted contours computed above are not used below.
# NOTE(review): 1 and 2 are presumably cv2.RETR_LIST and
# cv2.CHAIN_APPROX_SIMPLE - confirm.
contours, hierarchy = cv2.findContours(thresh, 1, 2)
for contour in contours:
    [x,y,w,h] = cv2.boundingRect(contour)
    # Skip boxes that exceed 100 px in both dimensions.
    if h>100 and w>100:
        continue

    # Skip boxes that are under 10 px in either dimension.
    if h<10 or w<10:
        continue
    # Grow the box by 5% of its width and 15% of its height on each side.
    pad_w, pad_h = int(0.05*w), int(0.15*h)
    cv2.rectangle(im,(x-pad_w,y-pad_h),(x+w+pad_w,y+h+pad_h),(255,0,255),1,shift=0)
    rect_box.append(((x-pad_w,y-pad_h),(x+w+pad_w,y+h+pad_h)))

# Save and display the image with the per-contour rectangles drawn on it.
cv2.imwrite("rectangle.png", im)
cv2.imshow('image', im)
cv2.waitKey(0)
cv2.destroyAllWindows()

# For joining words
# Attempts to group consecutive boxes from rect_box and reduce each group to
# one enclosing rectangle. rect_box entries are ((x1, y1), (x2, y2)) pairs.
out = []
# Indexing [0] raises IndexError when no box passed the size filters.
prev = rect_box[0]
# Every group starts with a placeholder element (a corner tuple) that is
# removed again by the pop(0) loop below.
temp_list = [rect_box[0][0]]

for num in rect_box[0:]:
    # NOTE(review): num[0] is an (x, y) tuple, so `num[0]-10` is tuple minus
    # int and raises TypeError as written; prev[0] is likewise a tuple.
    if num[0]-10 > prev[0]:
        out += [temp_list]
        # NOTE(review): only the placeholder is stored here; the box `num`
        # that triggered the split is not added to any group.
        temp_list = [num[0]]
    else:
        temp_list.append(num)
    prev = num
out.append(temp_list)

# Drop the placeholder from each group, then discard groups left empty.
for i in out:
    i.pop(0)
out_1 = [x for x in out if x != []]

out_2 = []
for i in out_1:
    # zip(*i) transposes the group's boxes. Each box has two elements (its two
    # corner tuples), so min_v and max_v each hold two tuples.
    min_v = list(map(min, zip(*i)))
    max_v = list(map(max, zip(*i)))
    # NOTE(review): with two-element boxes, max_v[2] and max_v[3] are out of
    # range (IndexError); this line looks written for flat 4-value boxes.
    out_2.append((min_v[0], min_v[1], max_v[2],max_v[3]))

for i in out_2:
    x,y,w,h = i
    print(x,y,w,h)
    # NOTE(review): the third value is used as an absolute x while the fourth
    # is added to y - the two corners are treated inconsistently; confirm the
    # intended box format.
    # In the visible code nothing saves or shows `im` after this loop.
    cv2.rectangle(im,(x,y),(w,y+h),(255,0,255),1,shift=0)


def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Iterable of contours, each accepted by ``cv2.boundingRect``.
        method: Sort direction. "left-to-right" and "right-to-left" order by
            the box's x value; "top-to-bottom" and "bottom-to-top" order by
            its y value. Any other string behaves like "left-to-right".

    Returns:
        A ``(cnts, boundingBoxes)`` pair of tuples in sorted order, where
        ``boundingBoxes[k]`` is ``cv2.boundingRect(cnts[k])``. Both tuples
        are empty when ``cnts`` is empty.
    """
    # Materialize once so a one-shot iterable is not exhausted by the
    # bounding-box pass before it is paired up for sorting.
    cnts = list(cnts)

    # Nothing to sort: zip(*sorted(...)) would yield no items and the
    # two-way unpacking below would raise ValueError.
    if not cnts:
        return ((), ())

    # the "reversed" directions sort descending
    reverse = method in ("right-to-left", "bottom-to-top")

    # sort against the y-coordinate (index 1) for the vertical directions,
    # otherwise against the x-coordinate (index 0) of the bounding box
    i = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    # construct the list of bounding boxes and sort contour/box pairs together
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
        key=lambda b: b[1][i], reverse=reverse))

    # return the sorted contours and bounding boxes
    return (cnts, boundingBoxes)

Input: Input image. Output: Output image. After appending the coordinates, I grouped them on the basis of the y axis and took the min and max. This gave me a bounding box for each line; I did the same thing for the whole region using the x axis. But somehow it is not working the way I want.

Upvotes: 2

Views: 2499

Answers (1)

nathancy
nathancy

Reputation: 46600

This is a classic use case for cv2.dilate(). Whenever you need to combine multiple individual contours into a single contour, you can dilate. To determine the tolerance, you can adjust the structuring element type, the kernel size, or the number of dilate iterations.

enter image description here

import cv2

# Load the picture, convert it to grayscale and apply a 3-px median blur.
source = cv2.imread('input.jpg')
grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
smoothed = cv2.medianBlur(grayscale, 3)

# Inverted binary threshold combined with the Otsu flag; only the mask
# (second return value) is kept.
_, binary = cv2.threshold(
    smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)

# One morphological opening pass with a small 3x2 rectangular kernel.
open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,2))
cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, open_kernel, iterations=1)

# Dilate so that neighbouring blobs fuse; kernel size and iteration count
# are the "tolerance" knobs.
merge_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,5))
merged = cv2.dilate(cleaned, merge_kernel, iterations=4)

# The contour list is the first element of a 2-tuple result and the second
# element otherwise (presumably to cope with different OpenCV versions).
found = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if len(found) == 2:
    outlines = found[0]
else:
    outlines = found[1]

# Draw one rectangle per external contour of the dilated mask.
box_colour = (36,255,12)
for left, top, width, height in map(cv2.boundingRect, outlines):
    cv2.rectangle(source, (left, top), (left + width, top + height), box_colour, 2)

# Show every intermediate stage plus the annotated result, then wait for a key.
stages = (
    ('thresh', binary),
    ('opening', cleaned),
    ('dilate', merged),
    ('image', source),
)
for title, frame in stages:
    cv2.imshow(title, frame)
cv2.waitKey()

Upvotes: 1

Related Questions