Reputation: 1013
As a newbie to OCR, I am attempting to detect all the rectangles/boxes in a scanned document, illustrated here.
However, the code snippet below fails to identify a considerable number of the rectangles in the image.
import cv2
import imutils
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
img = cv2.imread("example.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold = cv2.adaptiveThreshold(
    gray.copy(),
    255, # value assigned to pixels below the local threshold (we use THRESH_BINARY_INV)
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV, # thresholding type
    301, # block size (301x301 neighbourhood)
    21) # constant subtracted from the weighted mean
font = cv2.FONT_HERSHEY_COMPLEX
keypoints = cv2.findContours(threshold.copy(),
cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
working_image = None
idx = 1
cropped_field_images = []
contour_list = list(contours)
contour_list.reverse()
rev_contours = tuple(contour_list)
for contour in rev_contours:
    x,y,w,h = cv2.boundingRect(contour)
    area = cv2.contourArea(contour)
    approx = cv2.approxPolyDP(contour, 10, True)
    location = None
    if len(approx) == 4 and area > 1500: # if the shape is rectangular
        working_image = cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
        cv2.putText(img, str(idx), (x, y), font, 1, (0,0,255))
        location = approx
        mask = np.zeros(gray.shape, np.uint8) # create a blank mask
        rect_img = cv2.drawContours(mask, [location], 0, 255, -1)
        rect_img = cv2.bitwise_and(img, img, mask = mask)
        (x, y) = np.where(mask==255)
        (x1, y1) = (np.min(x), np.min(y))
        (x2, y2) = (np.max(x), np.max(y))
        cropped_rect = gray[x1:x2+1, y1:y2+1]
        cropped_field_images.append(cropped_rect)
        idx += 1
plt.figure(figsize = (11.69*2,8.27*2))
plt.axis('off')
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));
The result of the code above is the image below. Any rectangle without a number in its top-left corner and a green boundary was not recognised by the code; these are marked with red stars. I tried varying the adaptive threshold's type, block size and constant in the snippet above, but the red-starred rectangles keep getting omitted from the output.
What am I missing? What could I consider to make sure these boxes/regions are not omitted from the results? Any help in tuning the adaptive threshold so that all the red-starred rectangle sections are included in the output would be greatly appreciated.
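For reference, this is roughly how I varied the parameters (the block sizes and constants below are just sample values I tried):
import cv2
import matplotlib.pyplot as plt

gray = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2GRAY)
for block_size in (51, 101, 201, 301):  # block size must be odd and > 1
    for c in (5, 11, 21):
        th = cv2.adaptiveThreshold(gray, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV,
                                   block_size, c)
        plt.figure(figsize=(11.69, 8.27))
        plt.title(f"blockSize={block_size}, C={c}")
        plt.axis('off')
        plt.imshow(th, cmap='gray')
plt.show()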
Upvotes: 2
Views: 843
Reputation: 1013
I managed to figure out how to capture all the rectangles/boxes in this form. As Sembei pointed out, the document cannot be assumed to be perfectly scanned, and the scanned images are not all equally shaped/sized or scaled, so image registration might not be the most efficient approach for my particular challenge.
I resolved the challenge with the following steps:
First, I sharpened the image and removed the grey background with an HSV mask:
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)
hsv = cv2.cvtColor(sharpened.copy(), cv2.COLOR_BGR2HSV)
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))
# Erode the mask - pixels around a black pixel should not be masked.
nzmask = cv2.erode(nzmask, np.ones((3,3)))
mask_grey = mask_grey & nzmask
cleaned_bg_img = img.copy()
cleaned_bg_img[np.where(mask_grey)] = 255
cleaned_bg_img = cv2.cvtColor(cleaned_bg_img.copy(), cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_RGB2GRAY) # cleaned_bg_img is RGB at this point
The result is illustrated in the image below (compare with the first image in the question to see the difference).
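To reproduce that comparison, something like this can be used (a minimal sketch; it assumes img and cleaned_bg_img from the snippet above, where cleaned_bg_img has already been converted to RGB):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11.69*2, 8.27))
ax1.set_title("Original scan")
ax1.axis('off')
ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # img is BGR, convert for display
ax2.set_title("Grey background removed")
ax2.axis('off')
ax2.imshow(cleaned_bg_img)  # already RGB
plt.show()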
Second, I switched the contour approximation to an epsilon relative to each contour's arc length, and drew the cv2.boundingRect of the contour as a result:
epsilon = 0.01 * cv2.arcLength(contour, True)
x,y,w,h = cv2.boundingRect(contour)
area = cv2.contourArea(contour)
approx = cv2.approxPolyDP(contour, epsilon, True)
if len(approx) > 3 and area > 3000:
    # if the shape is rectangular (or a polygon: document not scanned perfectly)
    # if area > 3000, to weed out small rectangles on characters/checkboxes
    working_image = cv2.rectangle(cleaned_bg_img,(x,y),(x+w,y+h),(0,255,0),2)
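The switch from a fixed epsilon of 10 to one relative to the perimeter matters because the tolerance now scales with the size of the box. A self-contained sketch (the wobbly box below is a made-up example) shows the effect on the vertex count:
import cv2
import numpy as np

# A synthetic box contour, roughly 800x200 px, with a 15 px bump on its top edge.
box = np.array([[[0, 0]], [[400, 15]], [[803, 0]],
                [[800, 200]], [[0, 197]]], dtype=np.int32)

approx_fixed = cv2.approxPolyDP(box, 10, True)     # fixed epsilon keeps the bump
epsilon = 0.01 * cv2.arcLength(box, True)          # ~20 px for this perimeter
approx_rel = cv2.approxPolyDP(box, epsilon, True)  # relative epsilon smooths it away

print(len(approx_fixed), len(approx_rel))          # typically 5 vs. 4 vertices here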
The final result of these changes is illustrated in the image below. While the result in image 2 of the question identified 88 fields/boxes, these changes allowed me to identify all 102 fields/boxes.
The final code snippet is provided below for anyone who wants to use this approach to identify fields/boxes in structured paper records. I hope this solution is helpful to anyone facing a similar challenge. Any variations that work for tabular structured formats are also highly welcome!
import cv2
import imutils
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
img = cv2.imread("example.jpg")
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)
hsv = cv2.cvtColor(sharpened.copy(), cv2.COLOR_BGR2HSV)
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))
# Erode the mask - all pixels around a black pixels should not be masked.
nzmask = cv2.erode(nzmask, np.ones((3,3)))
mask_grey = mask_grey & nzmask
cleaned_bg_img = img.copy()
cleaned_bg_img[np.where(mask_grey)] = 255
cleaned_bg_img = cv2.cvtColor(cleaned_bg_img.copy(), cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_RGB2GRAY) # cleaned_bg_img is RGB at this point
threshold = cv2.adaptiveThreshold(
    gray.copy(),
    255, # value assigned to pixels below the local threshold (we use THRESH_BINARY_INV)
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV, # thresholding type
    301, # block size (301x301 neighbourhood)
    11) # constant subtracted from the weighted mean
font = cv2.FONT_HERSHEY_COMPLEX
# find the contours in the thresholded image
keypoints = cv2.findContours(threshold.copy(),
cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
working_image = None
idx = 1
cropped_field_images = []
# the following code section ensures we read the rectangles from
# the top of the page, not bottom
contour_list = list(contours)
contour_list.reverse()
rev_contours = tuple(contour_list)
for contour in rev_contours:
    epsilon = 0.01 * cv2.arcLength(contour, True)
    x,y,w,h = cv2.boundingRect(contour)
    area = cv2.contourArea(contour)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    location = None
    if len(approx) > 3 and area > 3000:
        # if the shape is rectangular (or a polygon: document not scanned perfectly)
        # if area > 3000, to weed out small rectangles on characters/checkboxes etc.
        working_image = cv2.rectangle(cleaned_bg_img,(x,y),(x+w,y+h),(0,255,0),2)
        cv2.putText(cleaned_bg_img, str(idx), (x, y), font, 1, (0,0,255))
        location = approx
        mask = np.zeros(gray.shape, np.uint8) # create a blank mask
        rect_img = cv2.drawContours(mask, [location], 0, 255, -1) # draw the contour for this location
        rect_img = cv2.bitwise_and(cleaned_bg_img, cleaned_bg_img, mask = mask) # overlay the mask on the image
        (x, y) = np.where(mask==255)
        (x1, y1) = (np.min(x), np.min(y))
        (x2, y2) = (np.max(x), np.max(y))
        cropped_rect = gray[x1:x2+1, y1:y2+1]
        cropped_field_images.append(cropped_rect)
        idx += 1
plt.figure(figsize = (11.69*2,8.27*2))
plt.axis('off')
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));
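One caveat with the reversal trick above: cv2.findContours makes no guarantee about contour order, so reversing the list is only a heuristic. A more robust alternative (a sketch, not what the code above uses) is to sort the accepted bounding rectangles by position:
# Collect the bounding rectangles accepted by the same size/shape test,
# then sort top-to-bottom, left-to-right with a row tolerance.
boxes = [cv2.boundingRect(c) for c in contours
         if len(cv2.approxPolyDP(c, 0.01 * cv2.arcLength(c, True), True)) > 3
         and cv2.contourArea(c) > 3000]
row_tol = 20  # pixels; assumed tolerance for boxes that sit on the same row
boxes.sort(key=lambda b: (b[1] // row_tol, b[0]))  # b = (x, y, w, h)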
Upvotes: 1
Reputation:
Here's how I would solve your problem:
1. Spend some time manually recording the locations of the top-left and bottom-right corners of each field. This will be your ground truth, and it needs to be reliable, so better do this procedure three times and average the locations.
2. Run the detection as you do now and get the locations of the top-left and bottom-right corners of all the detected fields (don't worry if some fields are not detected).
3. Now you need to match the detected points with your ground-truth points. You can start by assuming the document is scanned perfectly, so you only have to apply an offset to your detected points to match the ground truth. In reality the document might not be scanned perfectly vertically (for which you will need a rotation), or the document may come from a photo, in which case you will need to apply a homography to correct for the angle from which the picture was taken.
4. Once your points match the ground truth (they will not match exactly, so I'd recommend RANSAC for the matching; see the sketch below), you can use the ground-truth points to locate all the fields in the picture, crop them, and I suppose the next step is to run OCR on the handwritten fields to extract the text.
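A minimal sketch of steps 3 and 4 with OpenCV (the point arrays here are made-up placeholders; in practice they come from your ground truth and your detector, matched pairwise):
import cv2
import numpy as np

# Hypothetical matched (x, y) corner locations: detected vs. ground truth.
det_pts = np.array([[108, 131], [507, 125], [111, 410], [506, 409]], dtype=np.float32)
gt_pts  = np.array([[100, 120], [500, 118], [102, 400], [498, 402]], dtype=np.float32)

# Estimate the detected -> ground-truth transform with RANSAC, so that a few
# bad detections do not throw off the alignment (needs >= 4 point pairs).
H, inlier_mask = cv2.findHomography(det_pts, gt_pts, cv2.RANSAC, 5.0)

# Warp the scan into the ground-truth frame; fields can then be cropped
# at their known ground-truth locations.
img = cv2.imread("example.jpg")
aligned = cv2.warpPerspective(img, H, (img.shape[1], img.shape[0]))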
You can take a look at this link for a tutorial on how to do that:
https://learnopencv.com/image-alignment-feature-based-using-opencv-c-python/
And definitely take a look at the official OpenCV documentation, which has very good examples, like this one:
https://docs.opencv.org/3.4/dc/dc3/tutorial_py_matcher.html
Upvotes: 0