Find area in image with python and opencv

Question

I would like to find an area in about 1.5K images, which are all in a similar format. They are all scans of painted or photographed images of people, and they all feature the same color card. The color card may be placed on either side of the image (see sample image below).

The result should be an image, only containing the person's portrait.

I am able to find the color card with opencv template matching:

import cv2
import numpy as np

# Matching method. With TM_CCOEFF_NORMED a higher score means a better match,
# which is what the ">= threshold" test below relies on. (For the TM_SQDIFF*
# methods the best match is the minimum, so the comparison would have to be
# inverted.)
method = cv2.TM_CCOEFF_NORMED

# Read the images from the file
img_rgb = cv2.imread('./imgs/test_portrait.jpg')
if img_rgb is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('./imgs/test_portrait.jpg')
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)

# Flag 0 loads the template as a single-channel grayscale image
template = cv2.imread('./portraet_color_card.png', 0)
if template is None:
    raise FileNotFoundError('./portraet_color_card.png')
# shape is (rows, cols); reversed it yields (width, height)
w, h = template.shape[::-1]

result = cv2.matchTemplate(img_gray, template, method)

# Keep every position whose score reaches the threshold
threshold = .97
loc = np.where(result >= threshold)
# np.where yields (rows, cols); reverse to get (x, y) points
for pt in zip(*loc[::-1]):
    print("Found:", pt)
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

cv2.imwrite('result.png', img_rgb)

Output:

Found: (17, 303)
Found: (18, 303)
Found: (17, 304)
Found: (18, 304)

With the coordinates and the image dimensions, I am able to determine whether the portrait is to the left or to the right of the color card and can crop the image accordingly. The result is far from perfect, as the borders are still there.

Is there a better way to extract the portraits from the images? I would prefer to work with python and opencv but I am open to other suggestions on how to solve this problem for a larger number of images.

Samples:

Template:

Bilal · Accepted Answer

This solution assumes that the portrait is the largest pattern in the image.

Solution Steps in order:

Classical Image processing to obtain the important features from the image:

Conversion to Gray level.
Gaussian Blur to reduce noise and smooth the image.
Edge Detection, using Canny in my case.
Morphological Dilation to group the features into two main patterns.
Largest Connected components Detection (credit to an old SO answer)
The rest is to mask the largest connected component.

Note that this solution makes some assumptions, so it might not always generalize, but I have tested it with the given images.

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import cv2
import numpy as np

class ImgProcessor:
    """Isolate the largest edge-connected region of an image.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> dilation ->
    largest connected component -> bounding-box mask applied to the
    original image.
    """

    def __init__(self, path, imName):
        """Load the image located at ``path + imName``.

        Args:
            path: Directory prefix; it is concatenated with ``imName`` as-is,
                so it must end with a path separator.
            imName: File name of the image.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        self.path = path
        self.imName = imName
        self.original = cv2.imread(self.path + self.imName)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here rather than with an obscure error later in the pipeline.
        if self.original is None:
            raise FileNotFoundError(
                "Could not read image: " + self.path + self.imName
            )

    def imProcess(self, ksmooth=7, kdilate=3, thlow=50, thigh= 100):
        """Return a dilated edge map of the original image.

        Args:
            ksmooth: Side length of the square Gaussian blur kernel.
            kdilate: Side length of the square dilation kernel.
            thlow: Lower Canny hysteresis threshold.
            thigh: Upper Canny hysteresis threshold.

        Returns:
            The edge image after dilation.
        """
        # Convert Image to Gray
        img_gray = cv2.cvtColor(self.original, cv2.COLOR_BGR2GRAY)
        # Gaussian Filtering for Noise Removal
        gauss = cv2.GaussianBlur(img_gray, (ksmooth, ksmooth), 0)
        # Canny Edge Detection. Only the two thresholds are passed: a fourth
        # positional argument would be taken as the output array, not as a
        # tuning parameter.
        edges = cv2.Canny(gauss, thlow, thigh)
        # Morphological Dilation, to merge nearby edges into larger patterns
        # TODO: experiment with different kernels
        kernel = np.ones((kdilate, kdilate), 'uint8')
        return cv2.dilate(edges, kernel)

    def largestCC(self, imBW):
        """Return an image containing only the largest connected component.

        Source: https://stackoverflow.com/a/47057324

        Args:
            imBW: Binary image; non-zero pixels are foreground.

        Returns:
            Array with the shape of ``imBW``, 255 on the largest foreground
            component and 0 elsewhere. All zeros when ``imBW`` has no
            foreground at all.
        """
        image = imBW.astype('uint8')
        nb_components, output, stats, _ = cv2.connectedComponentsWithStats(
            image, connectivity=4
        )
        img2 = np.zeros(output.shape)
        # Label 0 is the background; fewer than two labels means there is no
        # foreground component to select.
        if nb_components < 2:
            return img2

        sizes = stats[:, cv2.CC_STAT_AREA]
        # Skip the background entry, then shift the index back to a label.
        max_label = 1 + int(np.argmax(sizes[1:]))
        img2[output == max_label] = 255
        return img2

    def maskCorners(self, mask, outval=1):
        """Fill the axis-aligned bounding box of the non-zero pixels of a mask.

        Args:
            mask: 2-D array; non-zero entries mark the region of interest.
            outval: Value written inside the bounding box.

        Returns:
            Array like ``mask`` holding ``outval`` inside the bounding box
            (both end rows/columns included) and 0 elsewhere. All zeros when
            ``mask`` has no non-zero entry.
        """
        output = np.zeros_like(mask)
        rows = np.flatnonzero(mask.any(axis=1))
        cols = np.flatnonzero(mask.any(axis=0))
        if rows.size == 0:
            # Empty mask: there is no bounding box to fill.
            return output
        # flatnonzero is sorted, so first/last are the extreme indices.
        # Slices exclude their end, hence the +1 to keep the last row/column.
        output[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1] = outval
        return output

    def extractROI(self):
        """Return the original image with everything outside the ROI zeroed."""
        im = self.imProcess()
        lgcc = self.largestCC(im).astype(np.uint8)
        roi = self.maskCorners(lgcc)
        # Pixels where the mask is zero come out black
        exroi = cv2.bitwise_and(self.original, self.original, mask=roi)
        return exroi

    def show_res(self):
        """Display the extracted ROI in a window and block until a key press."""
        result = self.extractROI()
        cv2.namedWindow("Result", cv2.WINDOW_NORMAL)
        cv2.imshow("Result", result)
        cv2.waitKey(0)

# ==============================================
if __name__ == "__main__":
    # TODO: change the path, and image name to suit your needs
    # show_res() only displays the result and returns nothing, so there is
    # no value worth keeping from the call.
    ImgProcessor(path="/home/", imName="img.png").show_res()

Find area in image with python and opencv

Answers (2)

Solution Steps in order:

Related Questions