Reputation: 326
I have the following image. I want to detect the rectangular whiteboard and apply a perspective transform to it.
I want to detect these 4 boundaries/corners and apply a perspective transformation to it. Have a look at the below image:
I am not able to detect the boundaries of the rectangle. Here's what I have tried:
import cv2, os
import numpy as np
from google.colab.patches import cv2_imshow
# Load the input image; cv2.imread returns None instead of raising when the
# file is missing or unreadable, so check explicitly.
image = cv2.imread("img.jpg")
if image is None:
    raise FileNotFoundError("img.jpg could not be read")
# Untouched copy kept for the final perspective warp.
orig1 = image.copy()

# 1) Grayscale image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# cv2_imshow(gray)

# 2) Erosion
kernel = np.ones((5, 5), np.uint8)
erosion = cv2.erode(gray, kernel, iterations=1)
# cv2_imshow(erosion)

# 3) Thresholding (OTSU)
blur = cv2.GaussianBlur(erosion, (5, 5), 0)
ret3, thresh = cv2.threshold(
    blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
# cv2_imshow(thresh)

# 4) Contours
# Work on real copies: drawContours below paints on `orig`, and aliasing it
# to `image` would silently modify the loaded image as well.
copy = thresh.copy()
orig = image.copy()
cnts = cv2.findContours(copy, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either 2 or 3 values; the contour list is
# the first element of a 2-tuple and the second element otherwise.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) == 0:
    raise ValueError("No contours found in the thresholded image")

# Largest external contour by area (first one wins on ties).
c1 = max(cnts, key=cv2.contourArea)
area = cv2.contourArea(c1)
cv2.drawContours(orig, [c1], 0, (0, 255, 0), 3)

# Simplify the contour to a polygon; tolerance is 9% of its perimeter.
epsilon = 0.09 * cv2.arcLength(c1, True)
approx = cv2.approxPolyDP(c1, epsilon, True)
if len(approx) != 4:
    # The perspective step needs exactly four corners; stop here with a
    # clear message instead of failing later inside the corner reordering.
    raise ValueError(
        "Expected 4 corners for the board, got %d" % len(approx)
    )

# approx has one [[x, y]] entry per vertex; flatten to [[x, y], ...].
cood = [[vertex[0][0], vertex[0][1]] for vertex in approx]
# 5) Perspective Transformation
def reorder(myPoints):
    """Arrange four 2-D points into a fixed corner order.

    The points are ordered using their coordinate sum (x + y) and
    difference (y - x):

    * index 0: smallest x + y
    * index 1: smallest y - x
    * index 2: largest y - x
    * index 3: largest x + y

    In image coordinates (y growing downward) this corresponds to
    top-left, top-right, bottom-left, bottom-right for a roughly
    axis-aligned quadrilateral. For strongly rotated shapes the same input
    point can be selected for more than one slot.

    Args:
        myPoints: Array-like holding exactly four (x, y) pairs in any
            layout that contains 8 values, e.g. shape (4, 2) or (4, 1, 2).

    Returns:
        An int32 array of shape (4, 1, 2) with the reordered points.

    Raises:
        ValueError: If the input does not contain exactly four 2-D points.
    """
    points = np.asarray(myPoints)
    if points.size != 8:
        raise ValueError(
            "reorder() needs exactly 4 (x, y) points, got an input of "
            "shape %s" % (points.shape,)
        )
    points = points.reshape((4, 2))

    coord_sum = points.sum(axis=1)
    # np.diff along axis 1 yields y - x for every point.
    coord_diff = np.diff(points, axis=1)

    ordered = np.zeros((4, 1, 2), dtype=np.int32)
    ordered[0] = points[np.argmin(coord_sum)]
    ordered[1] = points[np.argmin(coord_diff)]
    ordered[2] = points[np.argmax(coord_diff)]
    ordered[3] = points[np.argmax(coord_sum)]
    return ordered
# Source corners: the detected coordinates, put into a fixed order by
# reorder() and converted to float32 for getPerspectiveTransform.
pts1 = reorder(cood).astype(np.float32)

# Destination square and output canvas size.
w = 1000
h = 1000
m1 = 1000
m2 = 1000

# Destination corners listed in the same order that reorder() produces.
pts2 = np.array([[0, 0], [w, 0], [0, h], [w, h]], dtype=np.float32)

# Warp the untouched copy of the input, then display the result.
matrix = cv2.getPerspectiveTransform(pts1, pts2)
result = cv2.warpPerspective(orig1, matrix, (m1, m2))
cv2_imshow(result)
I have also gone through Microsoft's research, but I am not sure how to implement it. I am not able to detect the board and apply a perspective transform to it. It would be great if any of you could help me out. Also, do let me know if my question requires more details.
Upvotes: 2
Views: 1218
Reputation: 326
I managed to get the 4 corner coordinates of the whiteboard. I used adaptive thresholding to detect the edges rather than Canny edge detection; I am not sure whether this methodology is correct, but it gives the required results. Here's the code:
import ...
# Load the image; cv2.imread returns None (no exception) on failure.
img = cv2.imread("path-to-image")
if img is None:
    raise FileNotFoundError("path-to-image could not be read")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Mean adaptive threshold with a 199-pixel neighbourhood and offset 5.
thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 199, 5
)
cv2_imshow(thresh)

# finding contours and applying perspective
# Initialise the "best so far" state up front so that a run which finds no
# quadrilateral is detected explicitly rather than through a NameError (or
# a stale value left over from an earlier notebook run).
area = -1
c1 = None
approx1 = None
try:
    copy = thresh.copy()
    orig = img.copy()
    cnts = cv2.findContours(copy, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Contour list is element 0 of a 2-tuple result, element 1 otherwise.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Keep the largest contour whose polygon approximation (tolerance 1% of
    # the perimeter) has exactly four vertices.
    for c in cnts:
        epsilon = 0.01 * cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, epsilon, True)
        if len(approx) != 4:
            continue
        candidate_area = cv2.contourArea(c)
        if area < candidate_area:
            area = candidate_area
            c1 = c
            approx1 = approx

    if approx1 is None:
        print("Image cannot be transformed!!\n")
    else:
        # four_point_transform must already be defined when this runs.
        warped = four_point_transform(orig, approx1.reshape(4, 2))
        cv2_imshow(warped)
except Exception as exc:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit
    # and reports what actually went wrong.
    print("Image cannot be transformed!!\n")
    print(f"Reason: {type(exc).__name__}: {exc}")
# four point transform
def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The two points with the smallest x are treated as the left side and
    split into top/bottom by their y value. Of the two remaining points,
    the one farthest (Euclidean distance) from the top-left point is taken
    as bottom-right and the other as top-right.

    Based on:
    https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/

    Args:
        pts: Array-like of shape (4, 2) holding (x, y) pairs.

    Returns:
        A float32 array of shape (4, 2) in the order tl, tr, br, bl.

    Raises:
        ValueError: If ``pts`` does not have shape (4, 2).
    """
    pts = np.asarray(pts)
    if pts.shape != (4, 2):
        raise ValueError(
            "order_points() expects 4 (x, y) points with shape (4, 2), "
            "got shape %s" % (pts.shape,)
        )

    # Split into the two left-most and the two right-most points.
    by_x = pts[np.argsort(pts[:, 0]), :]
    left_pair = by_x[:2, :]
    right_pair = by_x[2:, :]

    # On the left side the smaller y is the top-left corner.
    tl, bl = left_pair[np.argsort(left_pair[:, 1]), :]

    # Distance from the top-left corner to each right-hand point, computed
    # in float64 with NumPy so no extra distance module is required.
    offsets = right_pair.astype(np.float64) - tl.astype(np.float64)
    D = np.linalg.norm(offsets, axis=1)

    # Farthest point first: that one is bottom-right.
    br, tr = right_pair[np.argsort(D)[::-1], :]
    return np.array([tl, tr, br, bl], dtype="float32")
def four_point_transform(image, pts):
    """Warp the quadrilateral given by ``pts`` to an upright rectangle.

    The corners are ordered with order_points(); the output size is taken
    from the longer of each pair of opposite edges (truncated to int).

    Based on:
    https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/

    Args:
        image: Image passed to cv2.warpPerspective.
        pts: The four corner points, forwarded to order_points().

    Returns:
        The warped image of size (maxWidth, maxHeight) as produced by
        cv2.warpPerspective.
    """

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Output width: longer of the bottom and top edges.
    out_w = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
    # Output height: longer of the right and left edges.
    out_h = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

    # Destination corners in the same tl, tr, br, bl order as `rect`.
    last_col = out_w - 1
    last_row = out_h - 1
    dst = np.array(
        [
            [0, 0],
            [last_col, 0],
            [last_col, last_row],
            [0, last_row],
        ],
        dtype="float32",
    )

    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (out_w, out_h))
Upvotes: 1