Defex

Reputation: 137

Image stitching

I recorded a video while the bottle was rotated. Then I extracted the frames from the video and cropped the central block from every frame.


So for all frames I got the following images:
[image: the cropped central blocks from the frames]

I tried to stitch them into a panorama, but I got bad results. I used the following program:

import glob

# from panorama import Panorama
import sys
import numpy
import imutils
import cv2


def readImages(imageString):
    images = []

    # Read the image at each of the given paths.
    for path in imageString:
        img = cv2.imread(path)
        images.append(img)

    return images


def findAndDescribeFeatures(image):
    # Getting gray image
    grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Find and describe the features.
    # Faster alternative: sift = cv2.xfeatures2d.SURF_create()
    sift = cv2.xfeatures2d.SIFT_create()

    # Find interest points.
    keypoints = sift.detect(grayImage, None)

    # Computing features.
    keypoints, features = sift.compute(grayImage, keypoints)

    # Convert the keypoints to an array of (x, y) coordinates.
    keypoints = numpy.float32([kp.pt for kp in keypoints])

    return keypoints, features
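# Note (editor's aside): in OpenCV >= 4.4 SIFT lives in the main module, so
# the equivalent there would be:
#   sift = cv2.SIFT_create()
#   keypoints, features = sift.detectAndCompute(grayImage, None)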


def matchFeatures(featuresA, featuresB):
    # Slow: featureMatcher = cv2.DescriptorMatcher_create("BruteForce")
    featureMatcher = cv2.DescriptorMatcher_create("FlannBased")
    matches = featureMatcher.knnMatch(featuresA, featuresB, k=2)
    return matches
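# Note (assumption about equivalence): with SIFT's float32 descriptors,
# "FlannBased" corresponds to an explicit FLANN matcher with KD-tree index
# parameters, e.g.:
#   featureMatcher = cv2.FlannBasedMatcher(
#       dict(algorithm=1, trees=5),  # 1 == FLANN_INDEX_KDTREE
#       dict(checks=50))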


def generateHomography(allMatches, keypointsA, keypointsB, ratio, ransacRep):
    if not allMatches:
        return None
    matches = []

    for match in allMatches:
        # Lowe's ratio test
        if len(match) == 2 and (match[0].distance / match[1].distance) < ratio:
            matches.append(match[0])

    pointsA = numpy.float32([keypointsA[m.queryIdx] for m in matches])
    pointsB = numpy.float32([keypointsB[m.trainIdx] for m in matches])

    if len(pointsA) > 4:
        # findHomography() needs at least 4 point pairs; more gives RANSAC
        # something to work with.
        H, status = cv2.findHomography(pointsA, pointsB, cv2.RANSAC, ransacRep)
        return matches, H, status
    else:
        return None


paths = glob.glob("C:/Users/andre/Desktop/Panorama-master/frames/*.jpg")
images = readImages(paths[::-1])
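# Note (editor assumption): glob.glob() does not guarantee any ordering, so
# frames may be read out of capture order. If the files have zero-padded
# names, sorting first is safer:
#   paths = sorted(glob.glob("C:/Users/andre/Desktop/Panorama-master/frames/*.jpg"))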

while len(images) > 1:
    imgR = images.pop()
    imgL = images.pop()

    interestsR, featuresR = findAndDescribeFeatures(imgR)
    interestsL, featuresL = findAndDescribeFeatures(imgL)
    try:
        allMatches = matchFeatures(featuresR, featuresL)
        _, H, _ = generateHomography(allMatches, interestsR, interestsL, 0.75, 4.0)

        result = cv2.warpPerspective(imgR, H,
                                     (imgR.shape[1] + imgL.shape[1], imgR.shape[0]))
        result[0:imgL.shape[0], 0:imgL.shape[1]] = imgL
        images.append(result)
    except (TypeError, cv2.error):
        # generateHomography() returns None when there are too few matches,
        # which makes the tuple unpacking above raise TypeError.
        pass
result = imutils.resize(images[0], height=260)
cv2.imshow("Result", result)
cv2.imwrite("Result.jpg", result)

cv2.waitKey(0)

My result was:
[image: stitching result]

Maybe someone knows how to do this better? I thought that using small blocks from each frame would remove the roundness... but it didn't.

Data: https://1drv.ms/f/s!ArcAdXhy6TxPho0FLKxyRCL-808Y9g

Upvotes: 3

Views: 902

Answers (1)

Oliort UA

Reputation: 1647

I managed to achieve a nice result. I rewrote your code just a little; here is the changed part:

def generateTransformation(allMatches, keypointsA, keypointsB, ratio):
    if not allMatches:
        return None
    matches = []

    for match in allMatches:
        # Lowe's ratio test
        if len(match) == 2 and (match[0].distance / match[1].distance) < ratio:
            matches.append(match[0])

    pointsA = numpy.float32([keypointsA[m.queryIdx] for m in matches])
    pointsB = numpy.float32([keypointsB[m.trainIdx] for m in matches])

    if len(pointsA) > 2:
        # fullAffine=True needs at least 3 point pairs.
        transformation = cv2.estimateRigidTransform(pointsA, pointsB, True)
        if transformation is None or transformation.shape[1] < 1 or transformation.shape[0] < 1:
            return None
        return transformation
    else:
        return None
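# Note (editor's aside): estimateRigidTransform() was removed in OpenCV 4.
# With fullAffine=True as above, the closest replacement there is
# estimateAffine2D(), which returns the 2x3 matrix plus an inlier mask:
#   transformation, inliers = cv2.estimateAffine2D(pointsA, pointsB)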


paths = glob.glob("a*.jpg")
images = readImages(paths[::-1])
result = images[0]

while len(images) > 1:
    imgR = images.pop()
    imgL = images.pop()

    interestsR, featuresR = findAndDescribeFeatures(imgR)
    interestsL, featuresL = findAndDescribeFeatures(imgL)
    allMatches = matchFeatures(featuresR, featuresL)

    transformation = generateTransformation(allMatches, interestsR, interestsL, 0.75)
    if transformation is None or transformation[0, 2] < 0:
        images.append(imgR)
        continue
    # Keep only the x translation; reset everything else to the identity
    # (no rotation, no scaling, no y translation).
    transformation[0, 0] = 1
    transformation[1, 1] = 1
    transformation[0, 1] = 0
    transformation[1, 0] = 0
    transformation[1, 2] = 0
    result = cv2.warpAffine(imgR, transformation,
                            (imgR.shape[1] + int(transformation[0, 2] + 1), imgR.shape[0]))
    result[:, :imgL.shape[1]] = imgL
    cv2.imshow("R", result)
    images.append(result)
    cv2.waitKey(1)

cv2.imshow("Result", result)

So the key thing I changed is the transformation of the images. I use estimateRigidTransform() instead of findHomography() to calculate the transformation between the images. From that transformation matrix I extract only the x translation, which sits in the [0, 2] cell of the resulting 2x3 affine matrix. I set the other matrix elements as if it were an identity transformation (no scaling, no perspective, no rotation, no y translation). Then I pass it to warpAffine() to transform imgR, the same way you did with warpPerspective().
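For illustration, this is the matrix that survives the zeroing above: a pure horizontal translation (a minimal sketch, with a made-up tx value standing in for transformation[0, 2]):

import numpy

tx = 42.0  # hypothetical x shift, normally read from transformation[0, 2]
# 2x3 affine matrix for warpAffine(): shift every pixel tx pixels to the
# right and change nothing else.
translation = numpy.float32([[1, 0, tx],
                             [0, 1, 0]])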

You can do this because the camera and the spinning object's positions are stable and you capture the object with a straight frontal view. That means you don't have to apply any perspective, scaling, or rotation corrections and can simply "glue" the frames together along the x axis.
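Since only a horizontal shift is needed, a possible alternative worth noting (not what the code above uses) is to estimate that shift directly with phase correlation instead of feature matching. A minimal sketch, assuming two consecutive equal-sized frames imgL and imgR; how the sign of dx maps to the stitch direction depends on the frame order:

import cv2
import numpy

grayL = cv2.cvtColor(imgL, cv2.COLOR_BGR2GRAY).astype(numpy.float32)
grayR = cv2.cvtColor(imgR, cv2.COLOR_BGR2GRAY).astype(numpy.float32)
# phaseCorrelate() expects single-channel float images of equal size and
# returns the translation between them plus a confidence response.
(dx, dy), response = cv2.phaseCorrelate(grayL, grayR)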

I think your approach fails because you actually observe the bottle with a slightly tilted-down camera view, or the bottle is not in the middle of the frame. I'll try to describe that with an image. The text on the bottle is depicted in red. For example, the algorithm finds a matching pair of points (green) near the bottom of the captured round object. Note that the point moves not only to the right but also diagonally upward. The program then calculates the transformation taking these slightly upward-moving points into account, and the error gets worse frame by frame.

[image: sketch of a matched point pair (green) on the bottle moving diagonally between frames]

The matching of image points may also be slightly inaccurate, so extracting only the x translation helps for another reason: it gives the algorithm "a clue" about the actual situation. That makes the code less applicable to other conditions, but in your case it improves the result a lot.

Also, I filter out some incorrect results with the transformation[0, 2] < 0 check (the bottle can rotate in only one direction, and the code won't work with a negative x translation anyway).

Upvotes: 1
