Concatenating multiple images into one

Question

This function receives a list of numpy arrays that consist of cropped parts of an image. The crops are all the same size, except for the right-most and bottom-most images which might be of smaller size.

predictions[2] would return the 3rd sub-image that was cropped from the original image. Each crop is a numpy array. There are WxH crops, enumerated from left to right, top to bottom (so if there are 4 sub-images constituting the width, the 5th image in predictions would be the first sub-image on the left from the 2nd row of sub-images).

crops contains the information needed to find the number of horizontal and vertical sub-images that make up the reconstructed image. crops[2][3] contains the crop that is 3rd from the top and 4th from the left.

The images contained in crops are of smaller dimensions than the ones in predictions (I am basically making a model that increases the resolution of images). The reconstructed image is built from the images in predictions, arranged in the same order as the ones in crops.

def reconstruct(predictions, crops):
    """
    Placeholder for stitching the predicted sub-images back into one image.

    Nothing is merged yet: the function only prints debugging information
    and hands back an empty array of shape (0, 0).

    :param predictions: sequence of predicted sub-images (only lengths are read so far)
    :param crops: nested list of the original crops (only checked for being non-empty)
    :return: an empty numpy array of shape (0, 0)
    """
    has_crops = len(crops) > 0
    if has_crops:
        print("use crops")

    # TODO: properly extract the size of the full image
    height_length, width_length = 0, 0

    full_image = np.empty(shape=(height_length, width_length))
    print(full_image.shape)

    # TODO: properly merge the crops back into a single image
    for outer_idx in range(len(predictions[0])):
        for inner_idx in range(len(predictions)):
            # concatenate here
            print(outer_idx, inner_idx)

    return full_image

I was going to use numpy.concatenate, but according to other answers I've seen on SO it wouldn't be an efficient way of doing it (apparently numpy will just recreate a new variable in memory, copy the old one, and add the new data, etc.). So now I'm left wondering how to properly merge my multiple images into a single image. The current idea I was going for was to create a python list of the proper shape and progressively fill it with each numpy array's data, but even that I'm not sure if it's the proper idea.

Here is more or less the kind of bunch of images I'm trying to concatenate into a single image:

Here is the expected result:

And to help you out with understanding what more might be available to you, here is some more code:

def predict(args):
    """
    Run the model on every crop of the input image, one crop at a time.

    The image is split with seq_crop and each crop is sent through the model
    as a batch of size one. Predictions are collected row by row, left to
    right, so they line up with the nesting of the returned crops.

    :param args: object exposing ``model`` (model file name) and ``image`` (image file name)
    :return: tuple (predictions, image, crops) where predictions is a flat list
             with one entry per crop, image is the image as loaded from disk and
             crops is the nested list returned by seq_crop
    """
    model = load_model(save_dir + '/' + args.model)
    image = skimage.io.imread(tests_path + args.image)

    crops = seq_crop(image)  # crops into multiple sub-parts the image based on 'input_' constants

    predictions = []
    for row in crops:  # vertical position of the crop
        for current_image in row:  # horizontal position of the crop
            # Hack because GPU can only handle one image at a time
            input_img = np.expand_dims(current_image, 0)     # Add the image to a batch where it's the only member
            predictions.append(model.predict(input_img)[0])  # keep the single result of that one-image batch

    return predictions, image, crops


# adapted from: https://stackoverflow.com/a/52463034/9768291
def seq_crop(img):
    """
    Cut the whole image into a grid of sub-images.

    The tile size comes from the "input_" variables in the 'constants'
    (Evaluation). Each tile is produced by crop_precise; the grid has enough
    rows and columns to cover the full image.

    :param img: input image
    :return: nested list of sub-images, indexed as [row][column]
    """
    n_cols = ceildiv(img.shape[1], input_width)
    n_rows = ceildiv(img.shape[0], input_height)

    # outer list: top to bottom, inner lists: left to right
    return [
        [
            crop_precise(img, col * input_width, row * input_height, input_width, input_height)
            for col in range(n_cols)
        ]
        for row in range(n_rows)
    ]

def crop_precise(img, coord_x, coord_y, width_length, height_length):
    """
    To crop a precise portion of an image.

    The window is taken with plain slicing on the first two axes and no
    padding is applied here, so (assuming NumPy-style slicing) a window that
    runs past the image border yields a smaller crop.

    :param img: image to crop
    :param coord_x: width coordinate (top left point)
    :param coord_y: height coordinate (top left point)
    :param width_length: width of the cropped portion starting from coord_x
    :param height_length: height of the cropped portion starting from coord_y
    :return: the cropped part of the image, passed through float_im
    """
    rows = slice(coord_y, coord_y + height_length)
    cols = slice(coord_x, coord_x + width_length)

    # float_im is defined elsewhere; presumably rescales from [0,255] to [0.,1.]
    return float_im(img[rows, cols])

# from  https://stackoverflow.com/a/17511341/9768291
def ceildiv(a, b):
    """
    Divide a by b and round the result up.

    Uses floor division on the negated numerator, then flips the sign back.

    :param a: numerator
    :param b: denominator
    :return: the ceiling of a / b
    """
    floored_negative = (-a) // b
    return -floored_negative

if __name__ == '__main__':
    # NOTE(review): `args` is not defined in the visible code; presumably the
    # parsed command-line arguments carrying `model` and `image` - confirm.
    preds, original, crops = predict(args)  # returns the predictions along with the original

    # TODO: reconstruct image
    enhanced = reconstruct(preds, crops)  # reconstructs the enhanced image from predictions

EDIT:

The answer worked. Here is the version I've used:

# adapted from  https://stackoverflow.com/a/52733370/9768291
def reconstruct(predictions, crops):
    """
    Stitch a flat list of predicted tiles back into a single image.

    The flat list is first regrouped into rows using the nesting of crops,
    then every tile is copied into a preallocated output array. Row heights
    are read from the first tile of each row and column widths from the
    tiles of the first row. Tiles may be 2-D (height, width) or carry any
    trailing axes such as channels.

    :param predictions: iterable of numpy arrays ordered left to right, top to bottom
    :param crops: nested list [row][column] whose layout the predictions follow
                  (only its nesting is used, not its contents)
    :return: newly allocated array containing all tiles, with the dtype of the first tile
    :raises ValueError: if there are fewer predictions than entries in crops
    :raises IndexError: if crops is empty
    """
    # unflatten predictions
    def nest(data, template):
        data = iter(data)
        return [[next(data) for _ in row] for row in template]

    try:
        grid = nest(predictions, crops)
    except StopIteration:
        # a bare StopIteration escaping from here would be confusing for callers
        raise ValueError("fewer predictions than crops") from None

    row_stops = np.cumsum([row[0].shape[0] for row in grid])
    col_stops = np.cumsum([tile.shape[1] for tile in grid[0]])
    first = grid[0][0]

    # keep whatever trailing axes (e.g. channels) the tiles have, including none
    out_shape = (int(row_stops[-1]), int(col_stops[-1])) + tuple(first.shape[2:])
    recon = np.empty(out_shape, first.dtype)

    row_start = 0
    for row_stop, row in zip(row_stops, grid):
        col_start = 0
        for col_stop, tile in zip(col_stops, row):
            recon[row_start:row_stop, col_start:col_stop] = tile
            col_start = col_stop
        row_start = row_stop
    return recon

Paul Panzer · Accepted Answer

The most convenient is probably np.block

import numpy as np
from scipy import misc
import Image

# get example picture
data = misc.face()
# chop it up
# I and J are the split positions (200, 400, ...) along the first and second axis of data
I, J = map(np.arange, (200, 200), data.shape[:2], (200, 200))
# chops[j][i] is the tile in row j, column i
chops = [np.split(row, J, axis=1) for row in np.split(data, I, axis=0)]

# do something with the bits

# flat list of altered tiles in row-major order (j = row index, i = column index);
# the amount subtracted from a tile grows with i + j
predictions = [chop-(i+j)*(chop>>3) for j, row in enumerate(chops) for i, chop in enumerate(row)]

# unflatten predictions
def nest(data, template):
    """
    Regroup a flat iterable so that it mirrors the nesting of template.

    :param data: flat iterable supplying the items in order
    :param template: list of rows; only the number of entries per row is used
    :return: list of lists with the same row lengths as template, filled from data
    """
    stream = iter(data)
    nested = []
    for row in template:
        filled = []
        for _ in row:
            filled.append(next(stream))
        nested.append(filled)
    return nested

# give the flat prediction list the same row/column nesting as chops
pred_lol = nest(predictions, chops)

# almost builtin reconstruction
def np_block_2D(chops):
    """
    Reassemble a [row][column] grid of tiles with np.block.

    Every tile is wrapped in its own one-element list, which adds a third
    nesting level on top of the row and column levels before the grid is
    handed to np.block.

    :param chops: nested list [row][column] of numpy arrays
    :return: the array built by np.block
    """
    layout = []
    for row in chops:
        layout.append([[tile] for tile in row])
    return np.block(layout)

recon = np_block_2D(pred_lol)
# write the reassembled picture to disk
# NOTE(review): `Image` comes from the bare `import Image` above - presumably PIL's Image module; confirm.
Image.fromarray(recon).save('demo.png')

Reconstructed manipulated image:

But we can do faster than that by avoiding intermediary arrays. Instead, we copy into a preallocated array:

def speed_block_2D(chops):
    """
    Reassemble a [row][column] grid of tiles by copying into a preallocated array.

    Row heights are read from the first tile of each row and column widths
    from the tiles of the first row. Tiles may be 2-D (height, width) or
    carry any trailing axes such as channels.

    :param chops: nested list [row][column] of numpy arrays
    :return: newly allocated array containing all tiles, with the dtype of the first tile
    """
    H = np.cumsum([x[0].shape[0] for x in chops])  # bottom edge of every tile row
    W = np.cumsum([x.shape[1] for x in chops[0]])  # right edge of every tile column
    D = chops[0][0]
    # keep whatever trailing axes the tiles have, including none
    recon = np.empty((H[-1], W[-1], *D.shape[2:]), D.dtype)
    # np.split hands back views, so assigning into them fills recon itself
    for rd, rs in zip(np.split(recon, H[:-1], 0), chops):
        for d, s in zip(np.split(rd, W[:-1], 1), rs):
            d[...] = s
    return recon

Timings, also including a generalized ND-ready variant of each method:

numpy 2D:               0.991 ms
prealloc 2D:            0.389 ms
numpy general:          1.021 ms
prealloc general:       0.448 ms

Code for general case and timings:

def np_block(chops):
    """
    Reassemble arbitrarily nested tiles with np.block.

    The nesting depth is measured by following the first element of each
    list down to the first array. When that depth is smaller than the
    array's number of dimensions, every leaf is wrapped in extra one-element
    lists until the two match, and the result is passed to np.block.

    :param chops: nested lists whose leaves are numpy arrays
    :return: the array built by np.block
    """
    depth = 0
    leaf = chops
    while isinstance(leaf, list):
        leaf = leaf[0]
        depth += 1

    missing = leaf.ndim - depth
    if missing <= 0:
        return np.block(chops)

    def wrap(node):
        # recurse through the lists; pad each leaf with `missing` list levels
        if isinstance(node, list):
            return [wrap(child) for child in node]
        for _ in range(missing):
            node = [node]
        return node

    return np.block(wrap(chops))

def speed_block(chops):
    """
    Assemble arbitrarily nested lists of array tiles into one preallocated array.

    Nesting level k of chops is laid out along axis k of the result. The
    output is allocated once and every tile is copied straight into its slot.

    :param chops: nested lists whose leaves are numpy arrays
    :return: newly allocated array holding all tiles, with the dtype of the first leaf
    """
    def line(src, i):
        # extent along axis i of the first leaf found under src
        while isinstance(src, list):
            src = src[0]
        return src.shape[i]

    def hyper(src, i):
        # For the list src sitting at nesting level i, return
        #   (shape of the assembled block, dtype of the first leaf,
        #    tile sizes per level, ordered from the deepest level up to level i)
        src = iter(src)
        fst = next(src)
        if isinstance(fst, list):
            res, dtype, szs = hyper(fst, i+1)
            # sizes along axis i: the first sub-block, then the first leaf of each sibling
            szs.append([res[i], *(line(s, i) for s in src)])
            res[i] = sum(szs[-1])
            return res, dtype, szs
        res = np.array(fst.shape)
        szs = [res[i], *(s.shape[i] for s in src)]
        res[i] = sum(szs)
        return res, fst.dtype, [szs]

    shape, dtype, szs = hyper(chops, 0)
    recon = np.empty(shape, dtype)

    def cpchp(dst, src, i, szs=None):
        # The size lists stay plain Python lists: levels may hold different numbers
        # of tiles, and NumPy >= 1.24 refuses to build an array from ragged lists.
        szs = hyper(src, i)[2] if szs is None else szs
        # np.split hands back views, so writes below land in recon
        dst = np.split(dst, np.cumsum(szs[-1][:-1], dtype=int), i)
        if isinstance(src[0], list):
            szs = szs[:-1]
            for ds, sr in zip(dst, src):
                cpchp(ds, sr, i+1, szs)
                # the precomputed sizes describe the first sub-block only;
                # every later sibling measures its own
                szs = None
        else:
            for ds, sr in zip(dst, src):
                ds[...] = sr

    cpchp(recon, chops, 0, szs)
    return recon

from timeit import timeit

# timeit returns the total seconds for `number` calls; with number=1000 that
# figure equals the mean time per call in milliseconds, hence the "ms" labels below.
T = (timeit(lambda: speed_block(pred_lol), number=1000),
     timeit(lambda: np_block(pred_lol), number=1000),
     timeit(lambda: speed_block_2D(pred_lol), number=1000),
     timeit(lambda: np_block_2D(pred_lol), number=1000))

# sanity check: all four implementations must produce the same array
assert (np.all(speed_block(pred_lol)==np_block(pred_lol)) and
        np.all(speed_block_2D(pred_lol)==np_block(pred_lol)) and
        np.all(speed_block(pred_lol)==np_block_2D(pred_lol)))

# T is ordered (prealloc general, numpy general, prealloc 2D, numpy 2D)
print(f"""
numpy 2D:          {T[3]:10.3f} ms
prealloc 2D:       {T[2]:10.3f} ms
numpy general:     {T[1]:10.3f} ms
prealloc general:  {T[0]:10.3f} ms
""")

Concatenating multiple images into one

Answers (1)

Related Questions