AOK
AOK

Reputation: 503

How to apply a tensorflow function on tensor slices efficiently?

I have a tensor of, say, a batch of grayscale images (so a BxHxW tensor).

I would like to slice the tensor along H and W into N and M, respectively, evenly spaced (more or less even) windows and apply tf.math.bincount to each window. <-- Sorry for this slightly convoluted English.

Below is what I currently do, but it is slow to build and creates quite a bloated tensorflow graph. Is there a better way to do this?

def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out

Example

# Images is 10x100x100
Out = makeHistBins(Images, 5)
# Out should be 10x5x5x256, where bincount was applied to 20x20 windows along each image.

Upvotes: 0

Views: 516

Answers (2)

giser_yugang
giser_yugang

Reputation: 6166

You should use tf.extract_image_patches.This op collects patches from the input image, as if applying a convolution. It supports window sliding and overlapping.

def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)

    inPut = tf.pad(inPut+1,[[0,0],[math.ceil((HistH*N-H)/2),math.floor((HistH*N-H)/2)],[math.ceil((HistW*M-W)/2),math.floor((HistW*M-W)/2)]])

    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                               ksizes=[1, HistH, HistW, 1],
                               strides=[1, HistH, HistW, 1],
                               rates=[1, 1, 1, 1],
                               padding='VALID')

    # following codes are referenced from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
                                      [new_shape[0], new_shape[1],new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
    out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
    return out[:,:,:,1:]

Experimental comparison

You can compare the time consumption of three methods at the same time.

import tensorflow as tf
import math
import time
import numpy as np

def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out

def makeHistBins2(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)

    splits_h = [int(np.floor((ImgH*(i+1))/N)- np.floor((ImgH*i)/N)) for i in range(N)]
    splits_w = [int(np.floor((ImgW*(i+1))/M)- np.floor((ImgW*i)/M)) for i in range(M)]

    # splits_h = np.array([ImgH // N for _ in range(N)])
    # if ImgH - sum(splits_h) > 0:
    #     splits_h[:(ImgH - sum(splits_h))] += 1
    # 
    # splits_w = np.array([ImgW // M for _ in range(M)])
    # if ImgW - sum(splits_w) > 0:
    #     splits_w[:ImgW - sum(splits_w)] += 1

    # print(splits_w)
    out = tf.map_fn(
        lambda x: tf.stack(
            [tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256) for y in tf.split(x, splits_h, axis=0) for z in tf.split(y, splits_w, axis=1) ]
            ), inPut)
    return tf.reshape(out, [-1, len(splits_h), len(splits_w), 256])

def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)

    inPut = tf.pad(inPut+1,[[0,0],[0,HistH*N-H],[0,HistW*M-H]])

    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                               ksizes=[1, HistH, HistW, 1],
                               strides=[1, HistH, HistW, 1],
                               rates=[1, 1, 1, 1],
                               padding='VALID')

    # following codes are referenced from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
                                      [new_shape[0], new_shape[1],new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
    out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
    return out[:,:,:,1:]

Images =tf.placeholder(shape=(10,100,100),dtype=tf.int32)
out1 = makeHistBins(Images, 5)
out2 = makeHistBins2(Images, 5)
out3 = imagemakeHistBins(Images, 5)

with tf.Session() as sess:
    example = np.random.randint(0, 256, size=(10, 100, 100))
    value1,value2,value3 = sess.run([out1,out2,out3], feed_dict={Images: example})
    print(np.all(value1 == value2))
    print(np.all(value1 == value3))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out1,feed_dict={Images:example})
    time2 = time.clock()
    print('makeHistBins cost time(ms) : %.2f' % ((time2 - time1)*1000))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out2, feed_dict={Images: example})
    time2 = time.clock()
    print('makeHistBins2 cost time(ms) : %.2f' % ((time2 - time1) * 1000))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out3, feed_dict={Images: example})
    time2 = time.clock()
    print('imagemakeHistBins cost time(ms) : %.2f' % ((time2 - time1) * 1000))

The result:

# True
# True
# makeHistBins cost time(ms) : 1187.79
# makeHistBins2 cost time(ms) : 568.32
# imagemakeHistBins cost time(ms) : 74.57

The operation time of imagemakeHistBins is reduced by an order of magnitude compared with that of imagemakeHistBins and makeHistBins2.

Notes

Please also pay attention to your extraction strategy.

According to your logic, the blocking strategy is as follows:

ImgH = 100
N=6
start_h = [math.floor(ImgH*i/N) for i in range(N)] # [0, 16, 33, 50, 66, 83]
end_h = [math.ceil(ImgH*(i+1)/N) for i in range(N)] # [17, 34, 50, 67, 84, 100]
# [0:17, 16:34, 33:50, 50:67, 66:84, 83:100]

Such a block cannot be supported by both tf.split and tf.extract_image_patches.

splits_h = np.array([ImgH // N for _ in range(N)])
if ImgH - sum(splits_h) > 0:
    splits_h[:(ImgH - sum(splits_h))] += 1
# [17 17 17 17 16 16]

So my strategy is to fill the last block with 0. Because I take the result from 1 to MAX(256) + 1 by out[:,:,:,1:], it will not affect the correctness of the final result.

# tf.extract_image_patches
# [17 17 17 17 17 15]

Even solution

def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()

    splits_h = np.array([H // N for _ in range(N)])
    if H - sum(splits_h) > 0:
        splits_h[:(H - sum(splits_h))] += 1
    splits_h = [0] + splits_h.tolist()
    splits_h = np.cumsum(splits_h)

    splits_w = np.array([W // M for _ in range(M)])
    if W - sum(splits_w) > 0:
        splits_w[:(W - sum(splits_w))] += 1
    splits_w = [0] + splits_w.tolist()
    splits_w = np.cumsum(splits_w)
    # splits_w=[  0  17  34  51  68  84 100] if inPut is (10,100,100) and M=N=6
    print(splits_w)

    inPut_new = []
    for i in range(N):
        for j in range(M):
            hist = inPut[:, splits_h[i]:splits_h[i + 1], splits_w[j]:splits_w[j + 1]]
            hist = tf.pad(hist+1, [[0, 0],
                                 [0, splits_h[1] - splits_h[i + 1] + splits_h[i]],
                                 [0, splits_w[1] - splits_w[j + 1] + splits_w[j]]])
            inPut_new.append(hist)

    inPut_new = tf.reshape(tf.transpose(tf.stack(inPut_new), [1, 0, 2, 3]), [-1, N, M, splits_h[1] * splits_w[1]])

    # following codes are referenced from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
                                      [new_shape[0], new_shape[1],new_shape[2]])

    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
    out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
    return out[:,:,:,1:]

Upvotes: 2

thushv89
thushv89

Reputation: 11333

The following would work. But make sure that you specify minlength argument as well, because if the returned bin counts are different, you can't stack them. So basically we are doing two for loops (1 to split height and another to split width) and then use map_fn to iterate over rows, which gives you a 10, 20, 20, 256 matrix (as @giser_yugang pointed out, the output has a 256 dimension at the end). And it requires a bit of work to get this to work with uneven splits.

def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    #HistH = math.ceil(ImgH/N)
    #HistW = math.ceil(ImgW/M)

    splits_h = np.array([ImgH//N for _ in range(N)])
    if  ImgH - sum(splits_h) > 0:
      splits_h[:(ImgH - sum(splits_h))] += 1

    splits_w = np.array([ImgW//M for _ in range(M)])
    if ImgW - sum(splits_w) > 0:
      splits_w[:ImgW - sum(splits_w)] += 1

    print(splits_w)
    out = tf.map_fn(
        lambda x: tf.stack(
            [tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256) for y in tf.split(x, splits_h, axis=0) for z in tf.split(y, splits_w, axis=1) ]
            ), inPut)
    return tf.reshape(out, [LEN, N, M, 256])

Upvotes: 1

Related Questions