Reputation: 503
I have a tensor of, say, a batch of grayscale images (so a BxHxW tensor).
I would like to slice the tensor along H and W into N and M (roughly) evenly sized windows, respectively, and apply tf.math.bincount to each window.
Below is what I currently do, but it is slow to build and creates quite a bloated tensorflow graph. Is there a better way to do this?
def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out
Example
# Images is 10x100x100
Out = makeHistBins(Images, 5)
# Out should be 10x5x5x256, where bincount was applied to 20x20 windows along each image.
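For reference, this is a small NumPy sketch of the intended semantics (my own illustration, not part of the question); it is a hypothetical helper that assumes H and W divide evenly by N and M and pixel values lie in [0, 255]:

import numpy as np

def make_hist_bins_np(images, N, M):
    # reference implementation for clarity only, assuming even divisibility
    B, H, W = images.shape
    h, w = H // N, W // M                      # window height and width
    out = np.zeros((B, N, M, 256), dtype=np.int64)
    for b in range(B):
        for i in range(N):
            for j in range(M):
                window = images[b, i*h:(i+1)*h, j*w:(j+1)*w]
                out[b, i, j] = np.bincount(window.ravel(), minlength=256)
    return out

# make_hist_bins_np(np.random.randint(0, 256, (10, 100, 100)), 5, 5).shape -> (10, 5, 5, 256)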
Upvotes: 0
Views: 516
Reputation: 6166
You should use tf.extract_image_patches. This op collects patches from the input image as if applying a convolution, and it supports sliding and overlapping windows.
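As a minimal sketch of what the op does (my own toy example, using the TF1 API that the code below relies on): a single 4x4 image is cut into four non-overlapping 2x2 patches, each flattened into the last dimension.

import numpy as np
import tensorflow as tf

img = tf.constant(np.arange(16).reshape(1, 4, 4, 1), dtype=tf.int32)
patches = tf.extract_image_patches(images=img,
                                   ksizes=[1, 2, 2, 1],    # patch height/width
                                   strides=[1, 2, 2, 1],   # non-overlapping steps
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')
with tf.Session() as sess:
    print(sess.run(patches).shape)   # (1, 2, 2, 4): each 2x2 patch is flattened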
def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)
    inPut = tf.pad(inPut+1, [[0, 0],
                             [math.ceil((HistH*N-H)/2), math.floor((HistH*N-H)/2)],
                             [math.ceil((HistW*M-W)/2), math.floor((HistW*M-W)/2)]])
    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                                         ksizes=[1, HistH, HistW, 1],
                                         strides=[1, HistH, HistW, 1],
                                         rates=[1, 1, 1, 1],
                                         padding='VALID')
    # the following code is adapted from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1] * new_shape[2]),
                                      [new_shape[0], new_shape[1], new_shape[2]])
    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * new_shape[0] * new_shape[1] * new_shape[2])
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    return out[:, :, :, 1:]
Experimental comparison
You can compare the time consumption of the three methods with the following script.
import tensorflow as tf
import math
import time
import numpy as np
def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    for k in range(LEN):
        for i in range(N):
            for j in range(M):
                hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
                if j == 0:
                    row = tf.expand_dims(hist, 0)
                else:
                    row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
            if i == 0:
                col = tf.expand_dims(row, 0)
            else:
                col = tf.concat([col, tf.expand_dims(row, 0)], 0)
        if k == 0:
            out = tf.expand_dims(col, 0)
        else:
            out = tf.concat([out, tf.expand_dims(col, 0)], 0)
    return out
def makeHistBins2(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    HistH = math.ceil(ImgH/N)
    HistW = math.ceil(ImgW/M)
    splits_h = [int(np.floor((ImgH*(i+1))/N) - np.floor((ImgH*i)/N)) for i in range(N)]
    splits_w = [int(np.floor((ImgW*(i+1))/M) - np.floor((ImgW*i)/M)) for i in range(M)]
    # splits_h = np.array([ImgH // N for _ in range(N)])
    # if ImgH - sum(splits_h) > 0:
    #     splits_h[:(ImgH - sum(splits_h))] += 1
    #
    # splits_w = np.array([ImgW // M for _ in range(M)])
    # if ImgW - sum(splits_w) > 0:
    #     splits_w[:ImgW - sum(splits_w)] += 1
    # print(splits_w)
    out = tf.map_fn(
        lambda x: tf.stack(
            [tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256)
             for y in tf.split(x, splits_h, axis=0)
             for z in tf.split(y, splits_w, axis=1)]
        ), inPut)
    return tf.reshape(out, [-1, len(splits_h), len(splits_w), 256])
def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()
    HistH = math.ceil(H / N)
    HistW = math.ceil(W / M)
    inPut = tf.pad(inPut+1, [[0, 0], [0, HistH*N-H], [0, HistW*M-W]])
    # extract inPut to (-1,N,M,HistH*HistW)
    # the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
    # the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
    # ...
    inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
                                         ksizes=[1, HistH, HistW, 1],
                                         strides=[1, HistH, HistW, 1],
                                         rates=[1, 1, 1, 1],
                                         padding='VALID')
    # the following code is adapted from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1] * new_shape[2]),
                                      [new_shape[0], new_shape[1], new_shape[2]])
    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * new_shape[0] * new_shape[1] * new_shape[2])
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    return out[:, :, :, 1:]
Images = tf.placeholder(shape=(10, 100, 100), dtype=tf.int32)
out1 = makeHistBins(Images, 5)
out2 = makeHistBins2(Images, 5)
out3 = imagemakeHistBins(Images, 5)

with tf.Session() as sess:
    example = np.random.randint(0, 256, size=(10, 100, 100))
    value1, value2, value3 = sess.run([out1, out2, out3], feed_dict={Images: example})
    print(np.all(value1 == value2))
    print(np.all(value1 == value3))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out1, feed_dict={Images: example})
    time2 = time.clock()
    print('makeHistBins cost time(ms) : %.2f' % ((time2 - time1) * 1000))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out2, feed_dict={Images: example})
    time2 = time.clock()
    print('makeHistBins2 cost time(ms) : %.2f' % ((time2 - time1) * 1000))

    time1 = time.clock()
    for _ in range(10):
        sess.run(out3, feed_dict={Images: example})
    time2 = time.clock()
    print('imagemakeHistBins cost time(ms) : %.2f' % ((time2 - time1) * 1000))
The result:
# True
# True
# makeHistBins cost time(ms) : 1187.79
# makeHistBins2 cost time(ms) : 568.32
# imagemakeHistBins cost time(ms) : 74.57
The run time of imagemakeHistBins is reduced by an order of magnitude compared with that of makeHistBins and makeHistBins2.
Notes
Please also pay attention to your extraction strategy. With your slicing logic, the blocks come out as follows:
ImgH = 100
N=6
start_h = [math.floor(ImgH*i/N) for i in range(N)] # [0, 16, 33, 50, 66, 83]
end_h = [math.ceil(ImgH*(i+1)/N) for i in range(N)] # [17, 34, 50, 67, 84, 100]
# [0:17, 16:34, 33:50, 50:67, 66:84, 83:100]
Such (overlapping) blocks cannot be produced by either tf.split or tf.extract_image_patches.
splits_h = np.array([ImgH // N for _ in range(N)])
if ImgH - sum(splits_h) > 0:
splits_h[:(ImgH - sum(splits_h))] += 1
# [17 17 17 17 16 16]
So my strategy is to pad the last block with zeros. Because the input is shifted by 1 (inPut+1) and only bins 1 to max+1 are kept via out[:,:,:,1:], the zero padding does not affect the correctness of the final result.
# tf.extract_image_patches
# [17 17 17 17 17 15]
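To illustrate the "+1 shift, then drop bin 0" trick described above, here is a tiny standalone sketch (my own, not part of the original answer): zero padding only inflates bin 0, which is sliced away afterwards.

import tensorflow as tf

vals = tf.constant([3, 3, 7])                  # original pixel values
padded = tf.pad(vals + 1, [[0, 5]])            # shift by 1, then pad with zeros
hist = tf.bincount(padded, minlength=9)[1:]    # drop the padding bin 0
with tf.Session() as sess:
    print(sess.run(hist))                      # counts for values 0..7: [0 0 0 2 0 0 0 1]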
Evenly split solution
def imagemakeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    B, H, W = inPut.get_shape().as_list()
    splits_h = np.array([H // N for _ in range(N)])
    if H - sum(splits_h) > 0:
        splits_h[:(H - sum(splits_h))] += 1
    splits_h = [0] + splits_h.tolist()
    splits_h = np.cumsum(splits_h)
    splits_w = np.array([W // M for _ in range(M)])
    if W - sum(splits_w) > 0:
        splits_w[:(W - sum(splits_w))] += 1
    splits_w = [0] + splits_w.tolist()
    splits_w = np.cumsum(splits_w)
    # splits_w = [0 17 34 51 68 84 100] if inPut is (10,100,100) and M=N=6
    print(splits_w)
    inPut_new = []
    for i in range(N):
        for j in range(M):
            hist = inPut[:, splits_h[i]:splits_h[i + 1], splits_w[j]:splits_w[j + 1]]
            hist = tf.pad(hist+1, [[0, 0],
                                   [0, splits_h[1] - splits_h[i + 1] + splits_h[i]],
                                   [0, splits_w[1] - splits_w[j + 1] + splits_w[j]]])
            inPut_new.append(hist)
    inPut_new = tf.reshape(tf.transpose(tf.stack(inPut_new), [1, 0, 2, 3]),
                           [-1, N, M, splits_h[1] * splits_w[1]])
    # the following code is adapted from https://stackoverflow.com/a/50883668
    max_arr_plus = tf.reduce_max(inPut_new) + 1
    new_shape = tf.shape(inPut_new)
    group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1] * new_shape[2]),
                                      [new_shape[0], new_shape[1], new_shape[2]])
    ids = inPut_new + group[:, :, :, None]
    out = tf.bincount(ids, minlength=max_arr_plus * new_shape[0] * new_shape[1] * new_shape[2])
    out = tf.reshape(out, [new_shape[0], new_shape[1], new_shape[2], max_arr_plus])
    return out[:, :, :, 1:]
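A usage sketch (my own, TF1 style) for the uneven case N = M = 6 discussed in the notes; assuming pixel values span 0 to 255, the last dimension comes out as 256:

import numpy as np
import tensorflow as tf

Images = tf.placeholder(shape=(10, 100, 100), dtype=tf.int32)
Out = imagemakeHistBins(Images, 6)
with tf.Session() as sess:
    example = np.random.randint(0, 256, size=(10, 100, 100))
    print(sess.run(Out, feed_dict={Images: example}).shape)   # (10, 6, 6, 256)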
Upvotes: 2
Reputation: 11333
The following would work, but make sure you specify the minlength argument as well, because if the returned bin counts have different lengths you can't stack them. So basically we do two for loops (one to split the height and another to split the width) and then use map_fn to iterate over the batch, which gives you a 10, 5, 5, 256 tensor (as @giser_yugang pointed out, the output has a 256 dimension at the end). It takes a bit of extra work to get this working with uneven splits.
import numpy as np
import tensorflow as tf

def makeHistBins(inPut, N, M=None):
    if M is None:
        M = N
    LEN = int(inPut.shape[0])
    ImgH = int(inPut.shape[1])
    ImgW = int(inPut.shape[2])
    #HistH = math.ceil(ImgH/N)
    #HistW = math.ceil(ImgW/M)
    splits_h = np.array([ImgH//N for _ in range(N)])
    if ImgH - sum(splits_h) > 0:
        splits_h[:(ImgH - sum(splits_h))] += 1
    splits_w = np.array([ImgW//M for _ in range(M)])
    if ImgW - sum(splits_w) > 0:
        splits_w[:ImgW - sum(splits_w)] += 1
    print(splits_w)
    out = tf.map_fn(
        lambda x: tf.stack(
            [tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256)
             for y in tf.split(x, splits_h, axis=0)
             for z in tf.split(y, splits_w, axis=1)]
        ), inPut)
    return tf.reshape(out, [LEN, N, M, 256])
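A usage sketch (my own, mirroring the question's example in TF1 style):

Images = tf.placeholder(shape=(10, 100, 100), dtype=tf.int32)
Out = makeHistBins(Images, 5)                 # -> (10, 5, 5, 256)
with tf.Session() as sess:
    example = np.random.randint(0, 256, size=(10, 100, 100))
    print(sess.run(Out, feed_dict={Images: example}).shape)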
Upvotes: 1