Reputation: 131
I have loaded a trained checkpoint file for inference and extracted beta, the moving mean, the moving variance, and all the weights from the model. For batch normalization, I am getting the wrong result when I manually calculate the output of batch_normalization.
[UPDATED]
Here I am sharing my code, which loads the checkpoint and prints the input to batch normalization, beta, the moving mean, the moving variance, and the output of batch normalization on the console.
import tensorflow as tf
import cv2
import numpy as np
import time
import os

def main():
    with tf.Session() as sess:
        # [INFO] code for loading the checkpoint
        # ---------------------------------------------------------------------
        saver = tf.train.import_meta_graph("./bag-model-34000.meta")
        saver.restore(sess, tf.train.latest_checkpoint("./"))

        graph = tf.get_default_graph()
        input_place = graph.get_tensor_by_name('input/image_input:0')
        op = graph.get_tensor_by_name('output/image_output:0')
        # ---------------------------------------------------------------------

        # [INFO] generating input data whose shape matches the input tensor
        # ---------------------------------------------------------------------
        input_data = np.random.randint(255, size=(1, 320, 240, 3)).astype(float)
        # ---------------------------------------------------------------------

        # [INFO] code to get all tensor names
        # ---------------------------------------------------------------------
        operations = sess.graph.get_operations()
        ind = 0
        tens_name = []  # store all tensor names in a list
        for operation in operations:
            # print(ind, "> ", operation.name, "=> \n", operation.values())
            if operation.values():
                name_of_tensor = str(operation.values()).split()[1][1:-1]
                tens_name.append(name_of_tensor)
            ind = ind + 1
        # ---------------------------------------------------------------------

        # [INFO] printing the input to batch normalization, beta, the moving mean
        # and the moving variance, so I can calculate the batch normalization
        # output manually
        # ---------------------------------------------------------------------
        tensor_number = 0
        for tname in tens_name:  # looping through each tensor name
            if tensor_number <= 812:  # I am interested in the first 812 tensors
                tensor = graph.get_tensor_by_name(tname)
                tensor_values = sess.run(tensor, feed_dict={input_place: input_data})
                print("tensor: ", tensor_number, ": ", tname, ": \n\t\t", tensor_values.shape)

                # [INFO] the 28th tensor is "input/conv1/conv1_1/separable_conv2d:0";
                # its output is the input to batch normalization
                if tensor_number == 28:
                    print(tensor_values)        # [[[[-0.03182551  0.00226904  0.00440771 ...
                    print(tensor_values.shape)  # (1, 320, 240, 32)

                # [INFO] the 31st tensor is "conv1/conv1_1/BatchNorm/beta:0";
                # its output is all the betas
                if tensor_number == 31:
                    print(tensor_values)        # [ 0.04061257 -0.16322449 -0.10942575 ...
                    print(tensor_values.shape)  # (32,)

                # [INFO] the 35th tensor is "conv1/conv1_1/BatchNorm/moving_mean:0";
                # its output is all the moving means
                if tensor_number == 35:
                    print(tensor_values)        # [-0.0013569   0.00618145  0.00248459 ...
                    print(tensor_values.shape)  # (32,)

                # [INFO] the 39th tensor is "conv1/conv1_1/BatchNorm/moving_variance:0";
                # its output is all the moving variances
                if tensor_number == 39:
                    print(tensor_values)        # [4.48082483e-06 1.21615967e-05 5.37582537e-06 ...
                    print(tensor_values.shape)  # (32,)

                # [INFO] the 44th tensor is "input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0";
                # this op performs the batch normalization, and here I print its output
                if tensor_number == 44:
                    print(tensor_values)        # [[[[-8.45019519e-02  1.23237416e-01 -4.60943699e-01 ...
                    print(tensor_values.shape)  # (1, 320, 240, 32)

            tensor_number = tensor_number + 1
        # ---------------------------------------------------------------------

if __name__ == "__main__":
    main()
After running the above code from the console, I get the input to batch normalization, which is the output of the "input/conv1/conv1_1/separable_conv2d:0" tensor.
I take the first value from that output as x:

input x = -0.03182551

Beta, the moving mean, and the moving variance are also printed on the console, and I take the first value from each array:

beta = 0.04061257
moving mean = -0.0013569
moving variance = 4.48082483e-06
epsilon = 0.001 (the default value)

Gamma is ignored, because I set scale = False at training time, so there is no gamma.
When I calculate the output of batch normalization at inference time for the given input x:

x_hat = (x - moving_mean) / sqrt(moving_variance + epsilon)
      = (-0.03182551 - (-0.0013569)) / sqrt(4.48082483e-06 + 0.001)
      = -0.961350647

y = gamma * x_hat + beta

Since gamma is ignored, the equation becomes y = x_hat + beta:

y = -0.961350647 + 0.04061257
  = -0.920738077
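The same calculation as a small NumPy sketch, purely to verify the arithmetic (the constants are the channel-0 values printed above):

    import numpy as np

    # channel-0 values printed on the console
    x = -0.03182551              # input to batch normalization
    beta = 0.04061257
    moving_mean = -0.0013569
    moving_variance = 4.48082483e-06
    epsilon = 0.001              # default epsilon

    # inference-time batch normalization; gamma is ignored (scale=False)
    x_hat = (x - moving_mean) / np.sqrt(moving_variance + epsilon)
    y = x_hat + beta
    print(x_hat)  # -0.96135...
    print(y)      # -0.92073...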
So my manual calculation at inference time gives y = -0.920738077, but the program shows y = -8.45019519e-02, which is the output of the "input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0" tensor. That is very different from what I calculated. Is my equation wrong? What modification do I have to make to the x_hat and y equations above to get that value? I am very confused about why my calculated result is so different from the program's output.
I also checked beta, the moving mean, and the moving variance using tf.compat.v1.global_variables(); all of them match the values printed on the console. So why do I get the wrong result after substituting these values into the x_hat and y equations?
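This is roughly how I cross-checked the variables (a minimal sketch, assuming the same sess as in the script above; the name filter is just an example):

    # read beta / moving_mean / moving_variance directly from the graph
    # variables instead of running the tensors
    for var in tf.compat.v1.global_variables():
        if 'conv1/conv1_1/BatchNorm' in var.name:
            print(var.name, sess.run(var)[:3])  # first three values of each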
I am also providing my console output here, from tensor_number 28 to 44:
tensor: 28 : input/conv1/conv1_1/separable_conv2d:0 :
(1, 320, 240, 32)
[[[[-0.03182551 0.00226904 0.00440771 ... -0.01204819 0.02620635
tensor: 29 : input/conv1/conv1_1/BatchNorm/Const:0 :
(32,)
tensor: 30 : conv1/conv1_1/BatchNorm/beta/Initializer/zeros:0 :
(32,)
tensor: 31 : conv1/conv1_1/BatchNorm/beta:0 :
(32,)
[ 0.04061257 -0.16322449 -0.10942575 0.05056419 -0.13785222 0.4060304
tensor: 32 : conv1/conv1_1/BatchNorm/beta/Assign:0 :
(32,)
tensor: 33 : conv1/conv1_1/BatchNorm/beta/read:0 :
(32,)
tensor: 34 : conv1/conv1_1/BatchNorm/moving_mean/Initializer/zeros:0 :
(32,)
tensor: 35 : conv1/conv1_1/BatchNorm/moving_mean:0 :
(32,)
[-0.0013569 0.00618145 0.00248459 0.00340403 0.00600711 0.00291052
tensor: 36 : conv1/conv1_1/BatchNorm/moving_mean/Assign:0 :
(32,)
tensor: 37 : conv1/conv1_1/BatchNorm/moving_mean/read:0 :
(32,)
tensor: 38 : conv1/conv1_1/BatchNorm/moving_variance/Initializer/ones:0 :
(32,)
tensor: 39 : conv1/conv1_1/BatchNorm/moving_variance:0 :
(32,)
[4.48082483e-06 1.21615967e-05 5.37582537e-06 1.40261754e-05
tensor: 40 : conv1/conv1_1/BatchNorm/moving_variance/Assign:0 :
(32,)
tensor: 41 : conv1/conv1_1/BatchNorm/moving_variance/read:0 :
(32,)
tensor: 42 : input/conv1/conv1_1/BatchNorm/Const_1:0 :
(0,)
tensor: 43 : input/conv1/conv1_1/BatchNorm/Const_2:0 :
(0,)
tensor: 44 : input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0 :
(1, 320, 240, 32)
[[[[-8.45019519e-02 1.23237416e-01 -4.60943699e-01 ... 3.77691090e-01
Upvotes: 3
Views: 1208
Reputation: 131
I have resolved this problem: the batch normalization operation thinks it is in training mode. So it uses the batch mean and batch variance, and a beta of 0, instead of the provided moving mean, moving variance, and beta. I calculated the batch mean and batch variance, substituted those values into the equation, and now it gives the correct output.
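As a sanity check, this is the training-mode calculation as a minimal NumPy sketch (assuming conv_out holds the output of "input/conv1/conv1_1/separable_conv2d:0" fetched with sess.run as in the script above):

    import numpy as np

    # conv_out: output of "input/conv1/conv1_1/separable_conv2d:0",
    # shape (1, 320, 240, 32)
    epsilon = 0.001

    # batch statistics are per channel, computed over the N, H and W axes
    batch_mean = conv_out.mean(axis=(0, 1, 2))   # shape (32,)
    batch_var = conv_out.var(axis=(0, 1, 2))     # shape (32,)

    # training-mode normalization; beta behaves as 0 here and gamma is
    # ignored (scale=False), so y is just the normalized input
    y = (conv_out - batch_mean) / np.sqrt(batch_var + epsilon)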
So how can I force it to use the moving mean, the moving variance, and the provided beta? I tried the change below, setting training to False, but it is not working:
for tname in tens_name:  # looping through each tensor name
    if tensor_number <= 812:  # I am interested in the first 812 tensors
        training = tf.placeholder(tf.bool, name='training')
        is_training = tf.placeholder(tf.bool, name='is_training')
        tensor = graph.get_tensor_by_name(tname)
        tensor_values = sess.run(tensor, feed_dict={is_training: False, training: False, input_place: input_data})
In the actual training code, is_training is True:
def load_cnn(self, keep_prob=0.5, num_filt=32, num_layers=2, is_training=True):
    self.reuse = False
    with tf.name_scope('input'):
        self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='image_input')
        net = self.image_input
        with slim.arg_scope([slim.separable_conv2d],
                            depth_multiplier=1,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training},
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                            weights_regularizer=slim.l2_regularizer(0.0005)):
            # Down Scaling
            # Block 1
            net = slim.repeat(net, 2, slim.separable_conv2d, num_filt, [3, 3], scope='conv1')
            print('en_conv1', net.shape, net.name)  # 320x240x3 -> 316x236x32
            self.cnn_layer1 = net

            # Down Sampling
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            print('en_maxpool1', net.shape, net.name)  # 316x236x32 -> 158x118x32
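Note that the two placeholders created in my loop above are new nodes that are not connected to the restored graph's BatchNorm ops, so feeding them has no effect. For reference, a sketch of how is_training could instead be wired as a placeholder at graph-construction time (hypothetical; this is not my current code):

    # hypothetical rebuild: is_training becomes a boolean placeholder instead
    # of a Python bool, so the mode can be chosen at sess.run time
    is_training = tf.placeholder(tf.bool, name='is_training')

    with slim.arg_scope([slim.separable_conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training}):
        net = slim.repeat(net, 2, slim.separable_conv2d, num_filt, [3, 3], scope='conv1')

    # at inference:
    # sess.run(op, feed_dict={is_training: False, input_place: input_data})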
Upvotes: 0