sahil makandar

Reputation: 131

Batch Normalization at inference time in tensorflow

I have loaded a trained checkpoint file for inference and extracted the beta, moving mean, moving variance and all weights from the model. For batch normalization, I am getting the wrong result when I manually calculate the output of batch_normalization. [UPDATED]

Here I am sharing my code, which loads the checkpoint, prints the input to the batch normalization, prints beta, the moving mean and the moving variance, and prints the output of the batch normalization on the console.

import tensorflow as tf
import cv2
import numpy as np
import time
import os

def main():
    with tf.Session() as sess:        

        #[INFO] code for loading checkpoint
        #---------------------------------------------------------------------
        saver = tf.train.import_meta_graph("./bag-model-34000.meta")
        saver.restore(sess, tf.train.latest_checkpoint("./"))
        graph = tf.get_default_graph()
        input_place = graph.get_tensor_by_name('input/image_input:0')
        op = graph.get_tensor_by_name('output/image_output:0')
        #----------------------------------------------------------------------

        #[INFO] generating random input data matching the input tensor shape
        #----------------------------------------------------------------------
        input_data = np.random.randint(255, size=(1,320,240, 3)).astype(float)
        #----------------------------------------------------------------------

        #[INFO] code to get all tensors_name
        #----------------------------------------------------------------------
        operations = sess.graph.get_operations()
        ind = 0
        tens_name = []  # store all tensor names in a list
        for operation in operations:
            #print(ind,"> ", operation.name, "=> \n", operation.values())

            if (operation.values()): 
                name_of_tensor = str(operation.values()).split()[1][1:-1]

            tens_name.append(name_of_tensor)
            ind = ind + 1
        #------------------------------------------------------------------------

        #[INFO] printing the input to batch normalization, beta, moving mean and moving variance,
        # so I can manually calculate the batch normalization output
        #------------------------------------------------------------------------   
        tensor_number = 0
        for tname in tens_name:         # looping through each tensor name

            if tensor_number <= 812:      # I am interested in first 812 tensors
                tensor = graph.get_tensor_by_name(tname)
                tensor_values = sess.run(tensor, feed_dict={input_place: input_data})
                print("tensor: ", tensor_number, ": ", tname, ": \n\t\t", tensor_values.shape)


                # [INFO] the 28th tensor is "input/conv1/conv1_1/separable_conv2d:0";
                # its output is the input to the batch normalization
                if tensor_number == 28:
                    # here I am printing this tensor output
                    print(tensor_values)            # [[[[-0.03182551  0.00226904  0.00440771 ... 
                    print(tensor_values.shape)      # (1, 320, 240, 32)


                # [INFO] the 31st tensor is "conv1/conv1_1/BatchNorm/beta:0";
                # its output is all the betas
                if tensor_number == 31:
                    # here I am printing these betas
                    print(tensor_values)            # [ 0.04061257 -0.16322449 -0.10942575 ...
                    print(tensor_values.shape)      # (32,)


                # [INFO] the 35th tensor is "conv1/conv1_1/BatchNorm/moving_mean:0";
                # its output is all the moving means
                if tensor_number == 35:
                    # here I am printing these moving means
                    print(tensor_values)            # [-0.0013569   0.00618145  0.00248459 ...
                    print(tensor_values.shape)      # (32,)


                # [INFO] the 39th tensor is "conv1/conv1_1/BatchNorm/moving_variance:0";
                # its output is all the moving variances
                if tensor_number == 39:
                    # here I am printing this moving variance
                    print(tensor_values)            # [4.48082483e-06 1.21615967e-05 5.37582537e-06 ...
                    print(tensor_values.shape)      # (32,)


                # [INFO] the 44th tensor is "input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0";
                # it performs the batch normalization
                if tensor_number == 44:
                    # here I am printing the output of this tensor
                    print(tensor_values)            # [[[[-8.45019519e-02  1.23237416e-01 -4.60943699e-01 ...
                    print(tensor_values.shape)      # (1, 320, 240, 32)

            tensor_number = tensor_number + 1
        #---------------------------------------------------------------------------------------------

if __name__ == "__main__":
    main()

After running the above code, the console shows the input to the batch normalization, which is the output of the tensor "input/conv1/conv1_1/separable_conv2d:0".

I take the first value from that output as x,
so input x = -0.03182551

Beta, the moving mean and the moving variance are also printed on the console,
and I take the first value from each array:
                beta = 0.04061257
                moving mean = -0.0013569
                moving variance = 4.48082483e-06
                epsilon = 0.001  ... the default value

Gamma is ignored, because I set scale = False at training time.

When I calculate the output of batch normalization at inference time for the given input x:

x_hat = (x - moving_mean) / sqrt(moving_variance + epsilon)
      = (-0.03182551 - (-0.0013569)) / sqrt(0.00000448082483 + 0.001)
      = -0.961350647
so x_hat is -0.961350647

y = gamma * x_hat + beta
gamma is ignored (scale = False), so the equation becomes:
y = x_hat + beta
  = -0.961350647 + 0.04061257
  = -0.920738077
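
The same arithmetic as a small NumPy check (the values are the ones printed above; epsilon is assumed to be the slim.batch_norm default of 0.001):

import numpy as np

x = -0.03182551                     # input/conv1/conv1_1/separable_conv2d:0, first value
beta = 0.04061257                   # conv1/conv1_1/BatchNorm/beta:0, first value
moving_mean = -0.0013569            # conv1/conv1_1/BatchNorm/moving_mean:0, first value
moving_variance = 4.48082483e-06    # conv1/conv1_1/BatchNorm/moving_variance:0, first value
epsilon = 0.001                     # assumed default

x_hat = (x - moving_mean) / np.sqrt(moving_variance + epsilon)
y = x_hat + beta                    # gamma ignored because scale=False
print(x_hat, y)                     # approx -0.96135 and -0.92074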

So my manually calculated y at inference time is y = -0.920738077,
but the program shows y = -8.45019519e-02,
which is the output of the "input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0" tensor.

This is very different from what I calculated. Is my equation wrong? What modifications do I have to make to the x_hat and y equations above to get this value?

I am very confused about why my calculated result is so different from the value the program produces.

I also checked beta, the moving mean and the moving variance using tf.compat.v1.global_variables(); all the values match the values printed on the console.
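
(The check looked roughly like this; a sketch, with the name prefix 'conv1/conv1_1/BatchNorm' taken from the tensor names above:)

for v in tf.compat.v1.global_variables():
    if 'conv1/conv1_1/BatchNorm' in v.name:      # beta, moving_mean, moving_variance
        print(v.name, sess.run(v)[:3])           # compare with the console output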

So why am I getting the wrong result after manually substituting these values into the x_hat and y equations?

I am also providing my console output here, from tensor_number 28 to 44:

tensor:  28 :  input/conv1/conv1_1/separable_conv2d:0 : 
                 (1, 320, 240, 32)
[[[[-0.03182551  0.00226904  0.00440771 ... -0.01204819  0.02620635

tensor:  29 :  input/conv1/conv1_1/BatchNorm/Const:0 : 
                 (32,)
tensor:  30 :  conv1/conv1_1/BatchNorm/beta/Initializer/zeros:0 : 
                 (32,)

tensor:  31 :  conv1/conv1_1/BatchNorm/beta:0 : 
                 (32,)
[ 0.04061257 -0.16322449 -0.10942575  0.05056419 -0.13785222  0.4060304

tensor:  32 :  conv1/conv1_1/BatchNorm/beta/Assign:0 : 
                 (32,)
tensor:  33 :  conv1/conv1_1/BatchNorm/beta/read:0 : 
                 (32,)
tensor:  34 :  conv1/conv1_1/BatchNorm/moving_mean/Initializer/zeros:0 : 
                 (32,)

tensor:  35 :  conv1/conv1_1/BatchNorm/moving_mean:0 : 
                 (32,)
[-0.0013569   0.00618145  0.00248459  0.00340403  0.00600711  0.00291052

tensor:  36 :  conv1/conv1_1/BatchNorm/moving_mean/Assign:0 : 
                 (32,)
tensor:  37 :  conv1/conv1_1/BatchNorm/moving_mean/read:0 : 
                 (32,)
tensor:  38 :  conv1/conv1_1/BatchNorm/moving_variance/Initializer/ones:0 : 
                 (32,)

tensor:  39 :  conv1/conv1_1/BatchNorm/moving_variance:0 : 
                 (32,)
[4.48082483e-06 1.21615967e-05 5.37582537e-06 1.40261754e-05

tensor:  40 :  conv1/conv1_1/BatchNorm/moving_variance/Assign:0 : 
                 (32,)
tensor:  41 :  conv1/conv1_1/BatchNorm/moving_variance/read:0 : 
                 (32,)
tensor:  42 :  input/conv1/conv1_1/BatchNorm/Const_1:0 : 
                 (0,)
tensor:  43 :  input/conv1/conv1_1/BatchNorm/Const_2:0 : 
                 (0,)

tensor:  44 :  input/conv1/conv1_1/BatchNorm/FusedBatchNorm:0 : 
                 (1, 320, 240, 32)
[[[[-8.45019519e-02  1.23237416e-01 -4.60943699e-01 ...  3.77691090e-01

Upvotes: 3

Views: 1208

Answers (1)

sahil makandar

Reputation: 131

I resolved this problem: the batch normalization operation thinks it is in training mode.

So it uses the batch mean and batch variance, and beta as 0, instead of the provided moving mean, moving variance and beta.

So I calculated the batch mean and batch variance and substituted these values into the equation, and now it gives the correct output.
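
For example, roughly like this (a NumPy sketch; sess, graph, input_place and input_data are assumed to be the ones from the script in the question, and epsilon is again assumed to be 0.001):

import numpy as np

# get the conv output that feeds the batch norm (tensor_number 28 above)
conv_out = sess.run(graph.get_tensor_by_name('input/conv1/conv1_1/separable_conv2d:0'),
                    feed_dict={input_place: input_data})

batch_mean = conv_out.mean(axis=(0, 1, 2))   # per-channel mean over N, H, W
batch_var = conv_out.var(axis=(0, 1, 2))     # per-channel variance over N, H, W
x_hat = (conv_out - batch_mean) / np.sqrt(batch_var + 0.001)   # epsilon assumed 0.001
y = x_hat                                    # beta behaves as 0 here; gamma ignored (scale=False)
print(y[0, 0, 0, 0])                         # this matched the FusedBatchNorm output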

So how can I force it to use the moving mean, the moving variance and the provided beta? I tried the change below, setting training to False, but it is not working.

for tname in tens_name:         # looping through each tensor name

    if tensor_number <= 812:      # I am interested in first 812 tensors
        training = tf.placeholder(tf.bool, name='training')
        is_training = tf.placeholder(tf.bool, name='is_training')
        tensor = graph.get_tensor_by_name(tname)
        tensor_values = sess.run(tensor, feed_dict={is_training: False, training: False, input_place: input_data})

In the actual code, is_training is True:

def load_cnn(self, keep_prob=0.5, num_filt=32, num_layers=2, is_training=True):
    self.reuse = False
    with tf.name_scope('input'):
        self.image_input = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='image_input')
        net = self.image_input

        with slim.arg_scope([slim.separable_conv2d],
                            depth_multiplier=1,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training},
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                            weights_regularizer=slim.l2_regularizer(0.0005)):

            # Down Scaling
            # Block 1
            net = slim.repeat(net, 2, slim.separable_conv2d, num_filt, [3, 3], scope='conv1')
            print('en_conv1', net.shape, net.name)    # 320x240x3 -> 316x236x32
            self.cnn_layer1 = net
            # Down Sampling
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            print('en_maxpool1', net.shape, net.name) # 316x236x32 -> 158x118x32
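
As far as I can tell, feeding the placeholders created in the loop above has no effect, because they are new nodes that the restored graph never uses; is_training=True was baked in as a plain Python bool when the graph was built. One way to make it switchable (a sketch only, not the original code) would be to build the network with a tf.placeholder_with_default, so that False can be fed at inference time:

import tensorflow as tf
import tensorflow.contrib.slim as slim   # assumed import, matching the slim usage above

def build_net_sketch(image_input, num_filt=32):
    # defaults to True (training); feed False at inference time
    is_training = tf.placeholder_with_default(True, shape=(), name='is_training')

    with slim.arg_scope([slim.separable_conv2d],
                        depth_multiplier=1,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training},
                        activation_fn=tf.nn.relu):
        net = slim.repeat(image_input, 2, slim.separable_conv2d, num_filt, [3, 3], scope='conv1')
    return net, is_training

# at inference:
# sess.run(net, feed_dict={image_input: input_data, is_training: False})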

Upvotes: 0
