Patrick

Reputation: 2709

Weights in TensorFlow model don't seem to change when printed

I'm trying to print weights before and after training in TensorFlow. I'm confused by what I get, because the weights don't seem to change even though training shows a decreasing cost. My code is:

from __future__ import print_function

import tensorflow as tf
import numpy as np
import argparse

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1

load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x') # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y') # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784,10), seed=0))
    b = tf.get_variable('b',[10],initializer=tf.zeros_initializer)

    # Construct model
    #pred = tf.nn.softmax(tf.matmul(x, W) + b,name='pred') # Softmax
    pred = tf.nn.softmax(tf.matmul(x, W),name='pred') # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()
    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', optimizer)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0
else:
    # Find last executed epoch
    from glob import glob
    history = list(map(lambda x: int(x.split('-')[1][:-5]), glob('model.ckpt-*.meta')))
    last_epoch = np.max(history)
    # Instantiate saver object using previously saved meta-graph
    saver = tf.train.import_meta_graph('model.ckpt-{}.meta'.format(last_epoch))
    initial_epoch = last_epoch + 1


# Launch the graph
with tf.Session() as sess:
    if not load:
        sess.run(init)
    else:
        saver.restore(sess, 'model.ckpt-{}'.format(last_epoch))
        optimizer = tf.get_collection('train_op')[0]
        cost = tf.get_collection('cost_op')[0]
        x = tf.get_collection('input')[0]
        y = tf.get_collection('target')[0]
        pred = tf.get_collection('pred')[0]

    print ("Variables before training")
    for var in tf.global_variables():
        print (var.name, sess.run(var))

    # Training cycle
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                        y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print ("Variables after training")
    for var in tf.global_variables():
        print (var.name, sess.run(var))

Before training, printing the variables shows something like this for W:

W:0 [[-0.22279324  0.75145274  0.30694658 ... -0.20406865 -0.10345581
0.47926915]
....

After training, printing produces the same result for W, even though the cost has decreased from 2.2 to 0.7. Where is my error?
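A way to check whether W really changes, independent of how the array is printed, would be to snapshot it with sess.run(W) before and after the training loop and compare the full arrays; a rough sketch, using the session and variables from the code above:

# Compare full snapshots of W rather than the abridged printed output
w_before = sess.run(W)
# ... training loop as above ...
w_after = sess.run(W)
print("W unchanged?", np.array_equal(w_before, w_after))
print("max |delta W|:", np.max(np.abs(w_after - w_before)))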

Upvotes: 0

Views: 1234

Answers (2)

Alex

Reputation: 146

Y. Luo is correct: your W value actually is changing. It isn't very visible, partly because the values you see when printing W are the corners and edges of the images (the parts that change least). I added some plotting that shows the absolute difference between W and its initial value, with epochs on the vertical axis and digits on the horizontal axis; more intense color corresponds to a greater difference.

Note that the upper-left and bottom-right corners show almost no change (often literally zero change); these are the values you see when printing (an abridged summary of) W, which explains why it never seems to change. This makes sense because the corner pixels are almost never useful for predicting the digit class.

from __future__ import print_function

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1

load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x') # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y') # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784,10), seed=0))

    # Construct model
    pred = tf.nn.softmax(tf.matmul(x, W),name='pred') # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_oper=optimizer.minimize(cost)

    print(optimizer.compute_gradients(cost))

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()
    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', train_oper)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    print ("Variables before training")
    for var in tf.get_default_graph().get_collection('trainable_variables'):
        print (var, sess.run(var))

    #Initial value of W
    W_0=W.eval(sess)
    # Training cycle
    diff=[]
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_oper, cost], feed_dict={x: batch_xs,
                                                        y: batch_ys})
            # Compute average loss
            avg_cost += c/total_batch
        #Calculate difference between current W and initial W
        diff.append(np.reshape(np.abs(W_0-sess.run(W)),(28,28,10)))
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print ("Variables after training")
    for var in tf.get_default_graph().get_collection('trainable_variables'):
        print (var, sess.run(var))


    # Plot |W - W_0| for each epoch (rows) and digit class (columns), on a common scale
    scale = np.max(diff)
    fig, axs = plt.subplots(len(diff), 10)
    for i in range(len(diff)):
        for j in range(10):
            axs[i][j].imshow(diff[i][:, :, j] / scale, vmin=0, vmax=1)
            axs[i][j].axis('off')
    plt.show()
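For what it's worth, the only reason the printed output shows just the corners is numpy's summarized array printing: by default, large arrays are abridged to their first and last few rows and columns. A small standalone illustration (not tied to the TensorFlow code above):

import sys
import numpy as np

# Stand-in for sess.run(W); any (784, 10) array prints the same way
W_val = np.random.randn(784, 10).astype(np.float32)

# Default print options summarize large arrays, so only the first/last few
# rows and columns (the image corners) are visible
print(W_val)

# Raise the summarization threshold to print the full array
np.set_printoptions(threshold=sys.maxsize)
print(W_val)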

Upvotes: 2

Y. Luo

Reputation: 5732

Your weights have changed during training. I think you didn't see it because you only printed out parts of the weights, and those parts happen to stay the same. I changed your code slightly to compare snapshots with numpy.array_equal and to add checks inside the training loop, as follows:

from __future__ import print_function

import tensorflow as tf
import numpy as np
import argparse

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1

load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x') # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y') # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784,10), seed=0))
    b = tf.get_variable('b',[10],initializer=tf.zeros_initializer)

    # Construct model
    #pred = tf.nn.softmax(tf.matmul(x, W) + b,name='pred') # Softmax
    pred = tf.nn.softmax(tf.matmul(x, W),name='pred') # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()
    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', optimizer)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0
else:
    # Find last executed epoch
    from glob import glob
    history = list(map(lambda x: int(x.split('-')[1][:-5]), glob('model.ckpt-*.meta')))
    last_epoch = np.max(history)
    # Instantiate saver object using previously saved meta-graph
    saver = tf.train.import_meta_graph('model.ckpt-{}.meta'.format(last_epoch))
    initial_epoch = last_epoch + 1


# Launch the graph
with tf.Session() as sess:
    if not load:
        sess.run(init)
    else:
        saver.restore(sess, 'model.ckpt-{}'.format(last_epoch))
        optimizer = tf.get_collection('train_op')[0]
        cost = tf.get_collection('cost_op')[0]
        x = tf.get_collection('input')[0]
        y = tf.get_collection('target')[0]
        pred = tf.get_collection('pred')[0]

    print ("Variables before training")
    old_var = {}
    for var in tf.global_variables():
        old_var[var.name] = sess.run(var)
        #print (var.name, sess.run(var))
    print(old_var)
    new_var = {}

    # Training cycle
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                        y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
        print('Check variable changes')
        for var in tf.global_variables():
            new_var[var.name] = sess.run(var)
        for vname in new_var:
            eq = np.array_equal(old_var[vname], new_var[vname])
            print('Is {} changed? {}'.format(vname, not eq))
            if not eq:
                old_var[vname] = new_var[vname]
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print ("Variables after training")
    for var in tf.global_variables():
        new_var[var.name] = sess.run(var)
        #print (var.name, sess.run(var))
    print(new_var)

    print('Check variable changes')
    for vname in new_var:
        eq = np.array_equal(old_var[vname], new_var[vname])
        print('Is {} changed? {}'.format(vname, not eq))

The most relevant section of my output is:

Epoch: 0001 cost= 7.935980950
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0002 cost= 4.306569523
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0003 cost= 3.009391170
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0004 cost= 2.379378949
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0005 cost= 2.014794181
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Optimization Finished!

Your bias doesn't change, as expected: b is not used in pred (the + b term is commented out), so it never receives a gradient. If you run it, you will also find that the weights after training are the same as the weights from your last epoch.
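If you want b to be trained as well, it has to be used in the model. A minimal sketch based on the line that is commented out in your code (assuming x, W and b as defined above):

# Include the bias in the prediction so that b receives a non-zero gradient
pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')

With that change, the same np.array_equal check should report that b:0 changes after each epoch, too.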

Upvotes: 2
