Reputation: 3649
I am new to TensorFlow and am trying to understand how the computation graph works. I am working through the very basic linear regression example from the TensorFlow website. I have the following piece of code:
import numpy as np
import tensorflow as tf

def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        _loss += (_w * _x[j] + _b - _y[j]) ** 2
    return _loss

def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b

# Model parameters
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)
# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
lr = 0.001
for i in range(1000):
    results = sess.run([loss, W, b, grads], {x: x_train, y: y_train})
    loss_value = results[0]
    W_value = results[1]
    b_value = results[2]
    grad_W = results[3][0]
    grad_b = results[3][1]
    manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
    manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
    new_W_value = W_value - lr * grad_W
    new_b_value = b_value - lr * grad_b
    W = tf.assign(W, value=new_W_value)
    b = tf.assign(b, value=new_b_value)
    print("***********************")
    print("loss={0}".format(loss_value))
    print("manual_loss_value={0}".format(manual_loss_value))
    print("W={0}".format(W_value))
    print("b={0}".format(b_value))
    print("manual_W={0}".format(_W))
    print("manual_b={0}".format(_b))
    print("grad_W={0}".format(grad_W))
    print("grad_b={0}".format(grad_b))
    print("manual_grad_W={0}".format(manual_grad_W))
    print("manual_grad_b={0}".format(manual_grad_b))
    print("***********************")
    _W -= lr * manual_grad_W
    _b -= lr * manual_grad_b
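For reference, here is a quick sanity check of the very first step, using the helper functions above with the initial values W = 0.3, b = -0.3:

print(manual_loss(0.3, -0.3, x_train, y_train))   # ~23.66 -- matches loss and manual_loss_value in the first output block below
print(manual_grads(0.3, -0.3, x_train, y_train))  # ~(52.0, 15.6) -- matches grad_W and grad_b in the first output block below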
I am just trying to apply plain gradient descent to the simple model whose loss is (W*x + b - y)^2. I deliberately avoid TensorFlow's own optimizers because I want to understand the underlying graph update mechanism. To check that the system computes the correct gradients, I also implemented my own loss and gradient calculation functions for linear regression. Unfortunately, TensorFlow does not seem to compute the loss and the gradients as expected. Here is the output I get:
***********************
loss=23.65999984741211
manual_loss_value=23.659999999999997
W=[ 0.30000001]
b=[-0.30000001]
manual_W=0.3
manual_b=-0.3
grad_W=[ 52.]
grad_b=[ 15.59999943]
manual_grad_W=52.0
manual_grad_b=15.599999999999998
***********************
***********************
loss=23.65999984741211
manual_loss_value=20.81095744
W=[ 0.24800001]
b=[-0.31560001]
manual_W=0.248
manual_b=-0.3156
grad_W=[ 52.]
grad_b=[ 15.59999943]
manual_grad_W=48.568
manual_grad_b=14.4352
***********************
As you can see, TensorFlow computes the wrong loss value and the wrong gradients for W and b in the second iteration; in fact they are the same ones as in the first iteration. In some runs it only starts to diverge from the expected values in the third or fourth iteration, not always in the second. Am I doing something wrong here? As soon as I obtain the values of W and b and their gradients, I update the variables with tf.assign() in the training loop. Does the problem lie there; is that the wrong way to update variables in TensorFlow? It is really discouraging to run into such problems right at the start.
Upvotes: 2
Views: 201
Reputation: 1913
I think you have a numeric-precision problem. NumPy uses double-precision floats by default (64 bits), while you are declaring your tensors as tf.float32. Try changing them to tf.float64.
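To see how large that effect is, you can evaluate the same sum of squares in both precisions with plain NumPy (a small sketch, separate from the code below):

import numpy as np

x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]

def sse(w, b, xs, ys, dtype):
    # accumulate the squared residuals entirely in the given precision
    w, b = dtype(w), dtype(b)
    return sum((w * dtype(xv) + b - dtype(yv)) ** 2 for xv, yv in zip(xs, ys))

print(sse(0.3, -0.3, x_train, y_train, np.float32))  # ~23.66, with a small single-precision rounding error
print(sse(0.3, -0.3, x_train, y_train, np.float64))  # 23.659999999999997, the double-precision value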
Edit: I think the remaining difference comes from the exponentiation in the loss function. Try replacing it with a plain multiplication, as in:
_loss += (_w * _x[j] + _b - _y[j]) * (_w * _x[j] + _b - _y[j])
The complete code with both changes:
import numpy as np
import tensorflow as tf

def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        diff = (_w * _x[j] + _b - _y[j])
        _loss += diff * diff
    return _loss

def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b

# Model parameters
W = tf.Variable([0.3], dtype=tf.float64)
b = tf.Variable([-0.3], dtype=tf.float64)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float64)
linear_model = W * x + b
y = tf.placeholder(tf.float64)
# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
lr = 0.001
for i in range(10):
    with tf.device('cpu:0'):
        results = sess.run([loss, grads], {x: x_train, y: y_train})
        loss_value = results[0]
        grad_W = results[1][0]
        grad_b = results[1][1]
        manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
        manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
        new_W_value = (W - lr * grad_W).eval(session=sess)
        new_b_value = (b - lr * grad_b).eval(session=sess)
        tf.assign(W, value=new_W_value).eval(session=sess)
        tf.assign(b, value=new_b_value).eval(session=sess)
        print("***********************")
        print("loss={0}".format(loss_value))
        print("manual_loss_value={0}".format(manual_loss_value))
        print("W={0}".format(W.eval(session=sess)))
        print("b={0}".format(b.eval(session=sess)))
        print("manual_W={0}".format(_W))
        print("manual_b={0}".format(_b))
        print("grad_W={0}".format(grad_W))
        print("grad_b={0}".format(grad_b))
        print("manual_grad_W={0}".format(manual_grad_W))
        print("manual_grad_b={0}".format(manual_grad_b))
        print("***********************")
        _W -= lr * manual_grad_W
        _b -= lr * manual_grad_b
Upvotes: 1
Reputation: 1114
I think the problem is with the use of tf.assign. Calling tf.assign only creates an assign node in the graph; that node must actually be run for the assignment to take effect. You should change your code to something like
assign_W_placeholder = tf.placeholder(tf.float32)
assign_b_placeholder = tf.placeholder(tf.float32)
assign_W_node = tf.assign(W, assign_W_placeholder)
assign_b_node = tf.assign(b, assign_b_placeholder)
and then in the for loop, add something like
sess.run(assign_W_node, feed_dict={assign_W_placeholder: new_W_value})
sess.run(assign_b_node, feed_dict={assign_b_placeholder: new_b_value})
After this change, TensorFlow and the manual computation give the same results.
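The behaviour is easy to see in isolation (a minimal sketch, separate from the code below): the variable keeps its old value until the assign node is actually run in a session.

import tensorflow as tf

v = tf.Variable(1.0)
assign_op = tf.assign(v, 5.0)   # only builds an assign node in the graph

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(v))    # 1.0 -- the assign node has not been executed yet
sess.run(assign_op)   # now the assignment actually happens
print(sess.run(v))    # 5.0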
The complete code:
import numpy as np
import tensorflow as tf

def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        _loss += (_w * _x[j] + _b - _y[j]) ** 2
    return _loss

def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b

# Model parameters
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)
assign_W_placeholder = tf.placeholder(tf.float32)
assign_b_placeholder = tf.placeholder(tf.float32)
assign_W_node = tf.assign(W, assign_W_placeholder)
assign_b_node = tf.assign(b, assign_b_placeholder)
# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
lr = 0.001
for i in range(1000):
    results = sess.run([loss, W, b, grads], {x: x_train, y: y_train})
    loss_value = results[0]
    W_value = results[1]
    b_value = results[2]
    grad_W = results[3][0]
    grad_b = results[3][1]
    manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
    manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
    new_W_value = W_value - lr * grad_W
    new_b_value = b_value - lr * grad_b
    sess.run([assign_W_node, assign_b_node],
             feed_dict={assign_W_placeholder: new_W_value, assign_b_placeholder: new_b_value})
    print("***********************")
    print("loss={0}".format(loss_value))
    print("manual_loss_value={0}".format(manual_loss_value))
    print("W={0}".format(W_value))
    print("b={0}".format(b_value))
    print("manual_W={0}".format(_W))
    print("manual_b={0}".format(_b))
    print("grad_W={0}".format(grad_W))
    print("grad_b={0}".format(grad_b))
    print("manual_grad_W={0}".format(manual_grad_W))
    print("manual_grad_b={0}".format(manual_grad_b))
    print("***********************")
    _W -= lr * manual_grad_W
    _b -= lr * manual_grad_b
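As a side note, if you do not need new_W_value and new_b_value on the Python side, you can also build the update ops once, outside the loop, so no values have to be fed back through placeholders. A sketch of that variant, reusing the graph built above:

# Build the update ops a single time; tf.assign_sub subtracts the scaled gradient in-graph.
update_W = tf.assign_sub(W, lr * grads[0])
update_b = tf.assign_sub(b, lr * grads[1])

for i in range(1000):
    # the fetched loss corresponds to the parameters before this step's update
    loss_value, _, _ = sess.run([loss, update_W, update_b],
                                {x: x_train, y: y_train})
    print("loss={0}".format(loss_value))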
Upvotes: 1