Ufuk Can Bicici

Reputation: 3649

TensorFlow: Invalid calculations

I am new to TensorFlow and am trying to understand how the computation graph works. I am working through the very basic linear regression example on the TensorFlow website. I have the following piece of code:

import numpy as np
import tensorflow as tf


def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        _loss += (_w * _x[j] + _b - _y[j]) ** 2
    return _loss


def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0.0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b


# Model parameters
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)
# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  
lr = 0.001
for i in range(1000):
    results = sess.run([loss, W, b, grads], {x: x_train, y: y_train})
    loss_value = results[0]
    W_value = results[1]
    b_value = results[2]
    grad_W = results[3][0]
    grad_b = results[3][1]
    manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
    manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
    new_W_value = W_value - lr * grad_W
    new_b_value = b_value - lr * grad_b
    W = tf.assign(W, value=new_W_value)
    b = tf.assign(b, value=new_b_value)
    print("***********************")
    print("loss={0}".format(loss_value))
    print("manual_loss_value={0}".format(manual_loss_value))
    print("W={0}".format(W_value))
    print("b={0}".format(b_value))
    print("manual_W={0}".format(_W))
    print("manual_b={0}".format(_b))
    print("grad_W={0}".format(grad_W))
    print("grad_b={0}".format(grad_b))
    print("manual_grad_W={0}".format(manual_grad_W))
    print("manual_grad_b={0}".format(manual_grad_b))
    print("***********************")
    _W -= lr * manual_grad_W
    _b -= lr * manual_grad_b
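
For reference, `manual_loss` and `manual_grads` implement the sum-of-squares loss and its exact gradients:

$$L(w, b) = \sum_{j=1}^{n} \left(w x_j + b - y_j\right)^2, \qquad
\frac{\partial L}{\partial w} = \sum_{j=1}^{n} 2\left(w x_j + b - y_j\right) x_j, \qquad
\frac{\partial L}{\partial b} = \sum_{j=1}^{n} 2\left(w x_j + b - y_j\right).$$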

I am just trying to apply gradient descent to a simple (w*x + b - y)^2 model. I purposely do not use TensorFlow's own optimizer, because I want to understand the underlying graph update mechanisms. In order to check that the system calculates the correct gradients, I implemented my own loss and gradient calculation functions for linear regression as well. Unfortunately, it seems that TensorFlow does not calculate the loss function and the gradients as expected. Here is what I get as an output:

 ***********************
 loss=23.65999984741211
 manual_loss_value=23.659999999999997
 W=[ 0.30000001]
 b=[-0.30000001]
 manual_W=0.3
 manual_b=-0.3
 grad_W=[ 52.]
 grad_b=[ 15.59999943]
 manual_grad_W=52.0
 manual_grad_b=15.599999999999998
 ***********************
 ***********************
 loss=23.65999984741211
 manual_loss_value=20.81095744
 W=[ 0.24800001]
 b=[-0.31560001]
 manual_W=0.248
 manual_b=-0.3156
 grad_W=[ 52.]
 grad_b=[ 15.59999943]
 manual_grad_W=48.568
 manual_grad_b=14.4352
 ***********************
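
As a sanity check, the first iteration matches the formulas above: with w = 0.3 and b = -0.3 the residuals w*x_j + b - y_j are 0, 1.3, 2.6 and 3.9, so the loss is 0 + 1.69 + 6.76 + 15.21 = 23.66, grad_W = 2 * (0*1 + 1.3*2 + 2.6*3 + 3.9*4) = 52 and grad_b = 2 * (0 + 1.3 + 2.6 + 3.9) = 15.6, exactly as both computations report.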

As you can see, TensorFlow calculates an incorrect loss value and incorrect gradients for W and b in the second iteration; in fact, they are the same values as in the first iteration. In some trials it only starts to diverge from the correct values at the third or fourth iteration, not always at the second. Am I doing something wrong here? As soon as I get the values of W and b and their gradients, I update them with tf.assign() in the training loop. Does the problem lie there; is this the wrong way to update variables in TensorFlow? It is really discouraging to run into such problems right at the start.

Upvotes: 2

Views: 201

Answers (2)

Manolo Santos

Reputation: 1913

I think you have a numeric-precision problem. NumPy uses double-precision floats (64 bits) by default, while you declare your tensors as tf.float32. Try changing them to tf.float64.
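
A minimal sketch of the gap, using the training data from the question (the low-order digits may vary by platform):

import numpy as np

x = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([0, -1, -2, -3], dtype=np.float32)

# The same residuals as in the question, accumulated in float32 vs. float64.
loss32 = np.sum((np.float32(0.3) * x + np.float32(-0.3) - y) ** 2)
loss64 = np.sum((0.3 * x.astype(np.float64) - 0.3 - y.astype(np.float64)) ** 2)

print(loss32)  # ~23.66 (float32)
print(loss64)  # 23.659999999999997 (float64)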

Edit: I think the difference is due to the exponentiation in the loss function. Try changing it to a multiplication, as in:

_loss += (_w * _x[j] + _b - _y[j]) * (_w * _x[j] + _b - _y[j])

import numpy as np
import tensorflow as tf


def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        diff = _w * _x[j] + _b - _y[j]
        _loss += diff * diff

    return _loss


def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0.0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b


# Model parameters
W = tf.Variable([0.3], dtype=tf.float64)
b = tf.Variable([-0.3], dtype=tf.float64)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float64)
linear_model = W * x + b
y = tf.placeholder(tf.float64)

# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  
lr = 0.001
for i in range(10):
    with tf.device('cpu:0'):
        results = sess.run([loss, grads], {x: x_train, y: y_train})
    loss_value = results[0]
    grad_W = results[1][0]
    grad_b = results[1][1]
    manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
    manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
    new_W_value = (W - lr * grad_W).eval(session=sess)
    new_b_value = (b - lr * grad_b).eval(session=sess)
    tf.assign(W, value=new_W_value).eval(session=sess)
    tf.assign(b, value=new_b_value).eval(session=sess)

    print("***********************")
    print("loss={0}".format(loss_value))
    print("manual_loss_value={0}".format(manual_loss_value))
    print("W={0}".format(W.eval(session = sess)))
    print("b={0}".format(b.eval(session = sess)))
    print("manual_W={0}".format(_W))
    print("manual_b={0}".format(_b))
    print("grad_W={0}".format(grad_W))
    print("grad_b={0}".format(grad_b))
    print("manual_grad_W={0}".format(manual_grad_W))
    print("manual_grad_b={0}".format(manual_grad_b))
    print("***********************")
    _W -= lr * manual_grad_W
    _b -= lr * manual_grad_b

Upvotes: 1

Pietro Tortella

Reputation: 1114

I think the problem is with the use of tf.assign. The command tf.assign only creates assign nodes; those nodes must actually be run to take effect, and creating new ones inside the loop merely grows the graph on every iteration. You should change it to something like

assign_W_placeholder = tf.placeholder(tf.float32)
assign_b_placeholder = tf.placeholder(tf.float32)
assign_W_node = tf.assign(W, assign_W_placeholder)
assign_b_node = tf.assign(b, assign_b_placeholder)

and then in the for loop, add something like

sess.run(assign_W_node, feed_dict={assign_W_placeholder: new_W_value})
sess.run(assign_b_node, feed_dict={assign_b_placeholder: new_b_value})

After these changes, TensorFlow and the manual computation give the same results.

The complete code:

import numpy as np
import tensorflow as tf


def manual_loss(_w, _b, _x, _y):
    _loss = 0.0
    n = len(_x)
    for j in range(n):
        _loss += (_w * _x[j] + _b - _y[j]) ** 2
    return _loss


def manual_grads(_w, _b, _x, _y):
    n = len(_x)
    g_w = 0.0
    g_b = 0.0
    for j in range(n):
        g_w += 2.0 * (_w * _x[j] + _b - _y[j]) * _x[j]
        g_b += 2.0 * (_w * _x[j] + _b - _y[j])
    return g_w, g_b


# Model parameters
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
_W = 0.3
_b = -0.3
# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)

assign_W_placeholder = tf.placeholder(tf.float32)
assign_b_placeholder = tf.placeholder(tf.float32)
assign_W_node = tf.assign(W, assign_W_placeholder)
assign_b_node = tf.assign(b, assign_b_placeholder)

# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
grads = tf.gradients(loss, [W, b])
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  
lr = 0.001
for i in range(1000):
    results = sess.run([loss, W, b, grads], {x: x_train, y: y_train})
    loss_value = results[0]
    W_value = results[1]
    b_value = results[2]
    grad_W = results[3][0]
    grad_b = results[3][1]
    manual_loss_value = manual_loss(_w=_W, _b=_b, _x=x_train, _y=y_train)
    manual_grad_W, manual_grad_b = manual_grads(_w=_W, _b=_b, _x=x_train, _y=y_train)
    new_W_value = W_value - lr * grad_W
    new_b_value = b_value - lr * grad_b
    sess.run([assign_W_node, assign_b_node], 
             feed_dict={assign_W_placeholder: new_W_value, assign_b_placeholder: new_b_value})
    print("***********************")
    print("loss={0}".format(loss_value))
    print("manual_loss_value={0}".format(manual_loss_value))
    print("W={0}".format(W_value))
    print("b={0}".format(b_value))
    print("manual_W={0}".format(_W))
    print("manual_b={0}".format(_b))
    print("grad_W={0}".format(grad_W))
    print("grad_b={0}".format(grad_b))
    print("manual_grad_W={0}".format(manual_grad_W))
    print("manual_grad_b={0}".format(manual_grad_b))
    print("***********************")
    _W -= lr * manual_grad_W
    _b -= lr * manual_grad_b
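
As an aside, here is a minimal alternative sketch that avoids the placeholders entirely, assuming the same W, b, loss, grads and sess as above: build the update ops once from the gradient tensors and run them inside the loop. This is essentially what tf.train.GradientDescentOptimizer would do for you.

# Build the in-place gradient-descent update ops once, outside the loop.
lr = 0.001
update_W = tf.assign_sub(W, lr * grads[0])
update_b = tf.assign_sub(b, lr * grads[1])

for i in range(1000):
    # One run computes the loss and gradients, then applies both updates.
    loss_value, _, _ = sess.run([loss, update_W, update_b],
                                {x: x_train, y: y_train})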

Upvotes: 1
