Vikash Balasubramanian

Reputation: 3233

Tensorflow restoring named variables

I am building a convolutional neural network with TensorFlow.

import tensorflow as tf

def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial, name = 'weights')

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial, name = 'biases')

def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

with tf.Graph().as_default():
    with tf.name_scope('convolution1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])

    x = tf.placeholder(tf.float32, shape=[None, 96*96])
    y_ = tf.placeholder(tf.float32, shape=[None, 30])
    x_image = tf.reshape(x, [-1,96,96,1])

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('convolution2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('connected'):
        W_fc1 = weight_variable([24 * 24 * 64, 1024])
        b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 24*24*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('output'):
        W_fc2 = weight_variable([1024, 30])
        b_fc2 = bias_variable([30])

After this I do some computation and training, and save all the variables.

Now I recreate the same graph structure in another program.

Program 2 snippet (conv2d and max_pool_2x2 are the same helpers defined above):

tf.reset_default_graph()

x = tf.placeholder(tf.float32, shape=[None, 96*96])
x_image = tf.reshape(x, [-1,96,96,1])
y_ = tf.placeholder(tf.float32, shape=[None, 30])

with tf.name_scope('convolution1'):
    W_conv1 = tf.Variable(-1.0, validate_shape = False, name = 'weights')
    b_conv1 = tf.Variable(-1.0, validate_shape = False, name = 'biases')

with tf.name_scope('convolution2'):
    W_conv2 = tf.Variable(-1.0, validate_shape = False, name = 'weights')
    b_conv2 = tf.Variable(-1.0, validate_shape = False, name = 'biases')

with tf.name_scope('connected'):
    W_fc1 = tf.Variable(-1.0, validate_shape = False, name = 'weights')
    b_fc1 = tf.Variable(-1.0, validate_shape = False, name = 'biases')

with tf.name_scope('output'):
    W_fc2 = tf.Variable(-1.0, validate_shape = False, name = 'weights')
    b_fc2 = tf.Variable(-1.0, validate_shape = False, name = 'biases')

session = tf.Session()
saver = tf.train.Saver()
saver.restore(session, 'my-model-2000')
vars_list = tf.get_collection(tf.GraphKeys.VARIABLES)

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2, [-1, 24*24*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
y_convtry = tf.matmul(h_fc1, W_fc2) + b_fc2
y_conv_alternate = 95.99*tf.ones_like(y_convtry)
y_conv = tf.select(tf.greater(y_convtry, y_conv_alternate), y_conv_alternate, y_convtry)

cost = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.select(tf.is_nan(y_), y_conv, y_) - y_conv), reduction_indices=[1])))

train_step = tf.train.AdamOptimizer(1e-4).minimize(cost,var_list = vars_list)

The problem is that when I try to get the shapes of the variables in vars_list, they still show None, but running:

vars_list[i].eval(session = session) 

gives the correct values, so the restoration is working.

My question is: why is vars_list[i].get_shape() giving the wrong answer? tf.shape(vars_list[i]) doesn't seem to work either.

This is a problem because when I use

tf.train.AdamOptimizer(1e-4).minimize(cost)  # internally calls var.get_shape() and throws an error
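For reference, a minimal sketch that reproduces the failure (the variable name v is made up for illustration):

v = tf.Variable(-1.0, validate_shape=False, name='weights')
loss = tf.reduce_sum(tf.square(v))
# Fails here: the optimizer inspects v.get_shape() to build its
# accumulator slots, and the static shape is unknown.
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)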

Upvotes: 0

Views: 153

Answers (1)

mrry

Reputation: 126184

Setting validate_shape=False when creating a tf.Variable tells TensorFlow that the variable can contain data of any shape, which allows you (for example) to restore arbitrarily shaped checkpoint data into it. However, it provides TensorFlow with no static information about the variable's shape, which (for example) AdamOptimizer.minimize() needs in order to build appropriately shaped accumulator slots.
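A minimal sketch of the difference, using the same API style as the question (variable names are illustrative):

import tensorflow as tf

# No static shape is recorded when validate_shape=False.
v_unknown = tf.Variable(-1.0, validate_shape=False, name='weights')
print(v_unknown.get_shape())   # <unknown>

# A shaped initializer gives the variable a fully defined static shape.
v_known = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1), name='weights')
print(v_known.get_shape())     # (5, 5, 1, 32)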

The best solution would be to reuse the same code for creating variables that you used in the first program, i.e.

with tf.name_scope('convolution1'):
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

...and so on. The initialization functions for these variables will never be run, so there is no additional cost to writing it this way.
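Concretely, Program 2 could then be structured as follows (a sketch assuming the weight_variable/bias_variable helpers and the 'my-model-2000' checkpoint from the question):

tf.reset_default_graph()

with tf.name_scope('convolution1'):
    W_conv1 = weight_variable([5, 5, 1, 32])  # static shape is fully defined
    b_conv1 = bias_variable([32])
# ...recreate the remaining variables in the same way...

saver = tf.train.Saver()
session = tf.Session()
saver.restore(session, 'my-model-2000')  # restored values replace the initializers

# get_shape() now returns (5, 5, 1, 32), so AdamOptimizer.minimize()
# can build its accumulator slots.
print(W_conv1.get_shape())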

Upvotes: 1
