Reputation: 407
I have trained my model and saved it using:
saver = tf.train.Saver()
# Write the checkpoint relative to the working directory: the restore code
# looks for 'final_model-699.meta' and the latest checkpoint under './',
# whereas '/final_model' would place the files in the filesystem root.
saver.save(sess, './final_model', global_step=i)
Then I rebuild the exact same graph and try to restore the model to reproduce my result. Restoring itself works, but as soon as I try to access the value of any network parameter or run any operation, I get an error saying that I am attempting to use an uninitialized variable.
After rebuilding the graph, the code I use to restore is:
# Open a session, load the graph stored in the .meta file and restore the most
# recent checkpoint found in the current directory.
sess=tf.Session()
# NOTE(review): this runs after the graph has already been rebuilt by hand, so
# import_meta_graph presumably adds a second copy of the saved graph next to
# the hand-built one -- confirm against the TF documentation.
new_saver = tf.train.import_meta_graph('final_model-699.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./'))
However, either of the following gives me an error about attempting to use an uninitialized variable:
# Fetch the current value of the single weight matrix.
print(sess.run(weights['hidden1']))
# Evaluate the loss on the full training set.
print(sess.run(loss_f, feed_dict={x: train_x, y_: train_y}))
Any idea?
As a simple example, here is the code that trains and saves the model:
# Synthetic regression data: 200 random 2-D points whose targets are the
# linear combination 2*x0 + 3*x1, reshaped to a column vector.
train_x = np.random.rand(200, 2)
w = np.array([2, 3])
train_y = np.dot(train_x, w)
train_y = np.reshape(train_y, [200, 1])

feature_dim = 2
output_dim = 1

# Placeholders for the inputs and the regression targets.
x = tf.placeholder(tf.float32, [None, feature_dim])
y_ = tf.placeholder(tf.float32, [None, output_dim])

weights = {
    'hidden1': tf.Variable(tf.random_normal([feature_dim, output_dim], stddev=1 / np.sqrt(feature_dim)))
}


def network1(data):
    """Return the linear model output for ``data``.

    Args:
        data: Tensor of shape [batch, feature_dim].

    Returns:
        Tensor of shape [batch, output_dim], i.e. ``data`` multiplied by
        ``weights['hidden1']``.
    """
    # Use the argument instead of silently reading the global placeholder.
    return tf.matmul(data, weights['hidden1'])


y = network1(x)
# Plain mean squared error, identical to the loss built in the restore script.
loss_f = tf.reduce_mean(tf.squared_difference(y, y_))
optimizer_f = tf.train.AdamOptimizer(1e-4).minimize(loss_f)

saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Full-batch training: every step uses the whole data set.
batch_x = train_x
batch_y = train_y
for i in range(10000):
    sess.run(optimizer_f, feed_dict={x: batch_x, y_: batch_y})

print(sess.run(loss_f, feed_dict={x: batch_x, y_: batch_y}))
# `i` still holds the last step index (9999), so the checkpoint files are
# written as './savedmodel/-9999.*'.
saver.save(sess, './savedmodel/', global_step=i)
Restoring and reproducing the result
import scipy.io
import numpy as np
import tensorflow as tf
import random
# Regenerate synthetic data the same way as in training: 200 random 2-D points
# with targets 2*x0 + 3*x1. No seed is set, so the values differ between runs.
train_x = np.random.rand(200,2)
w= np.array([2,3])
train_y = np.dot(train_x, w)
train_y = np.reshape(train_y, [200,1])
feature_dim = 2
output_dim = 1
# Rebuild the model by hand: placeholders, one unnamed weight variable, the
# linear network, the mean-squared-error loss and the Adam training op.
x = tf.placeholder(tf.float32, [None, feature_dim])
y_ = tf.placeholder(tf.float32, [None, output_dim])
weights = {
'hidden1': tf.Variable(tf.random_normal([feature_dim, output_dim], stddev=1 / np.sqrt(feature_dim)))
}
# NOTE(review): network1 reads the global placeholder x; its data argument is
# unused.
def network1(data):
output = tf.matmul(x, weights['hidden1'])
return output
y = network1(x)
loss_f = tf.reduce_mean(tf.squared_difference(y, y_))
optimizer_f = tf.train.AdamOptimizer(1e-4).minimize(loss_f)
sess = tf.Session()
# NOTE(review): the graph above already exists when the meta graph is imported,
# so import_meta_graph presumably adds a second copy of the saved graph and
# restore() fills the imported variables, while the hand-built `Variable` read
# by loss_f stays uninitialized. This matches the FailedPreconditionError shown
# below -- confirm against the TF documentation.
saver = tf.train.import_meta_graph('./savedmodel/-9999.meta')
saver.restore(sess, tf.train.latest_checkpoint('./savedmodel/'))
# Evaluate the hand-built loss; this is the call that raises.
print(sess.run(loss_f, feed_dict={x: train_x, y_: train_y}))
The error:
FailedPreconditionErrorTraceback (most recent call last)
<ipython-input-5-17910473afab> in <module>()
----> 1 print(sess.run(loss_f, feed_dict={x: train_x, y_: train_y}))
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
776 try:
777 result = self._run(None, fetches, feed_dict, options_ptr,
--> 778 run_metadata_ptr)
779 if run_metadata:
780 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
980 if final_fetches or final_targets:
981 results = self._do_run(handle, final_targets, final_fetches,
--> 982 feed_dict_string, options, run_metadata)
983 else:
984 results = []
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1030 if handle is None:
1031 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032 target_list, options, run_metadata)
1033 else:
1034 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1050 except KeyError:
1051 pass
-> 1052 raise type(e)(node_def, op, message)
1053
1054 def _extend_graph(self):
FailedPreconditionError: Attempting to use uninitialized value Variable
[[Node: Variable/read = Identity[T=DT_FLOAT, _class=["loc:@Variable"], _device="/job:localhost/replica:0/task:0/gpu:0"](Variable)]]
[[Node: Mean/_15 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Variable/read', defined at:
File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-3-7d229041d9bb>", line 6, in <module>
'hidden1': tf.Variable(tf.random_normal([feature_dim, output_dim], stddev=1 / np.sqrt(feature_dim)))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 197, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable
[[Node: Variable/read = Identity[T=DT_FLOAT, _class=["loc:@Variable"], _device="/job:localhost/replica:0/task:0/gpu:0"](Variable)]]
[[Node: Mean/_15 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
In [ ]:
print(sess.run(weights['hidden1']))
In [ ]:
same error for:
print(sess.run(weights['hidden1']))
Upvotes: 3
Views: 7600
Reputation: 336
I ran into a similar issue just recently with TF v1.5. It appears that variables and ops should be added to a global collection for them to be restored properly. Below are two snippets for the MNIST dataset that train, save and restore a model.
Train and persist
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
train_dir = 'train'
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Model: single-layer softmax regression over flattened 28x28 images.
x = tf.placeholder(tf.float32, [None, 784], name='x')
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b')
y = tf.nn.softmax(tf.matmul(x, W) + b, name='y')

# One-hot targets, training objective and evaluation metric.
yt = tf.placeholder(tf.float32, [None, 10], name='yt')
cross_entropy = tf.reduce_mean(-tf.reduce_sum(yt * tf.log(y), axis=1))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(yt, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Register the tensors needed at evaluation time in graph collections. The
# collections are stored in the .meta file, so the restoring script can fetch
# them with tf.get_collection() without rebuilding the graph.
tf.add_to_collection('x', x)
tf.add_to_collection('yt', yt)
tf.add_to_collection('accuracy', accuracy)

saver = tf.train.Saver(tf.trainable_variables())

# Make sure the checkpoint directory exists before saving into it.
if not os.path.isdir(train_dir):
    os.makedirs(train_dir)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, yt: batch_ys})

    feed_dict = {x: mnist.test.images, yt: mnist.test.labels}
    print(sess.run(accuracy, feed_dict))

    # Writes the checkpoint data plus 'model.meta' (graph and collections).
    saver.save(sess, os.path.join(train_dir, 'model'))
Restore and evaluate
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
train_dir = 'train'
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# latest_checkpoint() returns None when the directory holds no checkpoint;
# fail with a clear message instead of trying to import 'None.meta'.
latest_checkpoint = tf.train.latest_checkpoint(train_dir)
if latest_checkpoint is None:
    raise IOError('No checkpoint found in %r; run the training script first.' % train_dir)
meta_path = '%s.meta' % latest_checkpoint

with tf.Session() as sess:
    # Recreate the graph from the .meta file, then load the variable values.
    saver = tf.train.import_meta_graph(meta_path)
    saver.restore(sess, latest_checkpoint)

    # Look up the tensors that the training script stored in collections.
    x = tf.get_collection('x')[0]
    yt = tf.get_collection('yt')[0]
    accuracy = tf.get_collection('accuracy')[0]

    feed_dict = {x: mnist.test.images, yt: mnist.test.labels}
    print(sess.run(accuracy, feed_dict))
Here is a link to related TF documentation
Upvotes: 3
Reputation: 407
What I finally found is that every variable or operation we want to access later needs to be given a name, and so do the placeholders; then there is no need to redefine the graph. The following finally works; please let me know if there is an even simpler way to do this.
import scipy.io
import numpy as np
import tensorflow as tf
import random
# Synthetic regression data: 200 random 2-D points whose targets are the
# linear combination 2*x0 + 3*x1, reshaped to a column vector.
train_x = np.random.rand(200, 2)
w = np.array([2, 3])
train_y = np.dot(train_x, w)
train_y = np.reshape(train_y, [200, 1])

feature_dim = 2
output_dim = 1

# Everything that has to be looked up after restoring gets an explicit name.
x = tf.placeholder(tf.float32, [None, feature_dim], name="input")
y_ = tf.placeholder(tf.float32, [None, output_dim], name="output")

weights = {
    'hidden1': tf.Variable(tf.random_normal([feature_dim, output_dim], stddev=1 / np.sqrt(feature_dim)), name="weights")
}


def network1(data):
    """Return the linear model output ``data`` x ``weights['hidden1']``."""
    return tf.matmul(data, weights['hidden1'])


y = network1(x)
loss_f = tf.reduce_mean(tf.squared_difference(y, y_), name="op_to_restore")
optimizer_f = tf.train.AdamOptimizer(1e-4).minimize(loss_f)

saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Full-batch training: every step uses the whole data set.
batch_x = train_x
batch_y = train_y
for i in range(100):
    sess.run(optimizer_f, feed_dict={x: batch_x, y_: batch_y})

print(sess.run(loss_f, feed_dict={x: batch_x, y_: batch_y}))
# `i` still holds the last step index (99), so the checkpoint files are
# written as './savedmodel/-99.*'.
saver.save(sess, './savedmodel/', global_step=i)
sess.close()

# Restore without redefining the graph. Start from an empty default graph:
# importing the meta graph on top of the training graph would duplicate every
# node, and the name lookups below would then return the hand-built tensors
# instead of the imported ones.
tf.reset_default_graph()

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('./savedmodel/-99.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./savedmodel/'))

    # Fetch the named placeholders and the loss tensor from the imported graph.
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name("input:0")
    y_ = graph.get_tensor_by_name("output:0")
    feed_dict = {x: train_x, y_: train_y}
    op_to_restore = graph.get_tensor_by_name("op_to_restore:0")
    print(sess.run(op_to_restore, feed_dict))
Upvotes: 2