Reputation: 21
I'm trying to make a convolutional neural network in tensorflow. I've trained it and saved the model with
saver = tf.tain.Saver()
saver.save(sess, "model.ckpt")
Then I have restored the model with
saver.restore(sess, "model.ckpt")
When I try to predict the label of one image, there's no problem. But when I try to predict the label of two or more images, the network predicts only the first and then I get an error.
Here's the code for training the network:
import tensorflow as tf
import pickle
import numpy as np
with open('X_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_train = u.load()
with open('X_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_test = u.load()
X_test = np.array(X_test).reshape(-1, 2500)
with open('y_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_train = u.load()
with open('y_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_test = u.load()
n_classes = 3
batch_size = 100
x = tf.placeholder('float', [None, 2500])
y = tf.placeholder('float')
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def convolutional_neural_network(x):
weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_fc':tf.Variable(tf.random_normal([13*13*64,1024])),
'out':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([64])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1, 50, 50, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2,[-1, 13*13*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, keep_rate)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
def train_neural_network(x):
prediction = convolutional_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits = prediction, labels = y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
saver = tf.train.Saver()
hm_epochs = 3
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
if epoch != 0:
saver.restore(sess, "model.ckpt")
epoch_loss = 0
i = 0
while i < len(X_train):
start = 1
end = i + batch_size
batch_x = np.array(X_train[start:end]).reshape(-1, 2500)
batch_y = np.array(y_train[start:end])
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
epoch_loss += c
i += batch_size
print(i)
saver.save(sess, "model.ckpt")
print('Epoch', epoch + 1, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:X_test, y:y_test}))
And here's the code for using the network:
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle
import numpy as np
with open('X_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_train = u.load()
with open('X_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_test = u.load()
X_test = np.array(X_test).reshape(-1, 2500)
with open('y_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_train = u.load()
with open('y_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_test = u.load()
n_classes = 3
batch_size = 100
x = tf.placeholder('float', [None, 2500])
y = tf.placeholder('float')
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def convolutional_neural_network(x):
weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_fc':tf.Variable(tf.random_normal([13*13*64,1024])),
'out':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([64])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1, 50, 50, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2,[-1, 13*13*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, keep_rate)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
def use_neural_network(input_data):
prediction = convolutional_neural_network(x)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
saver.restore(sess, "model.ckpt")
result = (sess.run(tf.argmax(prediction.eval(feed_dict={x:[input_data]}),1)))
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:X_test, y:y_test}))
return result
sample1 = X_train[432].reshape(2500)
res1 =use_neural_network(sample1)
if res == [0]: print('Go straight')
elif res == [1]: print('Turn right')
else: print('Turn left')
img = sample.reshape(50,50)
plt.imshow(img)
plt.show()
#till now there is no problem
sample2 = X_train[1222].reshape(2500)
res =use_neural_network(sample2)
#ERROR
if res2 == [0]: print('Go straight')
elif res2 == [1]: print('Turn right')
else: print('Turn left')
img2 = sample2.reshape(50,50)
plt.imshow(img2)
plt.show()
Here's the error:
2017-07-24 14:22:21.277633: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_10 not found in checkpoint
2017-07-24 14:22:21.278563: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_11 not found in checkpoint
2017-07-24 14:22:21.278577: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_12 not found in checkpoint
2017-07-24 14:22:21.278995: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_9 not found in checkpoint
2017-07-24 14:22:21.279316: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_13 not found in checkpoint
2017-07-24 14:22:21.279325: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_14 not found in checkpoint
2017-07-24 14:22:21.279997: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_15 not found in checkpoint
2017-07-24 14:22:21.284003: W tensorflow/core/framework/op_kernel.cc:1158] Not found: Key Variable_8 not found in checkpoint
Traceback (most recent call last):
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1139, in _do_call
return fn(*args)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1121, in _run_fn
status, run_metadata)
File "/anaconda/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.NotFoundError: Key Variable_10 not found in checkpoint
[[Node: save_1/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save_1/Const_0_0, save_1/RestoreV2_2/tensor_names, save_1/RestoreV2_2/shape_and_slices)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "USE_NEURAL_NETWORK.py", line 103, in <module>
res =use_neural_network(sample)
File "USE_NEURAL_NETWORK.py", line 80, in use_neural_network
saver.restore(sess, "model.ckpt")
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1548, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key Variable_10 not found in checkpoint
[[Node: save_1/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save_1/Const_0_0, save_1/RestoreV2_2/tensor_names, save_1/RestoreV2_2/shape_and_slices)]]
Caused by op 'save_1/RestoreV2_2', defined at:
File "USE_NEURAL_NETWORK.py", line 103, in <module>
res =use_neural_network(sample)
File "USE_NEURAL_NETWORK.py", line 75, in use_neural_network
saver = tf.train.Saver()
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1139, in __init__
self.build()
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1170, in build
restore_sequentially=self._restore_sequentially)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 691, in build
restore_sequentially, reshape)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 407, in _AddRestoreOps
tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 247, in restore_op
[spec.tensor.dtype])[0])
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 640, in restore_v2
dtypes=dtypes, name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
NotFoundError (see above for traceback): Key Variable_10 not found in checkpoint
[[Node: save_1/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save_1/Const_0_0, save_1/RestoreV2_2/tensor_names, save_1/RestoreV2_2/shape_and_slices)]]
How can I avoid this error and predict as many images as I want ?
Upvotes: 0
Views: 397
Reputation: 659
When you do prediction = convolutional_neural_network(x)
in use_neural_network()
, you create your networks, with all variables having specific names.
If you do it twice, you will create 2 networks, identical in every point but the name of the variables, the second network will have other names for the variables (as you can't have the same name for 2 variables).
Then, problems arise when you use the saver to restore your model, as it tries to find values for every variables in your graph, but your checkpoint will only contain values for the variables in the first network, as they are properly named.
The Saver raise an exception because it failed to restore every variables.
So, your mistake is to restore your model everytime you want to process a new image.
You should have two functions, one for restoring, and one for processing.
Upvotes: 1