Reputation: 3146
I have written a simple bidirectional LSTM for sentence classification, but it keeps giving me a "You must feed a value for placeholder tensor 'train_x'" error, and this seems to come from the variable initialization step.
data = load_data(FLAGS.data)
model = RNNClassifier(FLAGS)
init = tf.initialize_all_variables()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    sess.run(init)
    print("Graph initialized..")
    print()
    np.random.seed(FLAGS.random_state)
    for epoch in range(FLAGS.max_max_epoch):
        loss = sess.run(model.cost, feed_dict={model.train_x: data.train_x, model.train_y: data.train_y,
                                               model.embedding_placeholder: data.glove_vec})
        print("Epoch {:2d}: Loss = {:.6f}".format(epoch+1, loss))
    coord.request_stop()
    coord.join(threads)
And the RNNClassifier class code (in a different directory):
class RNNClassifier:
    def __init__(self, FLAGS):
        self.params = FLAGS
        with tf.device("/cpu:0"):
            self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
            self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
            self.embedding_placeholder = tf.placeholder(tf.float32, [1193515, 100])

        with tf.variable_scope('forward_lstm'):
            lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.params.num_hidden, use_peepholes=False,
                                                   activation=tf.nn.relu, forget_bias=0.0,
                                                   state_is_tuple=True)
        with tf.variable_scope('backward_lstm'):
            lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.params.num_hidden, use_peepholes=False,
                                                   activation=tf.nn.relu, forget_bias=0.0,
                                                   state_is_tuple=True)

        fw_initial_state = lstm_fw_cell.zero_state(self.params.batch_size, tf.float32)
        bw_initial_state = lstm_bw_cell.zero_state(self.params.batch_size, tf.float32)
        self._initial_state = [fw_initial_state, bw_initial_state]

        with tf.device("/cpu:0"), tf.variable_scope('softmax'):
            self.W = tf.get_variable('W', [self.params.num_hidden*2, self.params.num_classes])
            self.b = tf.get_variable('b', [self.params.num_classes], initializer=tf.constant_initializer(0.0))

        batched_inputs, batched_labels = self.batch_data()
        embed_inputs = self.use_embedding(batched_inputs)

        rnn_outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(
            cell_fw=lstm_fw_cell,
            cell_bw=lstm_bw_cell,
            inputs=embed_inputs,
            initial_state_fw=fw_initial_state,
            initial_state_bw=bw_initial_state
        )

        logits = tf.matmul(rnn_outputs[-1], self.W) + self.b
        self._cost = cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf.cast(batched_labels, tf.float32)))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.05).minimize(cost)

    def batch_data(self):
        # inputs = tf.convert_to_tensor(train_x, dtype=tf.int32)
        # labels = tf.convert_to_tensor(train_y, dtype=tf.int32)
        batched_inputs, batched_labels = tf.train.batch(
            tensors=[self._train_x, self._train_y],
            batch_size=self.params.batch_size,
            dynamic_pad=True,
            enqueue_many=True,
            name='batching'
        )
        return batched_inputs, batched_labels

    def use_embedding(self, batched_inputs):
        with tf.device("/cpu:0"), tf.name_scope("input_embedding"):
            embedding = tf.get_variable("embedding", shape=[1193515, 100], trainable=False)
            embedding_init = embedding.assign(self.embedding_placeholder)
            embed_inputs = tf.split(1, self.params.seq_len, tf.nn.embedding_lookup(embedding_init, batched_inputs))
            embed_inputs = [tf.squeeze(input_, [1]) for input_ in embed_inputs]
        return embed_inputs

    @property
    def cost(self):
        return self._cost
The output (including the error):
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
I tensorflow/core/common_runtime/gpu/gpu_init.cc:102] Found device 0 with properties:
name: GeForce GTX 750 Ti
major: 5 minor: 0 memoryClockRate (GHz) 1.0845
pciBusID 0000:01:00.0
Total memory: 2.00GiB
Free memory: 1.41GiB
I tensorflow/core/common_runtime/gpu/gpu_init.cc:126] DMA: 0
I tensorflow/core/common_runtime/gpu/gpu_init.cc:136] 0: Y
I tensorflow/core/common_runtime/gpu/gpu_device.cc:839] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 750 Ti, pci bus id: 0000:01:00.0)
E tensorflow/core/client/tensor_c_api.cc:485] You must feed a value for placeholder tensor 'train_x' with dtype int32 and shape [6248,42]
[[Node: train_x = Placeholder[dtype=DT_INT32, shape=[6248,42], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Graph initialized..
W tensorflow/core/framework/op_kernel.cc:936] Out of range: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:@batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
W tensorflow/core/framework/op_kernel.cc:936] Out of range: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:@batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
E tensorflow/core/client/tensor_c_api.cc:485] PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:@batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
[[Node: batching/_9 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_1191_batching", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Traceback (most recent call last):
File "train_lstm.py", line 66, in <module>
model.embedding_placeholder: data.glove_vec})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.OutOfRangeError: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:@batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
[[Node: batching/_9 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_1191_batching", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op u'batching', defined at:
File "train_lstm.py", line 49, in <module>
model = RNNClassifier(FLAGS)
File "/home/ccrmad/Code/TDLSTM/models/rnn_classifier.py", line 34, in __init__
batched_inputs, batched_labels = self.batch_data()
File "/home/ccrmad/Code/TDLSTM/models/rnn_classifier.py", line 74, in batch_data
name='batching'
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 595, in batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 435, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 867, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()
I have tried moving the train_x and train_y placeholder initialization before init = tf.initialize_all_variables() and feeding them to RNNClassifier() as two args, but it still gives the same error. Why?
Upvotes: 15
Views: 3084
Reputation: 126154
It looks like the problem is in this method, in RNNClassifier:
def batch_data(self):
    # ...
    batched_inputs, batched_labels = tf.train.batch(
        tensors=[self._train_x, self._train_y],
        batch_size=self.params.batch_size,
        dynamic_pad=True,
        enqueue_many=True,
        name='batching')
The two tensor arguments to tf.train.batch() are self._train_x and self._train_y. In the RNNClassifier constructor, it seems like you create these as tf.placeholder() tensors:
def __init__(self, FLAGS):
    # ...
    with tf.device("/cpu:0"):
        self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
        self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
...though I'm assuming that the difference between self._train_x and self.train_x is a copy-paste error, because self._train_x doesn't seem to be defined anywhere else.
Now, one of the surprising things about tf.train.batch() is that it consumes its inputs in a completely separate thread, called a "queue runner", which is started when you call tf.train.start_queue_runners(). This thread calls Session.run() on a subgraph that depends on your placeholders, but it doesn't know how to feed those placeholders, so it is this call that fails, leading to the error you are seeing.
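To make this concrete, here is a minimal, self-contained sketch of the same failure mode (the shapes and names are made up purely for illustration): the queue-runner thread runs the enqueue op without a feed_dict, so the placeholder error is logged in the background, the queue is closed, and the subsequent dequeue raises the OutOfRangeError you see in the traceback.

import numpy as np
import tensorflow as tf

# Same pattern as in the question: a placeholder feeding tf.train.batch().
x = tf.placeholder(tf.int32, [100, 10], name='x')
batched_x = tf.train.batch([x], batch_size=5, enqueue_many=True)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # The queue-runner thread calls Session.run() on the enqueue op *without*
    # a feed_dict, so it reports "You must feed a value for placeholder tensor
    # 'x'" and closes the queue. This dequeue then fails with OutOfRangeError,
    # even though we feed the placeholder here: the feed never reaches the
    # background thread.
    sess.run(batched_x, feed_dict={x: np.zeros((100, 10), dtype=np.int32)})

    coord.request_stop()
    coord.join(threads)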
How then should you fix it? One option would be to use a "feeding queue runner", for which there is experimental support. A simpler option would be to use tf.train.slice_input_producer() to produce slices from your input data, as follows:
def batch_data(self, train_x, train_y):
    input_slice, label_slice = tf.train.slice_input_producer([train_x, train_y])
    batched_inputs, batched_labels = tf.train.batch(
        tensors=[input_slice, label_slice],
        batch_size=self.params.batch_size,
        dynamic_pad=False,    # All rows are the same shape.
        enqueue_many=False,   # tf.train.slice_input_producer() produces one row at a time.
        name='batching')
    return batched_inputs, batched_labels

# ...
# Create batches from the entire training data, where `array_input` and
# `array_labels` are two NumPy arrays.
batched_inputs, batched_labels = model.batch_data(array_input, array_labels)
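With this change the training loop no longer needs to feed train_x and train_y at all; assuming model.cost is now built on top of batched_inputs and batched_labels, the loop becomes roughly the following (a sketch only, not tested against your full model):

# Sketch only: assumes model.cost now depends on the batched tensors
# produced by batch_data(), so no feed_dict is required.
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for epoch in range(FLAGS.max_max_epoch):
        loss = sess.run(model.cost)  # no placeholders to feed
        print("Epoch {:2d}: Loss = {:.6f}".format(epoch + 1, loss))
    coord.request_stop()
    coord.join(threads)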
Upvotes: 1
Reputation: 3146
This is what I did. I changed the way I initialise my input variables:
data = load_data(FLAGS.data)
model = RNNClassifier(FLAGS, data)
init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for epoch in range(FLAGS.max_max_epoch):
        sess.run(model.train_step)
        loss, acc = sess.run([model.mean_cost, model.accuracy])
        print("Epoch {:2d}: Loss = {:.6f}; Training Accuracy = {:.5f}".format(epoch+1, loss, acc))
    print()
    coord.request_stop()
    coord.join(threads)
And in the RNNClassifier class I replaced
self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
self.embedding_placeholder = tf.placeholder(tf.float32, [1193515, 100])
(and removed use_embedding()) with:
def __init__(self, FLAGS, data):
    self._train_x = tf.convert_to_tensor(data.train_x, dtype=tf.int32)
    self._train_y = tf.convert_to_tensor(data.train_y, dtype=tf.int32)
    embedding = tf.get_variable("embedding", shape=self.embedding_shape, trainable=False)
    self.embedding_init = embedding.assign(data.glove_vec)
This way everything is initialised with RNNClassifier(FLAGS, data) before the queue runners are started.
I still don't know why the previous way, i.e. using placeholders, didn't work. I have checked the dtype and data shape, and they all match. This post seems to have a similar problem, and the "answer" suggests replacing the placeholder with a tf.Variable. I tried this: input values can now be fed, but obviously it doesn't work the way placeholders do and only feeds the initialised values.
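For illustration, that kind of tf.Variable workaround looks roughly like the sketch below (hypothetical names, not the code I kept); it builds, but the input is frozen at its initial value, so there is no equivalent of feeding fresh data per sess.run():

# Hypothetical sketch of the tf.Variable workaround: the input is baked into
# the graph as a non-trainable variable instead of a placeholder.
train_x_var = tf.Variable(data.train_x, dtype=tf.int32,
                          trainable=False, name='train_x_var')
train_y_var = tf.Variable(data.train_y, dtype=tf.int32,
                          trainable=False, name='train_y_var')
# tf.train.batch() can dequeue from these without a feed_dict, but the values
# are fixed once initialized -- unlike a placeholder, new data cannot be fed
# in per sess.run().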
UPDATE: I think the error might have been caused by doing the input batching inside the RNNClassifier class, which somehow affected how the input data was fed into the graph (again, please correct me if you know the cause of the error, thanks!). I have instead moved my batching and embedding functions into my main code, before initialising the session; this way all inputs are defined in the main pipeline, and batching & prefetching are handled inside the TF graph (by not using placeholders).
Here's my code:
def batch(x, y):
    with tf.device("/cpu:0"):
        x = tf.convert_to_tensor(x, dtype=tf.int32)
        y = tf.convert_to_tensor(y, dtype=tf.int32)
        batched_x, batched_y = tf.train.batch(
            tensors=[x, y],
            batch_size=50,
            dynamic_pad=True,
            enqueue_many=True,
            name='batching'
        )
    return (batched_x, batched_y)

def embed(df):
    with tf.device("/cpu:0"), tf.variable_scope("embed"):
        embedding = tf.get_variable("embedding", shape=df.embed_shape, trainable=False)
        embedding_init = embedding.assign(df.embed_vec)
    return embedding_init

data = load_data(FLAGS.data)
pretrained_embed = embed(data)
batched_train_x, batched_train_y = batch(data.train_x, data.train_y)
batched_test_x, batched_test_y = batch(data.train_x, data.train_y)

model = RNNClassifier(FLAGS, pretrained_embed)
logits = model.inference(batched_train_x)
test_pred = model.inference(batched_test_x, reuse=True)
loss = model.loss(logits, batched_train_y)
test_loss = model.loss(test_pred, batched_test_y)
train_op = model.training(loss[0])
init = tf.group(tf.initialize_all_variables(),
                tf.initialize_local_variables())

with tf.Session() as sess:
    t0 = time.time()
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    np.random.seed(FLAGS.random_state)
    for epoch in range(FLAGS.max_max_epoch):
        _, output = sess.run([train_op, loss])
        loss_value, acc = output
        print("Epoch {:2d}: Loss = {:.6f}; Training Accuracy = {:.5f}".format(epoch+1, loss_value, acc))
    print()
    coord.request_stop()
    coord.join(threads)
And the RNNClassifier:
class RNNClassifier:
    def __init__(self, FLAGS, embedding_init):
        self.batch_size = FLAGS.batch_size
        self.num_hidden = FLAGS.num_hidden
        self.num_classes = FLAGS.num_classes
        self.seq_len = FLAGS.seq_len
        self.embedding_init = embedding_init

    def inference(self, batched_inputs, reuse=None):
        embed_inputs = tf.nn.embedding_lookup(self.embedding_init, tf.transpose(batched_inputs))
        with tf.variable_scope('hidden', reuse=reuse):
            with tf.variable_scope('forward_lstm'):
                lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_hidden, use_peepholes=False,
                                                       activation=tf.nn.relu, forget_bias=0.0,
                                                       initializer=tf.random_uniform_initializer(-1.0, 1.0),
                                                       state_is_tuple=True)
                lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_fw_cell, input_keep_prob=0.7)
            with tf.variable_scope('backward_lstm'):
                lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_hidden, use_peepholes=False,
                                                       activation=tf.nn.relu, forget_bias=0.0,
                                                       initializer=tf.random_uniform_initializer(-1.0, 1.0),
                                                       state_is_tuple=True)
                lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_bw_cell, input_keep_prob=0.7)
            fw_initial_state = lstm_fw_cell.zero_state(self.batch_size, tf.float32)
            bw_initial_state = lstm_bw_cell.zero_state(self.batch_size, tf.float32)
            rnn_outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(
                cell_fw=lstm_fw_cell,
                cell_bw=lstm_bw_cell,
                inputs=tf.unpack(embed_inputs),
                # sequence_length=self.seq_len,
                initial_state_fw=fw_initial_state,
                initial_state_bw=bw_initial_state
            )
        with tf.variable_scope('output', reuse=reuse):
            with tf.variable_scope('softmax'):
                W = tf.get_variable('W', [self.num_hidden*2, self.num_classes],
                                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                b = tf.get_variable('b', [self.num_classes], initializer=tf.constant_initializer(0.1))
            logits = tf.matmul(rnn_outputs[-1], W) + b
        return logits

    def loss(self, logits, labels):
        cost = tf.nn.softmax_cross_entropy_with_logits(logits, tf.cast(labels, tf.float32))
        # self.total_cost = tf.reduce_sum(self.cost)
        mean_cost = tf.reduce_mean(cost)
        correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        return mean_cost, accuracy

    def training(self, cost):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(cost)
        return train_op
On a side note, I wish the TensorFlow community on Stack Overflow would be more active.
Upvotes: 0