Reputation: 506
I am trying to follow the TensorFlow tutorial at https://www.tensorflow.org/tutorials/seq2seq. The data seems to load fine, but when I initialize the model I get the following error:
Traceback (most recent call last):
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 334, in <module>
train()
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 151, in train
model = create_model(sess, False)
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/execute.py", line 113, in create_model
forward_only=forward_only)
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 181, in __init__
softmax_loss_function=softmax_loss_function)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1206, in model_with_buckets
decoder_inputs[:bucket[1]])
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 180, in <lambda>
lambda x, y: seq2seq_f(x, y, False),
File "/Users/<username>/PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 144, in seq2seq_f
dtype=dtype)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 848, in embedding_attention_seq2seq
encoder_cell = copy.deepcopy(cell)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 174, in deepcopy
y = copier(memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 476, in __deepcopy__
setattr(result, k, copy.deepcopy(v, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 237, in _deepcopy_tuple
y.append(deepcopy(a, memo))
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct
state = deepcopy(state, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy
y = copier(x, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy
y = _reconstruct(x, rv, 1, memo)
File "/Users/<username>/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 343, in _reconstruct
y.__dict__.update(state)
AttributeError: 'NoneType' object has no attribute 'update'
If I change the _buckets variable in translate.py to contain only one tuple (it doesn't matter which one), there is no issue, but training does not work very well. Any more than one tuple causes this error. No changes have been made to the files found on GitHub at https://github.com/tensorflow/models/tree/master/tutorials/rnn/translate other than renaming the files for my local project.
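For reference, the bucket definition in the tutorial's translate.py looks like this (reproduced from memory of the published tutorial, so the exact values may differ; the single-tuple variant is the workaround described above):

# Buckets of (max_input_length, max_output_length), sorted ascending.
_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]

# Any single-bucket variant avoids the crash but trains poorly, e.g.:
# _buckets = [(5, 10)]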
The beginning of the __init__ function, leading up to the failing call, looks like this:
def __init__(self,
             source_vocab_size,
             target_vocab_size,
             buckets,
             size,
             num_layers,
             max_gradient_norm,
             batch_size,
             learning_rate,
             learning_rate_decay_factor,
             use_lstm=False,
             num_samples=512,
             forward_only=False,
             dtype=tf.float32):
  """Create the model.

  Args:
    source_vocab_size: size of the source vocabulary.
    target_vocab_size: size of the target vocabulary.
    buckets: a list of pairs (I, O), where I specifies maximum input length
      that will be processed in that bucket, and O specifies maximum output
      length. Training instances that have inputs longer than I or outputs
      longer than O will be pushed to the next bucket and padded accordingly.
      We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
    size: number of units in each layer of the model.
    num_layers: number of layers in the model.
    max_gradient_norm: gradients will be clipped to maximally this norm.
    batch_size: the size of the batches used during training;
      the model construction is independent of batch_size, so it can be
      changed after initialization if this is convenient, e.g., for decoding.
    learning_rate: learning rate to start with.
    learning_rate_decay_factor: decay learning rate by this much when needed.
    use_lstm: if true, we use LSTM cells instead of GRU cells.
    num_samples: number of samples for sampled softmax.
    forward_only: if set, we do not construct the backward pass in the model.
    dtype: the data type to use to store internal variables.
  """
  self.source_vocab_size = source_vocab_size
  self.target_vocab_size = target_vocab_size
  self.buckets = buckets
  self.batch_size = batch_size
  self.learning_rate = tf.Variable(
      float(learning_rate), trainable=False, dtype=dtype)
  self.learning_rate_decay_op = self.learning_rate.assign(
      self.learning_rate * learning_rate_decay_factor)
  self.global_step = tf.Variable(0, trainable=False)

  # If we use sampled softmax, we need an output projection.
  output_projection = None
  softmax_loss_function = None
  # Sampled softmax only makes sense if we sample less than vocabulary size.
  if 0 < num_samples < self.target_vocab_size:
    w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
    w = tf.transpose(w_t)
    b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
    output_projection = (w, b)

    def sampled_loss(labels, logits):
      labels = tf.reshape(labels, [-1, 1])
      # We need to compute the sampled_softmax_loss using 32bit floats to
      # avoid numerical instabilities.
      local_w_t = tf.cast(w_t, tf.float32)
      local_b = tf.cast(b, tf.float32)
      local_inputs = tf.cast(logits, tf.float32)
      return tf.cast(
          tf.nn.sampled_softmax_loss(
              weights=local_w_t,
              biases=local_b,
              labels=labels,
              inputs=local_inputs,
              num_sampled=num_samples,
              num_classes=self.target_vocab_size),
          dtype)

    softmax_loss_function = sampled_loss

  # Create the internal multi-layer cell for our RNN.
  def single_cell():
    return tf.contrib.rnn.GRUCell(size)

  if use_lstm:
    def single_cell():
      return tf.contrib.rnn.BasicLSTMCell(size)

  cell = single_cell()
  if num_layers > 1:
    cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])

  # The seq2seq function: we use embedding for the input and attention.
  def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols=source_vocab_size,
        num_decoder_symbols=target_vocab_size,
        embedding_size=size,
        output_projection=output_projection,
        feed_previous=do_decode,
        dtype=dtype)
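  # NOTE: per the traceback above, embedding_attention_seq2seq internally
  # calls encoder_cell = copy.deepcopy(cell) (seq2seq.py line 848 in this
  # TF 1.2 install), so it is the shared `cell` built above that gets
  # deep-copied when each bucket's graph is constructed.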
  # Feeds for inputs.
  self.encoder_inputs = []
  self.decoder_inputs = []
  self.target_weights = []
  for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
    self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                              name="encoder{0}".format(i)))
  for i in xrange(buckets[-1][1] + 1):
    self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                              name="decoder{0}".format(i)))
    self.target_weights.append(tf.placeholder(dtype, shape=[None],
                                              name="weight{0}".format(i)))

  # Our targets are decoder inputs shifted by one.
  targets = [self.decoder_inputs[i + 1]
             for i in xrange(len(self.decoder_inputs) - 1)]

  # Training outputs and losses.
  if forward_only:
    self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
        self.encoder_inputs, self.decoder_inputs, targets,
        self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
        softmax_loss_function=softmax_loss_function)
    # If we use output projection, we need to project outputs for decoding.
    if output_projection is not None:
      for b in xrange(len(buckets)):
        self.outputs[b] = [
            tf.matmul(output, output_projection[0]) + output_projection[1]
            for output in self.outputs[b]
        ]
  else:
    self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
        self.encoder_inputs, self.decoder_inputs, targets,
        self.target_weights, buckets,
        lambda x, y: seq2seq_f(x, y, False),
        softmax_loss_function=softmax_loss_function)  # this is where the error occurs
What do I need to change to get this to work? From the traceback, the failure originates in the encoder_cell = copy.deepcopy(cell) call inside embedding_attention_seq2seq. I am using TensorFlow version 1.2.
UPDATE: This has been tested with TensorFlow built from source and installed from the pip package on Mac OS X Sierra, and the same issue occurs in both cases.
Upvotes: 3
Views: 4070
Reputation: 92
I think the library you are using may have requirements that are not properly installed. Check that you have the correct versions of its dependencies, and it might work.
Upvotes: 0
Reputation: 498
As I have already commented here, the model you are trying to implement is deprecated. If you want to get it working, check the code I've pasted in the issue.
Starting from TensorFlow 1.1 and 1.2 there are functions for dynamic decoding, such as tf.nn.bidirectional_dynamic_rnn, which let you handle dynamically sized sequences for free; see the sketch below.
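Here is a minimal sketch of the idea, assuming GRU cells and illustrative placeholder shapes of my own choosing (this is not a full replacement for the seq2seq model, just the dynamic encoder part):

import tensorflow as tf

# Illustrative sizes, not taken from the question's model.
batch_size, max_time, input_depth, num_units = 32, 20, 128, 256

inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_depth])
# Per-example true lengths; padded steps beyond these are skipped.
seq_len = tf.placeholder(tf.int32, [batch_size])

cell_fw = tf.contrib.rnn.GRUCell(num_units)
cell_bw = tf.contrib.rnn.GRUCell(num_units)

# outputs is a (forward, backward) pair, each [batch, max_time, num_units].
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs,
    sequence_length=seq_len,
    dtype=tf.float32)

# Concatenate the two directions into a [batch, max_time, 2 * num_units] encoding.
encoder_outputs = tf.concat(outputs, 2)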
I'm creating some examples and I'll post a working example with the new API.
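In the meantime, if you need the legacy tutorial code to run at all: the traceback shows embedding_attention_seq2seq failing while deep-copying the shared cell (encoder_cell = copy.deepcopy(cell)). One workaround that has been reported to avoid this crash, sketched here against the single_cell and num_layers names from your code and not something I have verified on every setup, is to build a fresh cell inside seq2seq_f instead of reusing the outer one:

def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
  # Construct a new cell object on every call so that
  # embedding_attention_seq2seq never deep-copies a cell that is
  # already wired into the graph.
  if num_layers > 1:
    tmp_cell = tf.contrib.rnn.MultiRNNCell(
        [single_cell() for _ in range(num_layers)])
  else:
    tmp_cell = single_cell()
  return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
      encoder_inputs,
      decoder_inputs,
      tmp_cell,
      num_encoder_symbols=source_vocab_size,
      num_decoder_symbols=target_vocab_size,
      embedding_size=size,
      output_projection=output_projection,
      feed_previous=do_decode,
      dtype=dtype)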
Upvotes: 5