Reputation: 405
I am trying to build a unidirectional RNN for sequence modeling and neural embedding. I have built a custom class to experiment with various architectures and parameters. The data manager used in the code (`dm` below) is an instance of another class that reads text data, processes it, and converts it into numeric vectors. `dm.tf_train_set` is a `TensorSliceDataset` containing the numeric vectors and labels for 60% of the dataset; the remaining 40% is in `dm.tf_valid_set`.
I have the following code for my RNN:
class UniRNN:
    """Unidirectional stacked-RNN model built with the Keras functional API.

    The network is an embedding layer, followed by one recurrent layer per
    entry in ``state_sizes``, followed by a softmax ``Dense`` layer.

    Args:
        cell_type: ``'gru'`` or ``'lstm'``; any other value selects
            ``SimpleRNN``.
        embed_size: Output dimension of the embedding layer.
        state_sizes: Number of units of each stacked recurrent layer, in
            order. Copied on construction so instances never share state.
        data_manager: Object exposing ``vocab_size`` and ``num_classes``.

    Raises:
        ValueError: If ``data_manager`` is not supplied.
    """

    def __init__(self, cell_type='gru', embed_size=128, state_sizes=(128, 64), data_manager=None):
        if data_manager is None:
            raise ValueError("data_manager is required (it provides vocab_size and num_classes)")
        self.cell_type = cell_type
        # Copy into a fresh list: a shared mutable default would leak
        # modifications from one instance into every other instance.
        self.state_sizes = list(state_sizes)
        self.embed_size = embed_size
        self.data_manager = data_manager
        # One extra row in the embedding table on top of the vocabulary
        # (presumably for the padding id 0 used by mask_zero - TODO confirm).
        self.vocab_size = self.data_manager.vocab_size + 1

    @staticmethod
    def get_layer(cell_type='gru', state_size=128, return_sequences=False, activation='tanh'):
        """Return the recurrent Keras layer that corresponds to ``cell_type``.

        ``'gru'`` maps to ``GRU`` and ``'lstm'`` to ``LSTM``; every other
        value falls back to ``SimpleRNN``.
        """
        layers = tf.keras.layers
        layer_cls = {'gru': layers.GRU, 'lstm': layers.LSTM}.get(cell_type, layers.SimpleRNN)
        return layer_cls(state_size, return_sequences=return_sequences, activation=activation)

    def build(self):
        """Assemble the network and store it in ``self.model``."""
        x = tf.keras.layers.Input(shape=[None])
        h = tf.keras.layers.Embedding(self.vocab_size, self.embed_size,
                                      mask_zero=True, trainable=True)(x)
        # Every recurrent layer is created with return_sequences=True, so the
        # Dense head below is applied at each time step of the sequence.
        for state_size in self.state_sizes:
            h = self.get_layer(self.cell_type, state_size, return_sequences=True)(h)
        # Read the class count from the instance's own data manager rather
        # than from a notebook-level global.
        h = tf.keras.layers.Dense(self.data_manager.num_classes, activation='softmax')(h)
        self.model = tf.keras.Model(inputs=x, outputs=h)

    def compile_model(self, *args, **kwargs):
        """Forward all arguments to ``self.model.compile``."""
        self.model.compile(*args, **kwargs)

    def fit(self, *args, **kwargs):
        """Forward all arguments to ``self.model.fit`` and return its result."""
        return self.model.fit(*args, **kwargs)

    def evaluate(self, *args, **kwargs):
        """Forward all arguments to ``self.model.evaluate`` and return its result."""
        return self.model.evaluate(*args, **kwargs)
To fit the model, my code is:
# 'basic_rnn' is neither 'gru' nor 'lstm', so get_layer falls back to SimpleRNN.
uni_rnn = UniRNN(cell_type='basic_rnn', embed_size=128, state_sizes=[128, 128], data_manager=dm)
uni_rnn.build()
# uni_rnn.model.summary()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
# The sparse variant of categorical cross-entropy expects integer class ids as labels.
uni_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train for 20 epochs, feeding both the training and validation sets in batches of 64.
uni_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))
When I run this code, I get the following error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-184-abef9ae0cbcd> in <module>
3 opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
4 uni_rnn.compile_model(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
----> 5 uni_rnn.fit(dm.tf_train_set.batch(64), epochs=20, validation_data = dm.tf_valid_set.batch(64))
<ipython-input-170-53f4c12769ab> in fit(self, *args, **kwargs)
31
32 def fit(self, *args, **kwargs):
---> 33 return self.model.fit(*args, **kwargs)
34
35 def evaluate(self, *args, **kwargs):
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
630 # Lifting succeeded, so variables are initialized and we can run the
631 # stateless function.
--> 632 return self._stateless_fn(*args, **kwds)
633 else:
634 canon_args, canon_kwds = \
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
2361 with self._lock:
2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2364
2365 @property
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1609 if isinstance(t, (ops.Tensor,
1610 resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs)
1612
1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager))
1693 forward_backward = self._select_forward_and_backward_functions(
1694 args,
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\envs\myenv_tf21_p37\lib\site-packages\six.py in raise_from(value, from_value)
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (loss/dense_13_loss/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [64 1] [y (loss/dense_13_loss/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [64 100]
[[node loss/dense_13_loss/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-170-53f4c12769ab>:33) ]] [Op:__inference_distributed_function_111597]
Function call stack:
distributed_function
Can someone please explain what the problem is, and how I can fix it?
Upvotes: 1
Views: 643
Reputation: 22031
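The last recurrent layer must be created with `return_sequences=False`. The error shows the mismatch: your labels have shape `[64, 1]` (one class per sequence), but because every recurrent layer returns the full sequence, the model produces one prediction per time step (`[64, 100]`). Build every layer except the last with `return_sequences=True` and the last one with `return_sequences=False`, for example: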
def build(self):
    """Assemble the network and store it in ``self.model``.

    Layout: embedding -> stacked recurrent layers -> softmax ``Dense``.
    All recurrent layers except the last return full sequences so that they
    can be stacked; the last one is created with ``return_sequences=False``
    so the model emits a single prediction per input sequence.

    Raises:
        ValueError: If ``self.state_sizes`` is empty.
    """
    if not self.state_sizes:
        raise ValueError("state_sizes must contain at least one layer size")
    x = tf.keras.layers.Input(shape=[None])
    h = tf.keras.layers.Embedding(self.vocab_size, self.embed_size,
                                  mask_zero=True, trainable=True)(x)
    # Split the sizes explicitly instead of reusing a leaked loop index: that
    # would give the final layer the wrong size and fail for a single layer.
    *hidden_sizes, last_size = self.state_sizes
    for state_size in hidden_sizes:
        h = self.get_layer(self.cell_type, state_size, return_sequences=True)(h)
    h = self.get_layer(self.cell_type, last_size, return_sequences=False)(h)
    # Use the instance's own data manager rather than a notebook-level global.
    h = tf.keras.layers.Dense(self.data_manager.num_classes, activation='softmax')(h)
    self.model = tf.keras.Model(inputs=x, outputs=h)
Upvotes: 1