Reputation: 85
In the last few weeks I have been trying to get an input pipeline running with TFRecords under TensorFlow (tf 2.0.1). Sentences are loaded from a CSV and a record is generated:
import tensorflow as tf
import pathlib
import sys
import csv
PATH_PARENT = str(pathlib.Path(__file__).parent.absolute())
if PATH_PARENT.endswith('models'):
    PATH_PARENT = PATH_PARENT[:-len('models')]
PATH_PARENT = PATH_PARENT.replace("\\", '/')
sys.path.append(PATH_PARENT)
def create_tf_example(features, label):
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'Sentence': tf.train.Feature(bytes_list=tf.train.BytesList(value=[features.encode('utf-8')])),
        'Class': tf.train.Feature(bytes_list=tf.train.BytesList(value=[label.encode('utf-8')])),
    }))
    return tf_example
intent_load_list = ["training_data_intent_Music_controler_0.csv"] # Example: musik,<slot_0>,play,<slot_music_controle>
for load_intent in intent_load_list:
    start = 14
    end = load_intent.rfind("_")
    label = load_intent[start:end]
    print("loading intent " + label)

    csv_data = []
    with open(PATH_PARENT + "models/" + load_intent, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            clean_output = ''
            for word in row:
                if '<' not in word:
                    clean_output = clean_output + word + ' '
            csv_data.append(clean_output)

    with tf.io.TFRecordWriter(PATH_PARENT + "models/dataset.tfrecords") as writer:
        for row in csv_data:
            features = row
            example = create_tf_example(features, label)
            writer.write(example.SerializeToString())
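To verify the file, the written records can be read back and printed (a quick sketch using the same dataset.tfrecords path as above):
import tensorflow as tf

# Read the serialized examples back and print them to check the written contents.
raw_ds = tf.data.TFRecordDataset([PATH_PARENT + "models/dataset.tfrecords"])
for raw_record in raw_ds.take(3):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    print(example)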
This works so far. The record should later be loaded, run through a TF-Hub model, and then used to train the model. However, I either get an IndexError or it fails completely:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import sys
import pathlib
PATH_PARENT = str(pathlib.Path(__file__).parent.absolute())
if PATH_PARENT.endswith('models'):
    PATH_PARENT = PATH_PARENT[:-len('models')]
PATH_PARENT = PATH_PARENT.replace("\\", '/')
sys.path.append(PATH_PARENT)
embed = hub.load("https://tfhub.dev/google/nnlm-de-dim50-with-normalization/2")
dataset = tf.data.TFRecordDataset(filenames = [PATH_PARENT + "models/dataset.tfrecords"])
def prepare_for_training(ds, shuffle_buffer_size=1024, batch_size=2):
    ds = ds.map(lambda x: embed([x]))
    ds = ds.shuffle(buffer_size=shuffle_buffer_size).batch(batch_size)
    print(ds)
    return ds

def convert_data(data):
    data_np = embed(data)
    data_list = data_np.tolist()
    return data_list
batch_size = 64
n_intents = 2
train_ds = prepare_for_training(dataset, batch_size=batch_size)
build_model = keras.Sequential()
build_model.add(keras.layers.Input(shape=(None, 50)))
build_model.add(keras.layers.Dense(50, activation='relu'))
build_model.add(keras.layers.Dropout(0.2))
build_model.add(keras.layers.Dense(20, activation='relu'))
build_model.add(keras.layers.Dropout(0.2))
build_model.add(keras.layers.Dense(n_intents, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(train_ds, epochs=5) # IndexError - list index out of range
print("done")
Has anyone tried something similar or has an idea? Unfortunately the documentation didn't help much. Thanks in advance.
Upvotes: 1
Views: 132
Reputation:
You get this error in TensorFlow 2.0.1 when you don't pass the labels in your data. In the example below, I write dummy Input values using TFRecordWriter, later read them back using TFRecordDataset, and pass them to the model.
If you run the same code in TensorFlow 2.1.0, the error changes to IndexError: tuple index out of range.
If you run the same code in TensorFlow 2.2.0, the error changes to ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_2/bias:0'].
Code to reproduce the error -
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)
def write_date_tfrecord():
    # writes 10 dummy values to replicate the issue
    Input = [20191221.123 + x for x in range(0, 10)]
    print("Writing Input - ", Input)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(float_list=tf.train.FloatList(value=Input))
            }
        ))
    writer = tf.io.TFRecordWriter("Data.tf_record")
    writer.write(example.SerializeToString())
    writer.close()  # close the writer so the record is flushed to disk
def parse_function(serialized_example):
    features = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True)
    }
    features = tf.io.parse_single_example(serialized=serialized_example, features=features)
    Input = features['Input']
    return Input
def dataset_generator():
    trRecordDataset = tf.data.TFRecordDataset("Data.tf_record")
    trRecordDataset = trRecordDataset.map(parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return trRecordDataset
write_date_tfrecord()
generator = dataset_generator()
build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
print("done")
Output -
2.0.1
Writing Input - [20191221.123, 20191222.123, 20191223.123, 20191224.123, 20191225.123, 20191226.123, 20191227.123, 20191228.123, 20191229.123, 20191230.123]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 50) 100
_________________________________________________________________
dropout (Dropout) (None, 50) 0
_________________________________________________________________
dense_1 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_1 (Dropout) (None, 20) 0
_________________________________________________________________
dense_2 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/Unknown - 0s 60ms/step
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-1-d1c5c463cdc2> in <module>()
47 build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
48
---> 49 build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
50 print("done")
20 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
IndexError: list index out of range
Solution - I created a dummy labels variable, wrote the dummy labels using TFRecordWriter, and later read them back using TFRecordDataset and passed them to the model. Now we are passing both Input and labels to the model and it works fine.
Fixed Code -
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)
def write_date_tfrecord():
    # writes 9 dummy values and 9 dummy labels
    Input = [20191221.123 + x for x in range(0, 9)]
    labels = [1, 1, 0, 0, 0, 1, 1, 2, 2]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(float_list=tf.train.FloatList(value=Input)),
                'labels': tf.train.Feature(float_list=tf.train.FloatList(value=labels))
            }
        ))
    writer = tf.io.TFRecordWriter("Data.tf_record")
    writer.write(example.SerializeToString())
    writer.close()  # close the writer so the record is flushed to disk
def parse_function(serialized_example):
    features = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
        'labels': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True)
    }
    features = tf.io.parse_single_example(serialized=serialized_example, features=features)
    Input = features['Input']
    labels = features['labels']
    return Input, labels
def dataset_generator():
    trRecordDataset = tf.data.TFRecordDataset("Data.tf_record")
    trRecordDataset = trRecordDataset.map(parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return trRecordDataset
write_date_tfrecord()
generator = dataset_generator()
build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(dataset_generator(), epochs=5)  # now trains without the IndexError
print("done")
Output -
2.1.0
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_12 (Dense) (None, 50) 100
_________________________________________________________________
dropout_8 (Dropout) (None, 50) 0
_________________________________________________________________
dense_13 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_9 (Dropout) (None, 20) 0
_________________________________________________________________
dense_14 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/1 [==============================] - 0s 362ms/step - loss: 8705754.0000 - accuracy: 0.0000e+00
Epoch 2/5
1/1 [==============================] - 0s 14ms/step - loss: 4458477.5000 - accuracy: 0.2222
Epoch 3/5
1/1 [==============================] - 0s 16ms/step - loss: 5933292.5000 - accuracy: 0.2222
Epoch 4/5
1/1 [==============================] - 0s 16ms/step - loss: 4305070.0000 - accuracy: 0.1111
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 5578528.5000 - accuracy: 0.1111
done
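Applied to your Sentence/Class records, the same idea would look roughly like the sketch below. This is only a sketch built on assumptions: the label_to_id mapping is something I invented (extend it with your real intents), the hub embedding is called inside Dataset.map as in your prepare_for_training (if that fails in your version, hub.KerasLayer inside the model is an alternative), and I switch to sparse_categorical_crossentropy because the labels are integer class ids.
import tensorflow as tf
import tensorflow_hub as hub

embed = hub.load("https://tfhub.dev/google/nnlm-de-dim50-with-normalization/2")

# Hypothetical mapping from your intent strings to class ids - extend per intent.
label_to_id = {'intent_Music_controler': 0}
n_intents = 2
label_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(list(label_to_id.keys())),
        values=tf.constant(list(label_to_id.values()), dtype=tf.int64)),
    default_value=-1)

def parse_sentence(serialized_example):
    # Parse the 'Sentence' and 'Class' string features written by create_tf_example.
    features = {
        'Sentence': tf.io.FixedLenFeature([], tf.string),
        'Class': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(serialized=serialized_example, features=features)
    return parsed['Sentence'], parsed['Class']

def to_embedding(sentence, label):
    # embed([...]) returns shape [1, 50] for a single sentence; squeeze to [50].
    vector = tf.squeeze(embed([sentence]), axis=0)
    return vector, label_table.lookup(label)

ds = tf.data.TFRecordDataset(["models/dataset.tfrecords"])  # adjust to your PATH_PARENT
ds = ds.map(parse_sentence).map(to_embedding).shuffle(1024).batch(2)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation='relu', input_shape=(50,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(n_intents, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # integer class ids as labels
              metrics=['accuracy'])
model.fit(ds, epochs=5)
With integer ids, sparse_categorical_crossentropy avoids one-hot encoding the labels; if you prefer to keep categorical_crossentropy, you could instead return tf.one_hot(label_table.lookup(label), n_intents) from to_embedding.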
I would also recommend going through this link, which explains how to feed TFRecord data to train a Keras model.
Hope this answers your question. Happy Learning.
Upvotes: 1