Reputation: 932
This is a simplified version of my code which throws the error mentioned in the title:
import tensorflow as tf
BATCH_SIZE = 3
SEQ_LENGTH = 4
NUM_CLASSES = 2
LSTM_UNITS = 64
NUM_SHARDS = 4
NUM_CHANNELS = 2
tf.enable_eager_execution()
def keras_model():
    inputs = tf.keras.layers.Input(shape=(SEQ_LENGTH, NUM_CHANNELS))
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(LSTM_UNITS, return_sequences=True))(inputs)
    outputs = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(NUM_CLASSES, activation='relu'))(x)
    return tf.keras.Model(inputs, outputs)

dataset = tf.data.experimental.CsvDataset(filenames='../../input/aFile.csv', header=True,
                                          record_defaults=[tf.int64] * 3, select_cols=[0, 1, 2])
dataset = dataset.window(size=SEQ_LENGTH, shift=1, drop_remainder=True).flat_map(
    lambda f1, f2, label: tf.data.Dataset.zip(
        (tf.data.Dataset.zip((f1.batch(SEQ_LENGTH), f2.batch(SEQ_LENGTH))), label.batch(SEQ_LENGTH))))
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
train_iterator = dataset.make_one_shot_iterator()
train_features, train_labels = train_iterator.get_next()
print(train_features)
print(train_labels)
model = keras_model()
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x=train_features, y=train_labels, batch_size=BATCH_SIZE, epochs=1, steps_per_epoch=10)
and this is the output of the code:
...
(<tf.Tensor: id=44, shape=(3, 4), dtype=int64, numpy=
array([[0, 1, 2, 3],
[1, 2, 3, 4],
[2, 3, 4, 5]], dtype=int64)>, <tf.Tensor: id=45, shape=(3, 4), dtype=int64, numpy=
array([[100, 101, 102, 103],
[101, 102, 103, 104],
[102, 103, 104, 105]], dtype=int64)>)
tf.Tensor(
[[0 0 0 0]
[0 0 0 1]
[0 0 1 0]], shape=(3, 4), dtype=int64)
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 4, 2)              0
_________________________________________________________________
bidirectional (Bidirectional (None, 4, 128)            34304
_________________________________________________________________
time_distributed (TimeDistri (None, 4, 2)              258
=================================================================
Total params: 34,562
Trainable params: 34,562
Non-trainable params: 0
_________________________________________________________________
...
ValueError: Error when checking input: expected input_1 to have 3 dimensions, but got array with shape (3, 4)
Process finished with exit code 1
I'm using this CSV file for the demo:
f1,f2,label
0,100,0
1,101,0
2,102,0
3,103,0
4,104,1
5,105,0
6,106,0
7,107,0
8,108,1
9,109,0
10,110,0
The first two columns are feature columns coming from two different channels, and the last column contains the labels. I need to feed the model sequences of, for instance, four rows of data as the time steps, with a batch size of, for instance, three. The input shape should therefore be (3, 4, 2): three sequences of four rows each, where each row contains two values. I think I need some kind of reshape, but I couldn't figure out how. Could someone please tell me how to fix the issue?
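For clarity, here is a quick NumPy illustration of the shape I'm after (not my actual pipeline, just to show the target windowing, using the first six rows of the file):

import numpy as np

# Rows of (f1, f2) values windowed into overlapping sequences of length 4
rows = np.stack([np.arange(6), np.arange(100, 106)], axis=1)   # shape (6, 2)
windows = np.stack([rows[i:i + 4] for i in range(3)])          # shape (3, 4, 2)
print(windows.shape)  # (3, 4, 2): 3 sequences, 4 time steps, 2 channels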
Upvotes: 4
Views: 1404
Reputation: 1196
I looked into this again today and I think you could solve this by modifying the parse function like this:
def parse_values(f1, f2, label):
    features = tf.stack([f1, f2], 0)
    return features, label
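With that change, each CSV row becomes a length-2 feature vector, so the later features.batch(SEQ_LENGTH) call yields windows of shape (SEQ_LENGTH, NUM_CHANNELS). A quick eager-mode sanity check (a sketch, with made-up scalars standing in for one parsed row):

import tensorflow as tf
tf.enable_eager_execution()

# Made-up scalar values standing in for one parsed CSV row
f1 = tf.constant(0, dtype=tf.int64)
f2 = tf.constant(100, dtype=tf.int64)
features, label = parse_values(f1, f2, tf.constant(0, dtype=tf.int64))
print(features)  # tf.Tensor([  0 100], shape=(2,), dtype=int64)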
Upvotes: 1
Reputation: 932
In a first attempt to fix the issue, I changed the code to first combine the channels into a feature column and then build the sequence of feature columns. This changed the shape of the input from [batch_size, num_channels, sequence_length] to [batch_size, sequence_length, num_channels] and added a dimension to the labels, as the model expects. Here is the new code:
import tensorflow as tf
import numpy as np
BATCH_SIZE = 3
SEQ_LENGTH = 4
NUM_CLASSES = 2
LSTM_UNITS = 64
NUM_SHARDS = 4
NUM_CHANNELS = 2
tf.enable_eager_execution()
def parse_values(f1, f2, label):
    features = [f1, f2]
    return features, label

def keras_model():
    inputs = tf.keras.layers.Input(shape=(SEQ_LENGTH, NUM_CHANNELS))
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(LSTM_UNITS, return_sequences=True))(inputs)
    outputs = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(NUM_CLASSES, activation='relu'))(x)
    return tf.keras.Model(inputs, outputs)

dataset = tf.data.experimental.CsvDataset(filenames='../../input/aFile.csv', header=True,
                                          record_defaults=[tf.int64] * 3, select_cols=[0, 1, 2])
dataset = dataset.map(parse_values).window(size=SEQ_LENGTH, shift=1, drop_remainder=True).flat_map(
    lambda features, label: tf.data.Dataset.zip((features.batch(SEQ_LENGTH), label.batch(SEQ_LENGTH))))
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
train_iterator = dataset.make_one_shot_iterator()
train_features, train_labels = train_iterator.get_next()
print(train_features)
#train_labels = train_labels[:,SEQ_LENGTH-1] # output => [0 1 0]
#print(train_labels)
train_labels = np.expand_dims(train_labels, axis=2)
print(train_labels)
model = keras_model()
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x=train_features, y=train_labels, batch_size=BATCH_SIZE, epochs=1, steps_per_epoch=10)
And below is the output:
...tf.Tensor(
[[[ 0 100]
[ 1 101]
[ 2 102]
[ 3 103]]
[[ 1 101]
[ 2 102]
[ 3 103]
[ 4 104]]
[[ 2 102]
[ 3 103]
[ 4 104]
[ 5 105]]], shape=(3, 4, 2), dtype=int64)
[[[0]
[0]
[0]
[0]]
[[0]
[0]
[0]
[1]]
[[0]
[0]
[1]
[0]]]
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 4, 2)              0
_________________________________________________________________
bidirectional (Bidirectional (None, 4, 128)            34304
_________________________________________________________________
time_distributed (TimeDistri (None, 4, 2)              258
=================================================================
Total params: 34,562
Trainable params: 34,562
Non-trainable params: 0
_________________________________________________________________
1/10 [==>...........................] - ETA: 8s - loss: 13.3860 - acc: 0.1667
10/10 [==============================] - 1s 101ms/step - loss: 12.9909 - acc: 0.1667
Process finished with exit code 0
It would make more sense to me to have only one label per sequence, determining whether the sequence belongs to category 0 or 1 (in my case three values per batch, since the batch size is three). I tried to do so by adding the line of code shown below, which I later had to comment out since it caused the exception 'Incompatible shapes: [3] vs. [3,4]':
train_labels = train_labels[:,SEQ_LENGTH-1] # output => [0 1 0]
I couldn't figure out how to fix that error, so, as you can see in the output, I fed the model the labels of all rows contained in the sequence (a possible alternative is sketched after the final output below). Later I came up with a trick to give all items in a sequence the same label: I set every label in the sequence to the last label of the sequence. For instance, [0 0 0 1] becomes [1 1 1 1] and [0 0 1 0] becomes [0 0 0 0]. I also changed the loss function to 'binary_crossentropy', as the problem here is binary classification. Below is the code:
import tensorflow as tf
import numpy as np
BATCH_SIZE = 3
SEQ_LENGTH = 4
NUM_CLASSES = 1
LSTM_UNITS = 64
NUM_SHARDS = 4
NUM_CHANNELS = 2
tf.enable_eager_execution()
def parse_values(f1, f2, label):
    features = [f1, f2]
    return features, label

def map_label(features, label):
    sequence_label1 = tf.fill([SEQ_LENGTH], label[SEQ_LENGTH - 1])
    return features, sequence_label1

def keras_model():
    inputs = tf.keras.layers.Input(shape=(SEQ_LENGTH, NUM_CHANNELS), batch_size=BATCH_SIZE)
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(LSTM_UNITS, return_sequences=True))(inputs)
    outputs = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(NUM_CLASSES, activation='sigmoid'))(x)
    return tf.keras.Model(inputs, outputs)

dataset = tf.data.experimental.CsvDataset(filenames='../../input/aFile.csv', header=True,
                                          record_defaults=[tf.int64] * 3, select_cols=[0, 1, 2])
dataset = dataset.map(parse_values).window(size=SEQ_LENGTH, shift=1, drop_remainder=True).flat_map(
    lambda features, label: tf.data.Dataset.zip(
        (features.batch(SEQ_LENGTH), label.batch(SEQ_LENGTH)))).map(map_label)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
train_iterator = dataset.make_one_shot_iterator()
train_features, train_labels = train_iterator.get_next()
print(train_features)
train_labels = np.expand_dims(train_labels, axis=2)
print(train_labels)
model = keras_model()
model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x=train_features, y=train_labels, batch_size=BATCH_SIZE, epochs=1, steps_per_epoch=10)
And below is the output:
...tf.Tensor(
[[[ 0 100]
[ 1 101]
[ 2 102]
[ 3 103]]
[[ 1 101]
[ 2 102]
[ 3 103]
[ 4 104]]
[[ 2 102]
[ 3 103]
[ 4 104]
[ 5 105]]], shape=(3, 4, 2), dtype=int64)
[[[0]
[0]
[0]
[0]]
[[1]
[1]
[1]
[1]]
[[0]
[0]
[0]
[0]]]
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (3, 4, 2)                 0
_________________________________________________________________
bidirectional (Bidirectional (3, 4, 128)               34304
_________________________________________________________________
time_distributed (TimeDistri (3, 4, 1)                 129
=================================================================
Total params: 34,433
Trainable params: 34,433
Non-trainable params: 0
_________________________________________________________________
...
1/10 [==>...........................] - ETA: 10s - loss: 0.6866 - acc: 0.5833
10/10 [==============================] - 1s 124ms/step - loss: 0.6571 - acc: 0.6500
Process finished with exit code 0
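For completeness: the 'Incompatible shapes: [3] vs. [3,4]' error happened because the TimeDistributed head still emits one prediction per time step, shape (BATCH_SIZE, SEQ_LENGTH, 1), while a single label per sequence has shape (BATCH_SIZE,). If you truly want one label per sequence, a sketch of an alternative I have not fully tested would be to drop return_sequences and TimeDistributed so the model emits one prediction per sequence, and map each window to the label of its last row (map_label_last and keras_model_single_label are hypothetical names):

def map_label_last(features, label):
    # Hypothetical variant: one scalar label per window, taken from its last row
    return features, label[SEQ_LENGTH - 1]

def keras_model_single_label():
    inputs = tf.keras.layers.Input(shape=(SEQ_LENGTH, NUM_CHANNELS), batch_size=BATCH_SIZE)
    # return_sequences=False collapses the time dimension to one vector per sequence
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(LSTM_UNITS, return_sequences=False))(inputs)
    outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='sigmoid')(x)
    return tf.keras.Model(inputs, outputs)

With .map(map_label_last) in place of .map(map_label), labels come out with shape (BATCH_SIZE,), so the expand_dims call would use axis=1 instead of axis=2 to match the model's (BATCH_SIZE, 1) output.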
I hope this helps anyone facing similar issues.
Upvotes: 1