Reputation: 143
I have a time series (2 variables), each time series has about 80000 observations (X), each observation corresponds to a class (Y). I used moving window method to segment the time series to several intervals (each has a length of 30). Then I hot encoded the Y to make it to categorical variable.
Then I created batches with batch size of 64 with the following code
from sklearn.preprocessing import OneHotEncoder
def one_hot_encoder(y):
onehot_encoder = OneHotEncoder(sparse=False)
y = y.reshape(len(y), 1)
onehot_encoder = onehot_encoder.fit_transform(y)
return onehot_encoder
def data_generator(x, y, shuffle=False, batch_size=64):
# create order
while True:
index = np.arange(len(y))
if shuffle == True:
np.random.shuffle(index)
x = x[index]
y = y[index]
# generate batches
imax = int(len(index)/batch_size)
for i in range(imax):
yield x[i*batch_size: (i+1)*batch_size], y[i*batch_size: (i+1)*batch_size]
def get_batches(x, y):
x = np.array(x)
y = np.array(y)
return data_generator(x, one_hot_encoder(y))
For each batch print(next(batches)[0].shape) is (64, 30 ,2) -- 30 observations, 2 variables print(next(batches)1.shape) is (64, 3) - each observation corresponds to a hot-encoded class
Then I create Model with the following code:
def create_model():
model = Sequential()
model.add(BatchNormalization(axis=1, input_shape=(30, 2)))
model.add(Conv1D(16, 5, activation='relu'))
model.add(BatchNormalization(axis=1))
model.add(MaxPooling1D(2))
model.add(Conv1D(16, 5, activation='relu'))
model.add(BatchNormalization(axis=1))
model.add(MaxPooling1D(3))
model.add(Conv1D(32, 3, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
return model
model = create_model()
model.compile(RMSprop(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
The summary of the model is as follow:
But when I train the model using fit_generator, I get the following error message. I am really confused whether my output dimension is not correct? or is there any error in my code.
Thanks.
model.fit_generator(batches, steps_per_epoch=30, nb_epoch=5, validation_data=None, validation_steps=None)
ValueError Traceback (most recent call last)
<ipython-input-29-c7ba2e8eddfd> in <module>()
----> 1 model.fit_generator(batches, steps_per_epoch=10, nb_epoch=5, validation_data=None, validation_steps=None)
D:\Programs\Anaconda3\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
85 warnings.warn('Update your `' + object_name +
86 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 87 return func(*args, **kwargs)
88 wrapper._original_function = func
89 return wrapper
D:\Programs\Anaconda3\lib\site-packages\keras\models.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, initial_epoch)
1119 workers=workers,
1120 use_multiprocessing=use_multiprocessing,
-> 1121 initial_epoch=initial_epoch)
1122
1123 @interfaces.legacy_generator_methods_support
D:\Programs\Anaconda3\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
85 warnings.warn('Update your `' + object_name +
86 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 87 return func(*args, **kwargs)
88 wrapper._original_function = func
89 return wrapper
D:\Programs\Anaconda3\lib\site-packages\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
2040 outs = self.train_on_batch(x, y,
2041 sample_weight=sample_weight,
-> 2042 class_weight=class_weight)
2043
2044 if not isinstance(outs, list):
D:\Programs\Anaconda3\lib\site-packages\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1754 sample_weight=sample_weight,
1755 class_weight=class_weight,
-> 1756 check_batch_axis=True)
1757 if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
1758 ins = x + y + sample_weights + [1.]
D:\Programs\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
1380 output_shapes,
1381 check_batch_axis=False,
-> 1382 exception_prefix='target')
1383 sample_weights = _standardize_sample_weights(sample_weight,
1384 self._feed_output_names)
D:\Programs\Anaconda3\lib\site-packages\keras\engine\training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
130 ' to have ' + str(len(shapes[i])) +
131 ' dimensions, but got array with shape ' +
--> 132 str(array.shape))
133 for j, (dim, ref_dim) in enumerate(zip(array.shape, shapes[i])):
134 if not j and not check_batch_axis:
ValueError: Error when checking target: expected dense_2 to have 3 dimensions, but got array with shape (64, 3)
Upvotes: 3
Views: 2590
Reputation: 86600
When you use input_shape=(30,2)
, you are defining your input with 3 dimensions: (batchSize, 30, 2)
.
This is ok, but it's being passed through your model unchanged until it reaches the dense layers.
Dense layers won't reduce the number of dimensions, they will output (batchSize, 30, denseUnits)
.
One solution is to use a flatten layer, to reduce to only (batchSize,30*someValue)
. Then the dense will start outputting (batchSize,units)
This will provide you a 2D output that matches your 2D classes.
Before the dense layers:
model.add(Flatten())
Upvotes: 3