Reputation: 3099
I followed this tutorial to create a custom generator for my Keras model. Here is an MWE that shows the issues I'm facing:
import sys, keras
import numpy as np
import tensorflow as tf
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Input
from keras.optimizers import Adam
from keras.losses import binary_crossentropy

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, batch_size, shuffle=False):
        'Initialization'
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        #print('self.batch_size: ', self.batch_size)
        print('index: ', index)
        sys.exit()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        print('self.indexes: ', self.indexes)
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        X1 = np.empty((self.batch_size, 10), dtype=float)
        X2 = np.empty((self.batch_size, 12), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            print('i is: ', i, 'ID is: ', ID)
            # Preprocess this sample (omitted)
            X1[i,] = np.repeat(1, X1.shape[1])
            X2[i,] = np.repeat(2, X2.shape[1])

        Y = X1[:,:-1]
        return X1, X2, Y


if __name__=='__main__':

    train_ids_to_use = list(np.arange(1, 321))   # 1, 2, ..., 320
    valid_ids_to_use = list(np.arange(321, 481)) # 321, 322, ..., 480

    params = {'batch_size': 32}
    train_generator = DataGenerator(train_ids_to_use, **params)
    valid_generator = DataGenerator(valid_ids_to_use, **params)

    # Build a toy model
    input_1 = Input(shape=(3, 10))
    input_2 = Input(shape=(3, 12))
    y_input = Input(shape=(3, 10))

    concat_1 = keras.layers.concatenate([input_1, input_2])
    concat_2 = keras.layers.concatenate([concat_1, y_input])

    dense_1 = Dense(10, activation='relu')(concat_2)
    output_1 = Dense(10, activation='sigmoid')(dense_1)

    model = Model([input_1, input_2, y_input], output_1)
    print(model.summary())

    # Compile and fit_generator
    model.compile(optimizer=Adam(lr=0.001), loss=binary_crossentropy)
    model.fit_generator(generator=train_generator, validation_data=valid_generator, epochs=2, verbose=2)
I don't want to shuffle my input data. I thought that was being handled, but when I print out index inside __getitem__, I get random numbers rather than the consecutive numbers I expect. Note that I'm calling sys.exit inside __getitem__ to kill the process so I can see what's going on.
My questions:

1. Why doesn't index increase consecutively? How can I fix this?
2. When I run this in the terminal using screen, why doesn't it respond to Ctrl+C?
Upvotes: 3
Views: 1673
Reputation: 33410
You can use the shuffle argument of the fit_generator method to generate batches consecutively. From the fit_generator() documentation:

shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of Sequence (keras.utils.Sequence). Has no effect when steps_per_epoch is not None.
Just pass shuffle=False to fit_generator:
model.fit_generator(generator=train_generator, shuffle=False, ...)
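Applied to your MWE, the last line would become something like the following (only shuffle=False is added; the rest of the call stays as you had it):

model.fit_generator(generator=train_generator,
                    validation_data=valid_generator,
                    shuffle=False,   # keep the batch order fixed each epoch
                    epochs=2,
                    verbose=2)

With that change, the index passed to __getitem__ should come through as 0, 1, 2, ... within each epoch.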
Upvotes: 4