Reputation: 3099
I followed this tutorial to create a custom generator for my Keras model. Here is an MWE that shows the issues I'm facing:
import sys, keras
import numpy as np
import tensorflow as tf
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Input
from keras.optimizers import Adam
from keras.losses import binary_crossentropy

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, batch_size, shuffle=False):
        'Initialization'
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        #print('self.batch_size: ', self.batch_size)
        print('index: ', index)
        sys.exit()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        print('self.indexes: ', self.indexes)
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        X1 = np.empty((self.batch_size, 10), dtype=float)
        X2 = np.empty((self.batch_size, 12), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            print('i is: ', i, 'ID is: ', ID)
            # Preprocess this sample (omitted)
            X1[i,] = np.repeat(1, X1.shape[1])
            X2[i,] = np.repeat(2, X2.shape[1])

        Y = X1[:,:-1]
        return X1, X2, Y


if __name__=='__main__':

    train_ids_to_use = list(np.arange(1, 321))   # 1, 2, ..., 320
    valid_ids_to_use = list(np.arange(321, 481)) # 321, 322, ..., 480

    params = {'batch_size': 32}
    train_generator = DataGenerator(train_ids_to_use, **params)
    valid_generator = DataGenerator(valid_ids_to_use, **params)

    # Build a toy model
    input_1 = Input(shape=(3, 10))
    input_2 = Input(shape=(3, 12))
    y_input = Input(shape=(3, 10))

    concat_1 = keras.layers.concatenate([input_1, input_2])
    concat_2 = keras.layers.concatenate([concat_1, y_input])

    dense_1 = Dense(10, activation='relu')(concat_2)
    output_1 = Dense(10, activation='sigmoid')(dense_1)

    model = Model([input_1, input_2, y_input], output_1)
    print(model.summary())

    # Compile and fit_generator
    model.compile(optimizer=Adam(lr=0.001), loss=binary_crossentropy)
    model.fit_generator(generator=train_generator, validation_data=valid_generator, epochs=2, verbose=2)
I don't want to shuffle my input data. I thought that was being handled, but when I print out index inside __getitem__, I get random numbers rather than the consecutive numbers I expect. Note that I'm calling sys.exit inside __getitem__ to kill the process so I can see what's going on.
My questions:

1. Why doesn't index increase consecutively? How can I fix this?
2. When I run this in the terminal using screen, why doesn't it respond to Ctrl+C?
Upvotes: 3
Views: 1673
Reputation: 33410
You can use the shuffle argument of the fit_generator method to generate batches consecutively. From the fit_generator() documentation:

shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of Sequence (keras.utils.Sequence). Has no effect when steps_per_epoch is not None.
Just pass shuffle=False to fit_generator:
model.fit_generator(generator=train_generator, shuffle=False, ...)
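Applied to your MWE, the last line would become something like the following (only shuffle=False is added; the rest of the call stays as you had it):

model.fit_generator(generator=train_generator,
                    validation_data=valid_generator,
                    shuffle=False,   # keep the batch order fixed each epoch
                    epochs=2,
                    verbose=2)

With that change, the index passed to __getitem__ should come through as 0, 1, 2, ... within each epoch.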
Upvotes: 4