conjuring

Reputation: 131

Print the batches of training data that the DataGenerator class sends to the .fit_generator() function

I have successfully implemented a custom DataGenerator class and use the .fit_generator() function in Keras because of the large amount of data. For debugging purposes, I want to print the batches of training data that the DataGenerator class is implicitly sending to .fit_generator(), but outside of my custom DataGenerator class (inside the class they can easily be printed in the __getitem__ method). main.py is where I want to print the values, and my_classes.py is where my custom DataGenerator class resides.

main.py

import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Model
from my_classes import DataGenerator

params = {'dim': (224, 224),'batch_size': 4,'n_classes': 3,'n_channels': 3,'shuffle': True}
partition = {'train': ['id-1', 'id-2', 'id-3','id-4', 'id-5', 'id-6','id-7', 'id-8', 'id-9','id-10' ,'id-11', 'id-12', 'id-13', 'id-14', 'id-15', 'id-16'], 'validation': ['id-17', 'id-18', 'id-19', 'id-20']}
labels = {'id-1': 0, 'id-2': 0, 'id-3': 0, 'id-4': 2, 'id-5': 2, 'id-6': 0,'id-7': 2, 'id-8': 1, 'id-9': 1, 'id-10': 1, 'id-11': 2, 'id-12': 0, 'id-13': 1, 'id-14': 2, 'id-15': 1, 'id-16': 0, 'id-17': 2, 'id-18': 2, 'id-19': 1, 'id-20': 1}

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

inputs = Input(shape=(224, 224, 3))
x = Conv2D(32, (3, 3), activation='relu')(inputs)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=predictions)
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#THIS IS WHERE I WANT TO PRINT THE BATCHES OF TRAINING DATA THAT ARE BEING FED BELOW BY DATAGENERATOR CLASS

#fit_generator function
model.fit_generator(generator=training_generator, validation_data=validation_generator, use_multiprocessing=True, workers=6)
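
To make concrete what I mean at the comment above the .fit_generator() call: since DataGenerator subclasses keras.utils.Sequence, I could index the generator and print one batch manually (a rough sketch; the shapes follow from the params above), but that only shows a batch I pull myself, not necessarily the batches .fit_generator() is actually being fed during training:

X_debug, y_debug = training_generator[0]   # calls DataGenerator.__getitem__(0) once
print(X_debug.shape, y_debug)              # expected: (4, 224, 224, 3) and 4 one-hot label rows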

my_classes.py

import numpy as np
import keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from numpy import expand_dims

class DataGenerator(keras.utils.Sequence):
    def __init__(self, list_IDs, labels, batch_size=4, dim=(224, 224), n_channels=3,
                 n_classes=3, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        #CAN ALSO BE PRINTED HERE
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            img = load_img('data/' + ID + '.jpg', target_size=(224, 224))
            img = img_to_array(img)
            X[i,] = expand_dims(img, axis=0)
            y[i] = self.labels[ID]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

Upvotes: 1

Views: 1419

Answers (2)

Akanksha Pathak

Reputation: 161

Instead of adding a printing() function outside my_classes.py, print list_IDs_temp in __getitem__() as follows:

def __getitem__(self, index):
    'Generate one batch of data'
    # Generate indexes of the batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

    # Find list of IDs
    list_IDs_temp = [self.list_IDs[k] for k in indexes]

    # Generate data
    X, y = self.__data_generation(list_IDs_temp)
    print(list_IDs_temp)
    return X, y
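
To see the print fire without running a full training, you can also index the generator directly, because keras.utils.Sequence objects support indexing. A quick sketch (it assumes the partition, labels and params dictionaries from the question, and that the data/ images exist):

gen = DataGenerator(partition['train'], labels, **params)
X, y = gen[0]            # calls __getitem__(0), so list_IDs_temp is printed
print(X.shape, y.shape)  # (4, 224, 224, 3) and (4, 3)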

Upvotes: 0

conjuring

Reputation: 131

Here I'm posting a simple OOP workaround to my own question, hoping that it helps someone down the line. These are the changes:

  1. Instead of keeping the two files separate, I merged them into one. This avoids the cyclic import that my second change below would have caused had they stayed in separate files.
  2. I then tweaked __getitem__() to call a printing(a, b) function that I declared outside the class, passing the batches to it as arguments.

Everything else stays intact ;) Here's the full merged code:

import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from numpy import expand_dims

class DataGenerator(keras.utils.Sequence):
    def __init__(self, list_IDs, labels, batch_size=4, dim=(224, 224), n_channels=3,n_classes=3, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        #MAKING CHANGES HERE BY CALLING printing() OUTSIDE THIS CLASS AND SENDING BATCHES IN REALTIME
        printing(X, y)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            img = load_img('data/' + ID + '.jpg', target_size=(224, 224))
            img = img_to_array(img)
            X[i,] = expand_dims(img, axis=0)
            y[i] = self.labels[ID]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

#MERGING POINT HERE------------------------------------------------------------------------------------>

params = {'dim': (224, 224),'batch_size': 4,'n_classes': 3,'n_channels': 3,'shuffle': True}
partition = {'train': ['id-1', 'id-2', 'id-3','id-4', 'id-5', 'id-6','id-7', 'id-8', 'id-9','id-10' ,'id-11', 'id-12', 'id-13', 'id-14', 'id-15', 'id-16'], 'validation': ['id-17', 'id-18', 'id-19', 'id-20']}
labels = {'id-1': 0, 'id-2': 0, 'id-3': 0, 'id-4': 2, 'id-5': 2, 'id-6': 0,'id-7': 2, 'id-8': 1, 'id-9': 1, 'id-10': 1, 'id-11': 2, 'id-12': 0, 'id-13': 1, 'id-14': 2, 'id-15': 1, 'id-16': 0, 'id-17': 2, 'id-18': 2, 'id-19': 1, 'id-20': 1}

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

inputs = Input(shape=(224, 224, 3))
x = Conv2D(32, (3, 3), activation='relu')(inputs)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=predictions)
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#BATCHES BEING SENT AS ARGUMENTS HERE
def printing(a, b):
    print("the value being sent by __getitem__ here is", a, b)

model.fit_generator(generator=training_generator, validation_data=validation_generator, use_multiprocessing=True, workers=6)
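
One caveat (just my expectation, I haven't dug into it): because .fit_generator() runs with use_multiprocessing=True and workers=6, printing() executes inside the worker processes, so its output can come out interleaved or delayed. While debugging, it may be easier to fall back to a single worker, for example:

model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=False,
                    workers=1)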

I'd be glad if someone improves upon this answer or, better still, comes up with a better solution, but for now, here you go!

Upvotes: 1
