Bobbyphtr
Bobbyphtr

Reputation: 115

ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)

I'm trying to train a stacked convolutional autoencoder with a custom data generator, because the synthetic dataset I've generated is too large to fit in memory. I followed the tutorial at https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71 but still can't get it to work.

My dataset directory is like this:

real_train
   - img 1.png
   - img 2.png
   - ....

Here's my My_Data_Generator class

class My_Data_Generator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from disk by file name.

    Every batch is a tuple ``(images, labels)``. The images named in
    ``image_filenames`` are read from the hard-coded ``real_train``
    directory, resized to 105x105x1 and divided by 255. ``labels`` is sliced
    with the same indices and returned unchanged.
    """

    def __init__(self, image_filenames, labels, batch_size):
        """Store the file names, their labels and the batch size.

        Args:
            image_filenames: Sequence of image file names, relative to the
                directory hard-coded in ``__getitem__``.
            labels: Sequence aligned with ``image_filenames``; returned
                as-is as the batch target.
            batch_size: Number of samples per batch.
        """
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.n = 0  # index of the next batch handed out by __next__

    def __next__(self):
        """Return the next batch, wrapping around after the last one."""
        # Get one batch of data
        data = self[self.n]
        # Batch index
        self.n += 1
        # If we have processed the entire dataset then start over.
        if self.n >= len(self):
            # The original referenced the method without calling it, so the
            # epoch-end hook never ran.
            self.on_epoch_end()
            self.n = 0

        return data

    def __len__(self):
        """Return the number of batches per epoch (last one may be partial)."""
        # `np.int` was removed from NumPy; the builtin int() also guarantees
        # a plain Python integer.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load batch number ``idx``.

        Returns:
            Tuple ``(images, labels)`` where ``images`` stacks the resized
            images of the batch and ``labels`` is the matching slice of
            ``self.labels`` converted to an array.
        """
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]

        # NOTE(review): if `resize` is skimage.transform.resize it already
        # returns floats in [0, 1] for uint8 input, so the extra / 255.0 may
        # scale the data twice -- confirm against the imports.
        images = np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), (105,105,1))
            for file_name in batch_x]) / 255.0

        # NOTE(review): the labels are passed through untouched. For an
        # autoencoder the target must be an image batch shaped like the model
        # output; passing file names as labels yields a non-image target.
        return images, np.array(batch_y)

And here's my code

# Load the arrays of image file names for the training and validation splits.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')

# print(X_train.shape)
# print(X_val.shape)

batch_size = 128

# NOTE(review): the file-name array is passed as both the inputs and the
# labels, so the target of every batch is an array of file names, not images.
my_training_batch_generator = My_Data_Generator(X_train, X_train, batch_size=batch_size)
my_validation_batch_generator = My_Data_Generator(X_val, X_val, batch_size=batch_size)

# Sanity check: pull one batch from each generator and print its shapes.
images, labels = next(my_training_batch_generator)
print("Train")
print(images.shape)
print(labels.shape)
images, labels = next(my_validation_batch_generator)
print("Val")
print(images.shape)
print(labels.shape)

# Model input: single-channel 105x105 images.
input_img = Input(shape=(105,105,1))

# Encoder: two Conv -> BatchNorm -> MaxPool stages.
x = Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2)) (x)
x = Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = BatchNormalization()(x)
encoded = MaxPooling2D(pool_size=(2,2))(x)

# Decoder: two Conv -> UpSampling stages.
# NOTE(review): 105 is odd, so pooling twice and upsampling twice ends at
# 104x104 (see the printed summary); the output no longer matches the input.
x = Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = UpSampling2D(size=(2,2))(x)
x = Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = UpSampling2D(size=(2,2))(x)

# Build and compile the autoencoder with Adam and a mean-squared-error loss.
adam = keras.optimizers.Adam(lr=0.01)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')

autoencoder.summary()

# Train from the generators; the step counts are hard-coded sample counts
# divided by the batch size.
num_epochs = 20
autoencoder.fit_generator(generator=my_training_batch_generator,
                    steps_per_epoch=(int(1836695 // batch_size)),
                    epochs=num_epochs,
                    verbose=1,
                    validation_data=my_validation_batch_generator,
                    validation_steps=(int(459174 // batch_size))
                    # use_multiprocessing=True,
                    # workers=6
                    )
print("Finished")

I tried to run the code, and here's the output:

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 105, 105, 1)       0
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 105, 105, 64)      147520
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 52, 52, 64)        0
_________________________________________________________________
batch_normalization_1 (Batch (None, 52, 52, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 52, 52, 128)       4718720
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 52, 128)       512
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 128)       0
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 26, 26, 64)        4718656
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 52, 52, 64)        0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 52, 52, 1)         147457
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 104, 104, 1)       0
=================================================================
Total params: 9,733,121
Trainable params: 9,732,737
Non-trainable params: 384
_________________________________________________________________
Epoch 1/20
Traceback (most recent call last):
  File "SCAE_train.py", line 142, in <module>
    validation_steps=(int(459174 // batch_size))
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
    initial_epoch=initial_epoch)
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_generator.py", line 221, in fit_generator
    reset_metrics=False)
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 1508, in train_on_batch
    class_weight=class_weight)
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training.py", line 621, in _standardize_user_data
    exception_prefix='target')
  File "C:\MyProgramFiles\Anaconda3\envs\tf_gpu\lib\site-packages\keras\engine\training_utils.py", line 135, in standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking target: expected up_sampling2d_2 to have 4 dimensions, but got array with shape (128, 1)

I'm new to Keras and Python, and I still don't know what causes it.

Upvotes: 0

Views: 513

Answers (1)

Bashir Kazimi
Bashir Kazimi

Reputation: 1377

First of all, the input and output shapes of your model do not match. Your model's input size is 105x105 while its output size is 104x104. Either use an input size that survives the two 2x2 pooling/upsampling steps (one divisible by 4, e.g. 104x104) or tweak the kernel/stride sizes in the convolutional layers.

But to come to your question, please note that the tutorial you followed performs classification, and hence uses a target shape of (batch_size, number_of_categories). You, however, are using an autoencoder, which means you should change your data generator to return appropriate targets, i.e., with the shape (batch_size, HEIGHT, WIDTH, NUM_CHANNELS), the same as your input.

Your input and output images are the same, so you do not need an extra labels argument in your data generator, just read the images and return two copies of them. Assuming you have the image files in the correct format/directory, I have edited your code to work as below:

Your Data Generator:

class My_Custom_Generator(keras.utils.Sequence):
  """Keras ``Sequence`` that yields ``(images, images)`` batches for an autoencoder.

  The images named in ``image_filenames`` are read from the hard-coded
  ``real_train`` directory, resized to ``target_size`` and divided by 255.
  The same array is returned as both input and target.
  """

  def __init__(self, image_filenames, batch_size, target_size=(104, 104, 1)):
    """Store the file names, the batch size and the output image shape.

    Args:
      image_filenames: Sequence of image file names, relative to the
        directory hard-coded in ``__getitem__``.
      batch_size: Number of samples per batch.
      target_size: ``(height, width, channels)`` every image is resized to.
        It must equal the model's input shape; the default matches the
        ``Input(shape=(104,104,1))`` used by the accompanying model.
    """
    self.image_filenames = image_filenames
    self.batch_size = batch_size
    self.target_size = target_size

  def __len__(self):
    """Return the number of batches per epoch (last one may be partial)."""
    # `np.int` was removed from NumPy; the builtin int() also guarantees a
    # plain Python integer.
    return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

  def __getitem__(self, idx):
    """Load batch number ``idx`` and return it as ``(inputs, targets)``."""
    batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]

    # The list brackets are required: the original had a closing `]` without
    # the opening `[`, which is a syntax error.
    # NOTE(review): if `resize` is skimage.transform.resize it already returns
    # floats in [0, 1] for uint8 input, so the extra / 255.0 may scale the
    # data twice -- confirm against the imports.
    current_x = np.array([
            resize(imread('E:/FontRecognition/Dataset_Final/preprocessed/real_train/' + str(file_name)), self.target_size)
                for file_name in batch_x])/255.0
    # An autoencoder reconstructs its input, so the target is the input.
    return current_x, current_x

Your Model and Script:

# Load the arrays of image file names for the training and validation splits.
X_train = np.load('X_train_filenames.npy')
X_val = np.load('X_val_filenames.npy')

# print(X_train.shape)
# print(X_val.shape)

batch_size = 128

# Use the label-free generator defined above. The original called
# My_Data_Generator here, which is a different class that requires `labels`.
my_training_batch_generator = My_Custom_Generator(X_train, batch_size=batch_size)
my_validation_batch_generator = My_Custom_Generator(X_val, batch_size=batch_size)


# 104 is divisible by 4, so two 2x2 poolings followed by two 2x upsamplings
# reproduce the input size exactly.
input_img = keras.layers.Input(shape=(104,104,1))

# Encoder: two Conv -> BatchNorm -> MaxPool stages (104 -> 52 -> 26).
x = keras.layers.Conv2D(64, kernel_size=(48,48), activation='relu', padding='same', strides=1)(input_img)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPooling2D(pool_size=(2,2), padding='same') (x)
x = keras.layers.Conv2D(128, kernel_size=(24,24), activation='relu', padding='same', strides=1)(x)
x = keras.layers.BatchNormalization()(x)
encoded = keras.layers.MaxPooling2D(pool_size=(2,2))(x)

# Decoder: two Conv -> UpSampling stages (26 -> 52 -> 104).
x = keras.layers.Conv2D(64, kernel_size=(24,24), activation='relu', padding='same', strides=1)(encoded)
x = keras.layers.UpSampling2D(size=(2,2))(x)
x = keras.layers.Conv2D(1, kernel_size=(48,48), activation='relu', padding='same', strides=1)(x)
decoded = keras.layers.UpSampling2D(size=(2,2))(x)
autoencoder = keras.Model(input_img, decoded)
autoencoder.summary()
adam = keras.optimizers.Adam(lr=0.01)
autoencoder.compile(optimizer=adam, loss='mean_squared_error')
num_epochs = 20
# steps_per_epoch / validation_steps are omitted on purpose: a
# keras.utils.Sequence reports its own length through __len__.
autoencoder.fit_generator(generator=my_training_batch_generator,
                    epochs=num_epochs,
                    verbose=1,
                    validation_data=my_validation_batch_generator
                    # use_multiprocessing=True,
                    # workers=6
                    )

Note that I have removed the steps_per_epoch and validation_steps parameters as the custom data generator inheriting keras.utils.Sequence does not need them and can infer them from data directly.

Upvotes: 1

Related Questions