Bridge

Reputation: 229

InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4]

I'm doing an object detection and classification problem with DICOM images containing X-ray scans of four classes. My model works when I convert all images into a numpy array and fit that to the model. But when I decided to write an image generator to feed one batch of images at a time, it throws an error:

InvalidArgumentError: Incompatible shapes: [8,3] vs. [8,4] [[node mean_squared_error/SquaredDifference (defined at <ipython-input-25-90d4137ca5f8>:6) ]] [Op:__inference_train_function_3036]

8 here is my batch_size.

Here is the code for my generator:

class My_Generator(tf.keras.utils.Sequence):
  
    def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
        
        self.filepaths = filepaths
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.image_target_size = image_target_size
    
    def __len__(self):
        return int(np.ceil(len(self.filepaths) / float(self.batch_size)))
  
    def __getitem__(self, ind) :
        
        filepaths_batch = self.filepaths[ind * self.batch_size : (ind + 1) * self.batch_size]
        
        images = []
        labels = []
        bboxes = []
        
        for filepath in filepaths_batch:
            image = self._preprocess_image(filepath)
            label, bbox = self._get_label_and_bbox(filepath)
            images.append(image)
            labels.append(label)
            bboxes.append(bbox)
        
        images_batch = np.array(images, dtype="float32")
        labels_batch = np.array(labels)
        bboxes_batch = np.array(bboxes, dtype="float32")
        
        lb = LabelBinarizer()
        labels_batch = lb.fit_transform(labels_batch)
        
        return images_batch, (labels_batch, bboxes_batch)
    
    
    def _preprocess_image(self, filepath):
        """
        Returns np array of image pixels of particular size
        """
        
        image_bytes = tf.io.read_file(filepath)

        image = tfio.image.decode_dicom_image(image_bytes, color_dim=True, on_error='lossy', scale='auto', dtype=tf.uint16)

        image = tf.squeeze(image, [0])
        
        self.h, self.w, _ = image.shape

        image = tf.image.resize(image, self.image_target_size)

        # Convert images from 1 channel to 3 channels (RGB), needed for feeding into the Keras model
        image = tf.image.grayscale_to_rgb(image).numpy()
        
        image /= 65536
        
        return image
    
    def _get_label_and_bbox(self, filepath):
        
        filename = filepath.rsplit('/')[-1].replace('.dcm', '')

        row_array = self.dataframe.loc[self.dataframe['id'] == filename].values
        bbox = [0, 0, 0, 0]
        xmin = row_array[0][1]
        ymin = row_array[0][2]
        xmax = row_array[0][3]
        ymax = row_array[0][4]
        label = row_array[0][5]
        print(self.h, self.w)

        if label == 'negative':
            bbox[0] = 0
            bbox[1] = 0
            bbox[2] = 1 / self.image_target_size[1]
            bbox[3] = 1 / self.image_target_size[0]
        else:
            bbox[0] = xmin / self.w
            bbox[1] = ymin / self.h
            bbox[2] = xmax / self.w
            bbox[3] = ymax / self.h
        return label, bbox

train_generator = My_Generator(train_data, train_result, batch_size)
val_generator = My_Generator(val_data, train_result, batch_size)

And here is my model:

base_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten()
])

flatten = base_model.output

bbox_head = Dense(128, activation="relu")(flatten)
bbox_head = Dense(64, activation="relu")(bbox_head)
bbox_head = Dense(32, activation="relu")(bbox_head)
bbox_head = Dense(4, activation="sigmoid", name="bounding_box")(bbox_head)

label_head = Dense(512, activation="relu")(flatten)
label_head = Dropout(0.5)(label_head)
label_head = Dense(512, activation="relu")(label_head)
label_head = Dropout(0.5)(label_head)
label_head = Dense(4, activation="softmax", name="class_label")(label_head)

model = Model(inputs=base_model.input, outputs=(bbox_head, label_head))

losses = {"class_label": "categorical_crossentropy", "bounding_box": "mean_squared_error",}

loss_weights = {"class_label": 1.0, "bounding_box": 1.0}

opt = Adam(learning_rate=INITIAL_LR)
model.compile(loss=losses, optimizer=opt, metrics=["accuracy"], loss_weights=loss_weights)

model.fit(train_generator,
                    steps_per_epoch = int(len(train_data) // batch_size),
                    epochs = 10,
                    verbose = 1,
                    validation_data = val_generator
                   )

Upvotes: 0

Views: 2298

Answers (1)

Natthaphon Hongcharoen

Reputation: 2430

These lines

lb = LabelBinarizer()
labels_batch = lb.fit_transform(labels_batch)

mean the LabelBinarizer is refit on every batch, so it only learns the classes actually present in that batch. If none of the 8 labels belongs to the fourth class, you only get an (8, 3) output, like

>>> lb = LabelBinarizer()
>>> lb.fit_transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0]])

There are two easy workarounds.

  1. Since you're using tf.keras anyway, just use the official Keras method:
>>> labels_batch = tf.keras.utils.to_categorical(labels_batch, 4)

You can test this with the example above:

>>> tf.keras.utils.to_categorical([0, 1, 2, 0, 2, 0, 0, 1], 4)
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)
  2. Put the LabelBinarizer in __init__, call lb.fit there on all four classes, and use lb.transform instead of lb.fit_transform in __getitem__:
    def __init__(self, filepaths, dataframe, batch_size, image_target_size=(224, 224)):
        self.filepaths = filepaths
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.image_target_size = image_target_size
        self.lb = LabelBinarizer()
        self.lb.fit([0, 1, 2, 3])  # fit once on all four classes

    def __getitem__(self, ind):
        ...
        labels_batch = self.lb.transform(labels_batch)

An example of this behaviour:

>>> lb = LabelBinarizer()
>>> lb.fit([0, 1, 2, 3])
>>> lb.transform([0, 1, 2, 0, 2, 0, 0, 1])
array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 1, 0, 0]])
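
Either way, you can sanity-check the shapes before training by pulling one batch from the generator. This is just a quick check, assuming train_generator is built exactly as in the question, batch_size is 8, and the batch is full:

>>> images_batch, (labels_batch, bboxes_batch) = train_generator[0]
>>> images_batch.shape
(8, 224, 224, 3)
>>> labels_batch.shape
(8, 4)
>>> bboxes_batch.shape
(8, 4)

labels_batch should now always have 4 columns, matching the model's 4-wide outputs, so the [8,3] vs [8,4] mismatch goes away.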

Upvotes: 1
