Reputation: 1084
I have the following piece of code, which loads a dataset, resizes the images to 1200*800, loads their annotations, and then reports the accuracy and precision:
# resize images
resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)

masks_prediction = np.zeros((1200, 800, len(file_names)))
for i in range(len(resized_images)):
    print(i)
    image = resized_images[i]
    predictions = model.detect([image], verbose=1)
    p = predictions[0]
    masks = p['masks']
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:, :, j] == True] = True
    masks_prediction[:, :, i] = merged_mask
print(masks_prediction.shape)
# load annotations
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")

resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i in range(len(file_names)):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_names[i])
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)
# report the accuracy and precision
accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
    name = dataset.image_info[image_id]['id']
    file_name = os.path.join(IMAGE_DIR, name)
    image_id_pred = file_names.index(file_name)
    merged_mask = masks_prediction[:, :, image_id_pred]

    annotated_mask = dataset.load_mask(image_id)[0]
    merged_annotated_mask = np.zeros((1200, 800))
    for i in range(annotated_mask.shape[2]):
        merged_annotated_mask[annotated_mask[:, :, i] == True] = True

    accuracy += np.sum(merged_mask == merged_annotated_mask) / (1200 * 800)
    all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
    precision += all_correct / (np.sum(merged_mask))

print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))
However, I get the following error, which makes me think something is totally wrong with the sizes:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-38-a652e79112fb> in <module>()
10 merged_annotated_mask = np.zeros((1200, 800))
11 for i in range(annotated_mask.shape[2]):
---> 12 merged_annotated_mask[annotated_mask[:,:,i]==True] = True
13 accuracy += np.sum(merged_mask==merged_annotated_mask) / (1200 * 800)
14 all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
IndexError: boolean index did not match indexed array along dimension 0; dimension is 1200 but corresponding boolean dimension is 1572
I was not getting this error before I resized the images to 1200*800, but then I was getting a different error telling me that the sizes did not match. At this point I would really appreciate a solution that mitigates this size issue once and for all. I run into this kind of problem in Python every now and then, and it is getting frustrating.
EDIT: Including the Dataset class.
############################################################
# Dataset
############################################################

class ComponentsDataset(utils.Dataset):

    def load_components(self, dataset_dir, subset):
        """Load a subset of the Components dataset.
        dataset_dir: Root directory of the dataset.
        subset: Subset to load: train or val
        """
        # Add classes.
        self.add_class("components", 1, "screw")
        self.add_class("components", 2, "lid")
        # Which subset?
        assert subset in ["train", "val", "predict"]
        dataset_dir = os.path.join(dataset_dir, subset)
        # We mostly care about the x and y coordinates of each region.
        annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json")))
        annotations = list(annotations.values())  # don't need the dict keys
        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]
        # Add images
        for a in annotations:
            # Get the x, y coordinates of the points of the polygons that make up
            # the outline of each object instance. They are stored in the
            # shape_attributes (see JSON format above).
            polygons = [r['shape_attributes'] for r in a['regions']]
            names = [r['region_attributes'] for r in a['regions']]
            # load_mask() needs the image size to convert polygons to masks.
            # Unfortunately, VIA doesn't include it in the JSON, so we must read
            # the image. This is only manageable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]
            self.add_image(
                "components",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons,
                names=names)
    def load_mask(self, image_id):
        """Generate instance masks for an image.
        Returns:
         masks: A bool array of shape [height, width, instance count] with
             one mask per instance.
         class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a components dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Assign class_ids by reading class_names.
        # In the components dataset, regions are labeled with the names 'screw' and 'lid'.
        class_ids = np.zeros([len(info["polygons"])])
        for i, p in enumerate(class_names):
            # "name" is the attribute name decided when labeling, e.g. 'region_attributes': {name:'a'}
            if p['name'] == 'screw':
                class_ids[i] = 1
            elif p['name'] == 'lid':
                class_ids[i] = 2
            # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance.
        return mask.astype(np.bool), class_ids
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "components":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)
    def load_mask_hc(self, image_id):
        """Generate instance masks for an image.
        Returns:
         masks: A bool array of shape [height, width, instance count] with
             one mask per instance.
         class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a components dataset image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "components":
            return super(self.__class__, self).load_mask(image_id)
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        # "name" is the attribute name decided when labeling, e.g. 'region_attributes': {name:'a'}
        class_names = info["names"]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
            mask[rr, cc, i] = 1
        # Assign class_ids by reading class_names.
        # In the components dataset, regions are labeled with the names 'screw' and 'lid'.
        class_ids = np.zeros([len(info["polygons"])])
        for i, p in enumerate(class_names):
            if p['name'] == 'lid':
                class_ids[i] = 14
            elif p['name'] == 'error':
                pass
            else:
                class_ids[i] = int(p['name'])
            # assert code here to extend to other labels
        class_ids = class_ids.astype(int)
        # Return mask, and array of class IDs of each instance.
        return mask.astype(np.bool), class_ids
Upvotes: 0
Views: 370
Reputation: 11232
The images in the list resized_images are all correctly resized to (1200, 800). But the annotated masks are loaded from the dataset and are not resized on the fly:

annotated_mask = dataset.load_mask(image_id)[0]

The method load_mask generates a mask with the original height and width of the image, not the resized one. The mask size and the image size need to match.
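You can see the mismatch by printing both shapes just before the failing line (the 1572 in your traceback is the original image height):

# illustrative check inside your evaluation loop
print(masks_prediction[:, :, image_id_pred].shape)   # (1200, 800)
print(dataset.load_mask(image_id)[0].shape)          # (orig_height, orig_width, n_instances), e.g. orig_height == 1572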
You could resize the mask images after loading them (similar to resizing the input images) to make this approach work.
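A minimal sketch of that first option, assuming skimage.transform.resize and the 1200*800 target size from your code (the threshold back to bool is just one way of keeping the mask binary):

from skimage.transform import resize

# inside your evaluation loop, instead of using the raw mask:
annotated_mask = dataset.load_mask(image_id)[0]            # (orig_h, orig_w, n_instances)
annotated_mask = resize(
    annotated_mask.astype(float),
    (1200, 800, annotated_mask.shape[-1]),                 # match the resized images
    order=0,                                               # nearest neighbour keeps the mask crisp
    preserve_range=True,
    anti_aliasing=False) > 0.5                             # back to a boolean mask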
Another option is to do batch preprocessing: resize all images to a common size once, save them as .jpg again, and use those as the new inputs, so that this program does not do any resizing at all. But then you'd have to carefully adapt the other data (like the polygons) as well to match the new coordinates.
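A rough, purely illustrative sketch of that second option (RESIZED_DIR and the polygon scaling are assumptions; you would still have to write the updated VIA JSON back yourself):

import os
from glob import glob
import skimage.io
from skimage.transform import resize

TARGET_H, TARGET_W = 1200, 800

for path in glob(os.path.join(IMAGE_DIR, "*.jpg")):
    image = skimage.io.imread(path)
    orig_h, orig_w = image.shape[:2]
    resized = resize(image, (TARGET_H, TARGET_W), anti_aliasing=True)
    # save the resized image to a separate folder (hypothetical RESIZED_DIR)
    skimage.io.imsave(os.path.join(RESIZED_DIR, os.path.basename(path)),
                      (resized * 255).astype("uint8"))

    # the polygons then need the same scaling, e.g. for each region:
    # scale_y, scale_x = TARGET_H / orig_h, TARGET_W / orig_w
    # shape['all_points_y'] = [int(y * scale_y) for y in shape['all_points_y']]
    # shape['all_points_x'] = [int(x * scale_x) for x in shape['all_points_x']]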
Upvotes: 1