Reputation: 802
I am using this tutorial for instance segmentation in PyTorch. The test data the tutorial uses includes images and accompanying image masks from a dataset available here. I have an example of one of the image masks from that data set here (example data for this question). That mask looks like this by default in the dataset:
The tutorial uses this code:
mask.putpalette([
    0, 0, 0,      # black background
    255, 0, 0,    # index 1 is red
    255, 255, 0,  # index 2 is yellow
    255, 153, 0,  # index 3 is orange
])
as an explanatory step, to make the mask look like this:
but that code is not required by the segmentation process itself. It's just used to show what the mask contains.
I am trying to use my own image data. I created masks for the images in G.I.M.P. This is one of the masks I made. It looks like this by default.
When I try to run the tutorial code, I run into problems with the masks. This code chunk defines the class that builds the PyTorch dataset.
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

class PennFudanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)

dataset = PennFudanDataset('PennFudanPed/')
dataset[0]
The last line returns:
(<PIL.Image.Image image mode=RGB size=559x536 at 0x7FCB4267C390>,
{'area': tensor([35358., 36225.]), 'boxes': tensor([[159., 181., 301., 430.],
[419., 170., 534., 485.]]), 'image_id': tensor([0]), 'iscrowd': tensor([0, 0]), 'labels': tensor([1, 1]), 'masks': tensor([[[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]],
[[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]]], dtype=torch.uint8)})
When I run this code with my data,
...
dataset = four_chs('drive/MyDrive/chambers/')
dataset[0]
I get this error:
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:38: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-12074ae9ab35> in <module>()
1 len(dataset)
----> 2 dataset[0]
<ipython-input-1-99ab92a46ebe> in __getitem__(self, idx)
42 boxes = []
43 for i in range(num_objs):
---> 44 pos = np.where(masks[i])
45 xmin = np.min(pos[1])
46 xmax = np.max(pos[1])
TypeError: 'bool' object is not subscriptable
I'm not sure exactly what is going on, but there is a difference between my masks and the masks in the test data. Both are PNG files, but mine seems to have the red, green, and blue channels split out along with a fourth channel of some kind; I can't tell what it is from the shape of the object in Python. This comes from one of the masks I made:
mask2 = np.array(mask1)
mask2.shape
(5312, 2988, 4)
For one of the test data masks:
mask2 = np.array(mask)
mask2.shape
(536, 559)
There appears to be only one channel. Given the different shapes, I'm guessing that's why I get the error from this excerpt of the code I pasted earlier:
...
mask_path = os.path.join(self.root, "masks", self.masks[idx])
# note that we haven't converted the mask to RGB,
# because each color corresponds to a different instance
# with 0 being background
mask = Image.open(mask_path)
mask = np.array(mask)
# instances are encoded as different colors
obj_ids = np.unique(mask)
# first id is the background, so remove it
obj_ids = obj_ids[1:]
# split the color-encoded mask into a set
# of binary masks
masks = mask == obj_ids[:, None, None]
# get bounding box coordinates for each mask
num_objs = len(obj_ids)
for i in range(num_objs):
    pos = np.where(masks[i])
...
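To make the failure concrete: the tutorial's comparison relies on broadcasting obj_ids[:, None, None] against a two-dimensional mask. Here is a minimal sketch with made-up shapes mirroring the two cases above; it assumes nothing beyond NumPy itself:

import numpy as np

obj_ids = np.array([1, 2])

# Single-channel mask, shaped like the tutorial data: broadcasting works
mask_ok = np.zeros((536, 559), np.uint8)
masks = mask_ok == obj_ids[:, None, None]  # (2, 1, 1) vs (536, 559)
print(masks.shape)  # -> (2, 536, 559)

# Four-channel R.G.B.A. mask, shaped like a G.I.M.P. export: (2, 1, 1)
# cannot broadcast against (H, W, 4). The NumPy version in the traceback
# emits the DeprecationWarning shown above and yields the scalar False,
# so masks[0] then fails with "'bool' object is not subscriptable";
# newer NumPy raises here directly.
mask_bad = np.zeros((10, 10, 4), np.uint8)
try:
    masks = mask_bad == obj_ids[:, None, None]
    print(masks)  # -> False (older NumPy)
except ValueError as err:
    print(err)  # (newer NumPy)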
How do I get my masks' shapes to match those of the masks in the test data, so that I can use the rest of the segmentation code to create a PyTorch-compatible dataset that works with the segmentation algorithm? I'm not trying to match the height and width, just the number of channels/layers, though I don't think I want the mask to be grayscale.
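For what it's worth, one possible conversion can be sketched in NumPy, assuming each shape was drawn in one distinct R.G.B. color on a black background with anti-aliasing disabled; the file name mask.png is hypothetical:

import numpy as np
from PIL import Image

# Hypothetical file; convert('RGB') drops a G.I.M.P. alpha channel
rgb = np.array(Image.open('mask.png').convert('RGB'))

# Collapse each pixel's (R, G, B) triple to an index into the table of
# unique colors. np.unique sorts the rows, so black (0, 0, 0) comes
# first and becomes index 0, matching the tutorial's background value.
flat = rgb.reshape(-1, 3)
colors, inverse = np.unique(flat, axis=0, return_inverse=True)
mask = inverse.reshape(rgb.shape[:2]).astype(np.uint8)  # assumes < 256 colors
print(mask.shape, np.unique(mask))  # e.g. (5312, 2988) [0 1 2 3 4]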
EDIT after HansHirse's comment:
I went back to G.I.M.P. and used the Image > Mode menu to change the image to grayscale, then exported with that setting. Running the code with that file did not work.
I also found a way to convert the R.G.B. image to grayscale upon import, with Image.open().convert("L"). That does not work either.
In both cases, the problem has to do with specks of color, which I thought were separate, being mixed together. For example, I followed HansHirse's advice and filled the areas of interest with gray "colors" of 1, 2, 3, and 4, while the background stayed 0. Upon importing the file that created, the unique values in the array are 3, 5, 8, and 10. And while one of the shapes may be mostly of value 3, there are stray pixels with that value in the other shapes, so no value is contained entirely within one shape. As a result, the code draws bounding boxes that surround all four shapes instead of surrounding each shape individually.
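One way to deal with such stray pixels is to keep, for each value, only its largest connected component. A sketch using scipy.ndimage (SciPy is an assumption here; nothing in the tutorial requires it):

import numpy as np
from scipy import ndimage

def clean_mask(mask):
    """Keep only the largest connected component of each non-zero
    value; stray pixels of that value elsewhere are zeroed out."""
    cleaned = np.zeros_like(mask)
    for value in np.unique(mask)[1:]:  # skip the background, 0
        labeled, n = ndimage.label(mask == value)
        # pixel count of each component 1..n
        sizes = ndimage.sum(mask == value, labeled, range(1, n + 1))
        largest = 1 + int(np.argmax(sizes))
        cleaned[labeled == largest] = value
    return cleaned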
I'm aware of the hue, saturation, value (H.S.V.) color space and tried converting to it, but that still doesn't solve the problem for me.
I'm trying to figure out how to use something like
np.where(mask[<buffered shape1 ymin>:<buffered shape1 ymax>, <buffered shape1 xmin>:<buffered shape1 xmax>, 0] == <majority color value for shape>)
to quarter up the mask, filter each quarter on the majority color value for the shape in it, and get the actual x and y coordinates of that shape. With those coordinates, I figure I can take the min and max to create my bounding boxes.
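Roughly, that idea as a runnable helper; the function name and the (y0, y1, x0, x1) region tuple are hypothetical, not from the tutorial:

import numpy as np

def bbox_for_value(mask, value, region):
    """Crop a single-channel mask to region = (y0, y1, x0, x1), then
    return the bounding box of pixels equal to `value` inside it,
    expressed in full-image coordinates as (xmin, ymin, xmax, ymax)."""
    y0, y1, x0, x1 = region
    ys, xs = np.where(mask[y0:y1, x0:x1] == value)
    return (x0 + xs.min(), y0 + ys.min(), x0 + xs.max(), y0 + ys.max())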
Another note: when exporting from G.I.M.P., there is a drop-down menu in the export window to set the file to 8-bit R.G.B. or Gray. Select 8bpc Gray for the format needed.
Upvotes: 0
Views: 3286
Reputation: 18925
Following is an example of how to create a grayscale image representing classes for a segmentation task or similar.
On some black background, draw some shapes with fill values in the range 1, ..., #classes. For visualization purposes, this mask is plotted both as a regular grayscale image and scaled to the said value range, to emphasize that the mask looks all black in general, although there is actual content in it. This mask is saved as a lossless PNG image, then opened using Pillow and converted to mode P. The last step is to set up a proper palette for the desired number of colors, and to apply that palette using Image.putpalette.
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# Generate mask: 0 - Background | 1 - Class 1 | 2 - Class 2, and so on.
mask = np.zeros((300, 300), np.uint8)
cv2.rectangle(mask, (30, 40), (75, 60), 1, cv2.FILLED)
cv2.circle(mask, (230, 50), 85, 2, cv2.FILLED)
cv2.ellipse(mask, (230, 230), (60, 40), 0, 0, 360, 3, cv2.FILLED)
cv2.line(mask, (20, 240), (80, 260), 4, 5)
# Save mask as lossless PNG image
cv2.imwrite('mask.png', mask)
# Visualization
plt.figure(1, figsize=(18, 6))
plt.subplot(1, 3, 1), plt.imshow(mask, vmin=0, vmax=255, cmap='gray')
plt.colorbar(), plt.title('Mask when shown as regular image')
plt.subplot(1, 3, 2), plt.imshow(mask, cmap='gray')
plt.colorbar(), plt.title('Mask when shown scaled to values 0 - 4')
# Open mask with Pillow, and convert to mode 'P'
mask = Image.open('mask.png').convert('P')
# Set up and apply palette data
mask.putpalette([ 0, 0, 0, # Background - Black
255, 0, 0, # Class 1 - Red
0, 255, 0, # Class 2 - Green
0, 0, 255, # Class 3 - Blue
255, 255, 0]) # Class 4 - Yellow
# More visualization
plt.subplot(1, 3, 3), plt.imshow(mask)
plt.title('Mask when shown as indexed image')
plt.tight_layout(), plt.show()
The first steps, generating the actual mask, can of course be done in GIMP. Please be sure to use a black background, and fill values in the range 1, ..., #classes. If you have difficulties doing that because these colors are all nearly black, draw your shapes in some bright, distinguishable colors, and later just fill these with the values 1, 2, and so on.
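If you take the bright-colors route, the later remapping can also be done in NumPy. A minimal sketch, where the color table is just an assumption and should list whatever colors you actually drew with:

import numpy as np

# Hypothetical drawing colors mapped to class values 1..4
COLOR_TO_CLASS = {(255, 0, 0): 1, (0, 255, 0): 2,
                  (0, 0, 255): 3, (255, 255, 0): 4}

def colors_to_classes(rgb):
    """Replace each drawn color by its class value; every other pixel
    (the black background) stays 0."""
    out = np.zeros(rgb.shape[:2], np.uint8)
    for color, cls in COLOR_TO_CLASS.items():
        out[np.all(rgb == color, axis=-1)] = cls
    return out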
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.19041-SP0
Python: 3.9.1
PyCharm: 2021.1.1
Matplotlib: 3.4.2
NumPy: 1.20.3
OpenCV: 4.5.2
Pillow: 8.2.0
----------------------------------------
Upvotes: 1