What could be reason of "ValueError: axes don't match array error" for Pytorch U-net segmentation model?

Question

I'm trying to implement a segmentation model (which i used for another dataset succesfully before) for kaggle dataset called "Carvana Image Masking Challange".

I searched a lot, but still could not figured out what is the reason i am getting this error. There were some suggestion to check image dimension which could be grayscale format but it seems i have 3 channel for both original and mask images.I am grateful for all your support

My code is following:

Libraries

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
import albumentations as albu
import torch
import numpy as np
import segmentation_models_pytorch as smp

Data path

DATA_DIR = 'D:/Users/eugur/Belgeler/Jupyter/Segmentation_Kaggle'

x_train_dir = os.path.join(DATA_DIR, 'train')
y_train_dir = os.path.join(DATA_DIR, 'train_masks')

x_valid_dir = os.path.join(DATA_DIR, 'valid')
y_valid_dir = os.path.join(DATA_DIR, 'valid_masks')

x_test_dir = os.path.join(DATA_DIR, 'test')

helper function for data visualization

def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

Dataset Class

class Dataset(BaseDataset):
    """
    
    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder
        class_values (list): values of classes to extract from segmentation mask
        augmentation (albumentations.Compose): data transfromation pipeline 
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing 
            (e.g. noralization, shape manipulation, etc.)
    
    """
    
    CLASSES = ['car']
    
    def __init__(
            self, 
            images_dir, 
            masks_dir, 
            classes=None, 
            augmentation=None, 
            preprocessing=None,
    ):
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id.split('.')[0]+'_mask.gif') for image_id in self.ids]
        
        # convert str names to class values on masks
        self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]
        
        self.augmentation = augmentation
        self.preprocessing = preprocessing
    
    def __getitem__(self, i):
        
        # read data
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         mask = cv2.imread(self.masks_fps[i], 0)
        mask = cv2.VideoCapture(self.masks_fps[i],0)
        ret,mask = mask.read()
        mask = mask/255
        
        # extract certain classes from mask (e.g. cars)
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')
        
        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
        
        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
            
        return image, np.squeeze(mask,axis=3)


        
    def __len__(self):
        return len(self.ids)

Preprocessing and Augmentation

def get_training_augmentation():
    train_transform = [

        albu.HorizontalFlip(p=0.5),

        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),

        albu.PadIfNeeded(min_height=320, min_width=320, always_apply=True, border_mode=0),
        albu.RandomCrop(height=320, width=320, always_apply=True),

        albu.IAAAdditiveGaussianNoise(p=0.2),
        albu.IAAPerspective(p=0.5),

        albu.OneOf(
            [
                albu.CLAHE(p=1),
                albu.RandomBrightness(p=1),
                albu.RandomGamma(p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                albu.IAASharpen(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                albu.RandomContrast(p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(train_transform)


def get_validation_augmentation():
    """Add paddings to make image shape divisible by 32"""
    test_transform = [
        albu.PadIfNeeded(384, 480)
    ]
    return albu.Compose(test_transform)


def to_tensor(x, **kwargs):

    
    return x.transpose(0,2,1).astype('float32')


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform
    
    Args:
        preprocessing_fn (callbale): data normalization function 
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose
    
    """
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)

Model Definition

ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['car']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multicalss segmentation
DEVICE = 'cuda'

# create segmentation model with pretrained encoder
model = smp.FPN(
    encoder_name=ENCODER, 
    encoder_weights=ENCODER_WEIGHTS, 
    classes=len(CLASSES), 
    in_channels=3,
    activation=ACTIVATION,
)

preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

Data Loader

train_dataset = Dataset(
    x_train_dir, 
    y_train_dir, 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

valid_dataset = Dataset(
    x_valid_dir, 
    y_valid_dir, 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)

Optimer Definition

loss = smp.utils.losses.DiceLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

optimizer = torch.optim.Adam([ 
    dict(params=model.parameters(), lr=0.0001),
])

Training

train_epoch = smp.utils.train.TrainEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    device=DEVICE,
    verbose=True,
)

max_score = 0

for i in range(0, 20):
    
    print('
Epoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)
    
    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')
        
    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')

Error

> Epoch: 0 train:   0%|          | 0/510 [00:00 
> --------------------------------------------------------------------------- ValueError                                Traceback (most recent call
> last)  in 
>       6 
>       7     print('
Epoch: {}'.format(i))
> ----> 8     train_logs = train_epoch.run(train_loader)
>       9     valid_logs = valid_epoch.run(valid_loader)
>      10 
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\segmentation_models_pytorch\utils	rain.py
> in run(self, dataloader)
>      43 
>      44         with tqdm(dataloader, desc=self.stage_name, file=sys.stdout, disable=not (self.verbose)) as iterator:
> ---> 45             for x, y in iterator:
>      46                 x, y = x.to(self.device), y.to(self.device)
>      47                 loss, y_pred = self.batch_update(x, y)
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages	qdm\std.py
> in __iter__(self)    1169     1170         try:
> -> 1171             for obj in iterable:    1172                 yield obj    1173                 # Update and possibly print the
> progressbar.
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages	orch\utils\data\dataloader.py
> in __next__(self)
>     433         if self._sampler_iter is None:
>     434             self._reset()
> --> 435         data = self._next_data()
>     436         self._num_yielded += 1
>     437         if self._dataset_kind == _DatasetKind.Iterable and \
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages	orch\utils\data\dataloader.py
> in _next_data(self)
>     473     def _next_data(self):
>     474         index = self._next_index()  # may raise StopIteration
> --> 475         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
>     476         if self._pin_memory:
>     477             data = _utils.pin_memory.pin_memory(data)
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages	orch\utils\data\_utils\fetch.py
> in fetch(self, possibly_batched_index)
>      42     def fetch(self, possibly_batched_index):
>      43         if self.auto_collation:
> ---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
>      45         else:
>      46             data = self.dataset[possibly_batched_index]
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages	orch\utils\data\_utils\fetch.py
> in (.0)
>      42     def fetch(self, possibly_batched_index):
>      43         if self.auto_collation:
> ---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
>      45         else:
>      46             data = self.dataset[possibly_batched_index]
> 
>  in __getitem__(self, i)
>      54         # apply preprocessing
>      55         if self.preprocessing:
> ---> 56             sample = self.preprocessing(image=image, mask=mask)
>      57             image, mask = sample['image'], sample['mask']
>      58 
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core\composition.py
> in __call__(self, force_apply, *args, **data)
>     180                     p.preprocess(data)
>     181 
> --> 182             data = t(force_apply=force_apply, **data)
>     183 
>     184             if dual_start_end is not None and idx == dual_start_end[1]:
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core	ransforms_interface.py
> in __call__(self, force_apply, *args, **kwargs)
>      87                     )
>      88                 kwargs[self.save_key][id(self)] = deepcopy(params)
> ---> 89             return self.apply_with_params(params, **kwargs)
>      90 
>      91         return kwargs
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core	ransforms_interface.py
> in apply_with_params(self, params, force_apply, **kwargs)
>     100                 target_function = self._get_target_function(key)
>     101                 target_dependencies = {k: kwargs[k] for k in self.target_dependence.get(key, [])}
> --> 102                 res[key] = target_function(arg, **dict(params, **target_dependencies))
>     103             else:
>     104                 res[key] = None
> 
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\augmentations	ransforms.py
> in apply_to_mask(self, mask, **params)    3068     def
> apply_to_mask(self, mask, **params):    3069         fn =
> self.custom_apply_fns["mask"]
> -> 3070         return fn(mask, **params)    3071     3072     def apply_to_bbox(self, bbox, **params):
> 
>  in to_tensor(x, **kwargs)
>      52 
>      53 
> ---> 54     return x.transpose(0,2,1).astype('float32')
>      55 
>      56 
> 
> ValueError: axes don't match array

What could be reason of "ValueError: axes don't match array error" for Pytorch U-net segmentation model?

Libraries

Data path

helper function for data visualization

Dataset Class

Preprocessing and Augmentation

Model Definition

Data Loader

Optimer Definition

Training

Error

Answers (1)

Related Questions

What could be reason of &quot;ValueError: axes don&#39;t match array error&quot; for Pytorch U-net segmentation model?

Libraries

Data path

helper function for data visualization

Dataset Class

Preprocessing and Augmentation

Model Definition

Data Loader

Optimer Definition

Training

Error

Answers (1)

Related Questions

What could be reason of "ValueError: axes don't match array error" for Pytorch U-net segmentation model?