U-net training Error: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1

Question

I’m trying to train a Unit model on LandCoverNet dataset, which is a satellite imagery dataset that contains input images and corresponding land cover type masks. I have created a custom dataset to get my images and masks:

# Create custom dataset that accepts 4 channels images
from torch.utils.data import Dataset, DataLoader, sampler
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
import rasterio as rio
from torchvision import transforms, datasets, models
# We have two dir: inputs(folder for each image) and tatgets

class LandCoverNetDataset(BaseDataset):
  
  CLASSES = ['otherland', 'cropland', 'pastureland', 'bare soil', 'openwater', 'forestland']

  def __init__(self, inputs_dir, targets_dir, 
               classes = None,  
               augmentation=None , 
               preprocessing = False,
               pytorch=True):
    
    super().__init__()
    self.samples = []
    self.pytorch = pytorch
    self.augmentation = augmentation
    self.preprocessing = preprocessing

    # Convert str names to class values on masks
    self.class_value = [self.CLASSES.index(cls.lower()) for cls in classes]

    # Create dictionary for images and targets
    for sub_dir in os.listdir(inputs_dir):
      files = {}
      files = {
         'img_bands' : os.path.join(inputs_dir, sub_dir),
          'target' : os.path.join(targets_dir, sub_dir[:13] + "_LC_10m.png")
        }
      self.samples.append(files)



                                       
  def __len__(self):
    return len(self.samples)
  
  def normalize(self, band):
    
    '''Notmalize a numpy array to have values between 0 and 1'''  
    band_min, band_max = band.min(), band.max()
    np.seterr(divide='ignore', invalid='ignore')
    normalized_band = ((band - band_min)/(band_max - band_min))
    #Remove any nan value and subtitute by zero
    where_are_NaNs = isnan(normalized_band)
    normalized_band[where_are_NaNs] = 0
    return normalized_band


  def open_as_array(self, idx, include_ndvi = False):
    '''
      Merge the 4 bands into one image and normalize the bands
    '''
    # List indivisual bands in each image folder
    # Stack them togather
    list_bands = []
    for img_file in os.listdir(self.samples[idx]['img_bands']):
      # Get the ndvi band
      if 'NDVI' in img_file:
        ndvi_band = os.path.join(self.samples[idx]['img_bands'], img_file)
      else:
        # Get the rgb bands
        band = rio.open(os.path.join(self.samples[idx]['img_bands'], img_file)).read(1)

        if self.preprocessing:
          # preprocess the bands before stacking them (only rgb)
          band = self.normalize(band)
        list_bands.append(band)

    # Stack the bands 
    raw_rgb = np.stack(list_bands, axis=2).astype('float32')

    if include_ndvi:
      # Include the NDVI band in the input images
      ndvi = np.expand_dims(rio.open(ndvi_band).read(1).astype('float32'), 2)
      raw_rgb = np.concatenate([raw_rgb, ndvi], axis=2)

    if self.augmentation:
      transformed = self.augmentation(image = raw_rgb)
      raw_rgb  = transformed["image"]

    if self.preprocessing:
      # transpose to tensor shape
      raw_rgb = raw_rgb.transpose((2,0,1)).astype('float32')
    
    return raw_rgb

  def open_mask(self, idx):
    # Extract certain classes from mask

    mask = cv2.imread(self.samples[idx]['target'], 0)
    masks = [(mask == v) for v in self.class_value]
    mask = np.stack(masks, axis=-1).astype('long')

    if self.augmentation:
      transformed = self.augmentation(image = mask)
      mask  = transformed["image"]
    
    if self.preprocessing:
      # preprocess the mask
      mask = self.normalize(mask)
      # transpose to tensor shape
      mask = mask.transpose((2, 0, 1)).astype('long')
      mask = mask[0, :, :]
    return mask
  
  def __getitem__(self, idx):
    x = torch.tensor(self.open_as_array(idx, include_ndvi=True), dtype=torch.float)
    y = torch.tensor(self.open_mask(idx), dtype=torch.long)

    return x, y
    
  def open_as_pil(self, idx):
    arr = 256*self.open_as_array(idx)   
    return Image.fromarray(arr.astype(np.uint8), 'RGB')

  def __repr__(self):
    s = 'Dataset class with {} files'.format(self.__len__())
    return s

The input here is 4 bands. This is the shape of the first batch for both input/target

torch.Size([16, 4, 224, 224]) torch.Size([16, 224, 224]) I’m using a model from segmentation-models-pytorch library, and here is how I customized it for my case:

ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'softmax2d'
DEVICE = 'cuda'

model = smp.FPN(ENCODER, classes=len(CLASSES), activation=ACTIVATION)

# Replace the model.conv1 to accept 4 channels
# first: copy the layer's weights
weight = model.encoder.layer0.conv1.weight.clone()
model.encoder.layer0.conv1 = nn.Conv2d(4, 64,kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
with torch.no_grad():
  model.encoder.layer0.conv1.weight[:, :3] = weight
  model.encoder.layer0.conv1.weight[:, 3] = model.encoder.layer0.conv1.weight[:, 0]

loss = smp.utils.losses.NLLLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.SGD([ 
    dict(params=model.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9),
])


# create epoch runners 
# it is a simple loop of iterating over dataloader`s samples
train_epoch = smp.utils.train.TrainEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    device=DEVICE,
    verbose=True,
)

And here is my training loop


# train model for 40 epochs

max_score = 0

for i in range(0, 40):
    
    print('
Epoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)
    
    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')
        
    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')

At first, the target shape was [16, 6, 224, 224] but I had an error and found this thread that it should be [batch_size, height, width] That’s why I added this line in the Dataset class : mask = mask[0, :, :] to get ride of the number of classes dim, and here where things get confusing for me, because the output of me model is torch.Size([10, 6, 224, 224]).

This is the entire error message:

Epoch: 0
train:   0%|          | 0/157 [00:00 in ()
      7 
      8     print('
Epoch: {}'.format(i))
----> 9     train_logs = train_epoch.run(train_loader)
     10     valid_logs = valid_epoch.run(valid_loader)
     11 

3 frames
/usr/local/lib/python3.6/dist-packages/segmentation_models_pytorch/utils/functional.py in iou(pr, gt, eps, threshold, ignore_channels)
     32     pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels)
     33 
---> 34     intersection = torch.sum(gt * pr)
     35     union = torch.sum(gt) + torch.sum(pr) - intersection + eps
     36     return (intersection + eps) / union

RuntimeError: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1

Thanks!

U-net training Error: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1

Answers (1)

Related Questions