Just a nice guy

Reputation: 548

I can't figure out why the sizes of the tensors don't match in PyTorch

Some context:

I have been studying AI and ML for the last couple of months, and I have finally reached neural nets. Great! The problem is that when I follow a tutorial everything seems to be OK, but when I try to implement a NN by myself I always run into issues related to the sizes of the tensors.

I have seen answers to other questions (like this one), but they address the exact problem of that post. I am not looking for code to just copy and paste; I want to understand why I am facing this problem, how to handle it, and how to avoid it.

The error message:

/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([16, 2])) that is different to the input size (torch.Size([9, 2])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
  return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
  File "nn_conv.py", line 195, in <module>
    loss = loss_function(outputs, targets)
  File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 528, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/functional.py", line 2928, in mse_loss
    expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/functional.py", line 74, in broadcast_tensors
    return _VF.broadcast_tensors(tensors)  # type: ignore
RuntimeError: The size of tensor a (9) must match the size of tensor b (16) at non-singleton dimension 0
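
For reference, printing the shapes right before the loss call makes the mismatch visible (a small debugging sketch, with the sizes from the warning above as comments):

# Debugging sketch: 16 inputs go into the net, but only 9 outputs come back,
# so the problem is inside the forward pass, not in the targets.
print('inputs :', inputs.shape)   # torch.Size([16, 1, 50, 50])
print('outputs:', outputs.shape)  # torch.Size([9, 2])  <- batch dimension changed
print('targets:', targets.shape)  # torch.Size([16, 2])
loss = loss_function(outputs, targets)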

This is my code:

import os

import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class DogsVSCats():

    IMG_SIZE = 50
    CATS = 'PetImages/Cat'
    DOGS = 'PetImages/Dog'
    LABELS = {CATS: 0, DOGS: 1}

    training_data = []
    cats_count = 0
    dogs_count = 0

    def make_training_data(self):
        for label in self.LABELS.keys():

            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label, f)
                    # read the image; the grayscale conversion happens further down
                    img = cv2.imread(path)
                    if img is not None:
                        height, width = img.shape[:2]
                        if width > height:
                            height = round((height * self.IMG_SIZE) / width)
                            width = self.IMG_SIZE
                            right = 0
                            bottom = self.IMG_SIZE - height
                        else:
                            width = round((width * self.IMG_SIZE) / height)
                            height = self.IMG_SIZE
                            right = self.IMG_SIZE - width
                            bottom = 0

                        # convert to grayscale and back again (note: this leaves the image with 3 channels)
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                        img = cv2.resize(img, (width, height))
                        img = cv2.copyMakeBorder(img, 
                                                 top=0, 
                                                 bottom=bottom, 
                                                 left=0, 
                                                 right=right, 
                                                 borderType=cv2.BORDER_CONSTANT)

                        # Append the image and a one-hot vector of its label to self.training_data
                        self.training_data.append([np.array(img), np.eye(len(self.LABELS))[self.LABELS[label]]])

                        if label == self.CATS:
                            self.cats_count += 1
                        elif label == self.DOGS:
                            self.dogs_count += 1
                except cv2.error as e:
                    pass

        np.random.shuffle(self.training_data)
        np.save("PetImages/training_data.npy", self.training_data)

        print("Cats:", self.cats_count)
        print("Dogs:", self.dogs_count)

        training_data = np.load('PetImages/training_data.npy', allow_pickle=True)
        plt.imsave('PetImages/trained_example.png', training_data[1][0])


class RunningMetrics():

    def __init__(self):
        self._sum = 0
        self._count = 0

    def __call__(self):
        return self._sum/float(self._count)

    def update(self, val, size):
        self._sum += val
        self._count += size



class Net(nn.Module):

    def __init__(self, num_channels, conv_kernel_size=3, stride=1, padding=1, max_pool_kernel_size=2):
        super(Net, self).__init__()

        self._num_channels = num_channels
        self._max_pool_kernel_size = max_pool_kernel_size

        self.conv1 = nn.Conv2d(1, self._num_channels, conv_kernel_size, stride, padding)
        self.conv2 = nn.Conv2d(self._num_channels, self._num_channels*2, conv_kernel_size, stride, padding)
        self.conv3 = nn.Conv2d(self._num_channels*2, self._num_channels*4, conv_kernel_size, stride, padding)
        
        # Calculate the input size of the first fully connected layer
        self.fc1 = nn.Linear(self._num_channels*4*8*8, self._num_channels*8)
        self.fc2 = nn.Linear(self._num_channels*8, 2)



    def forward(self, x):
        
        # Conv
        x = self.conv1(x)
        x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
        x = self.conv2(x)
        x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
        x = self.conv3(x)
        x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))

        # Flatten
        x = x.view(-1, self._num_channels*4*8*8)

        # Fully Connected
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)

        # return F.log_softmax(x, dim=1)
        return F.softmax(x, dim=1)

    def save_model(self, path):
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        self.load_state_dict(torch.load(path))
        self.eval()


if __name__ == '__main__':

    print('Loading dataset')
    if not os.path.exists("PetImages/training_data.npy"):
        dogsvcats = DogsVSCats()
        dogsvcats.make_training_data()

    training_data = np.load('PetImages/training_data.npy', allow_pickle=True)

    print('Loading Net')
    net = Net(num_channels=32)
    # net = net.to(device)

    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9  )
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # loss_function = nn.NLLLoss()
    loss_function = nn.MSELoss()

    print('Converting X tensor')
    X = torch.Tensor([i[0] for i in training_data]).view(-1, 50, 50)
    X = X/255.0

    print('Converting Y tensor')
    y = torch.Tensor([i[1] for i in training_data])

    # Validation data
    VAL_PERCENT = 0.1
    val_size = int(len(X)*VAL_PERCENT)
    
    X_train = X[:-val_size]
    y_train = y[:-val_size]
    
    X_test = X[-val_size:]
    y_test = y[-val_size:]

    print('Training Set:', len(X_train))
    print('Testing Set:', len(X_test))

    BATCH_SIZE = 16
    EPOCHS = 2
    IMG_SIZE=50
    for epoch in range(EPOCHS):
        print(f'Epoch {epoch+1}/{EPOCHS}')

        running_loss = RunningMetrics()
        running_acc = RunningMetrics()

        for i in tqdm(range(0, len(X_train), BATCH_SIZE)):
            inputs = X_train[i:i+BATCH_SIZE].view(-1, 1, IMG_SIZE, IMG_SIZE)
            targets = y_train[i:i+BATCH_SIZE]
            # inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            _, preds = torch.max(outputs, 1)

            loss = loss_function(outputs, targets)
            loss.backward()

            optimizer.step()

            running_loss.update(loss.item()*BATCH_SIZE,
                                BATCH_SIZE)

            running_acc.update(torch.sum(preds == targets).float(),
                               BATCH_SIZE)

        print(f'Loss: {running_loss():.4f}, Acc: {running_acc():.4f}')
        print('-'*10)

Dataset:

I am using Microsoft's dataset of cat and dog images

EDIT:

The error in the previous message has been solved following Anonymous' advice, but now I am getting another error:

Traceback (most recent call last):
  File "nn_conv.py", line 203, in <module>
    running_acc.update(torch.sum(preds == targets).float(),
RuntimeError: The size of tensor a (16) must match the size of tensor b (2) at non-singleton dimension 1
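
If I understand correctly, preds has shape [16] (the class indices returned by torch.max) while targets is one-hot with shape [16, 2], so the elementwise == tries to broadcast them and fails. A minimal sketch of what I think the comparison should be, assuming one-hot targets:

# preds: [16] class indices; targets: [16, 2] one-hot rows.
# Compare against the argmax of the one-hot targets instead.
target_idx = torch.argmax(targets, dim=1)  # -> shape [16]
running_acc.update(torch.sum(preds == target_idx).float(),
                   BATCH_SIZE)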

Upvotes: 0

Views: 3052

Answers (1)

user15501526


Input:                16 x 1 x 50 x 50
After conv1/maxpool1: 16 x 32 x 25 x 25
After conv2/maxpool2: 16 x 64 x 12 x 12 (no padding on the pooling, so take the floor: 25 -> 12)
After conv3/maxpool3: 16 x 128 x 6 x 6 (= 73,728 values per batch; this is where your error comes from)
Flattening: you specified a view of -1 x (32 * 4 * 8 * 8) = -1 x 8192, and 73,728 / 8192 = 9, which is where the batch size of 9 comes from.

The correct flattening is -1 x (32 * 4 * 6 * 6).
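
Concretely, only two lines of your Net need to change (a sketch, keeping everything else as-is):

# in __init__: fc1 must match the real conv output, 128 feature maps of 6 x 6
self.fc1 = nn.Linear(self._num_channels*4*6*6, self._num_channels*8)

# in forward: flatten with the matching size so the batch dimension stays 16
x = x.view(-1, self._num_channels*4*6*6)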

A few tips:

  • since you are just beginning with PyTorch, look into how to use a Dataset/DataLoader instead of slicing tensors by hand (see the sketch below)
  • binary cross-entropy is more commonly used than MSE for classification (though MSE is still possible)
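
A minimal sketch of the DataLoader idea, reusing the X_train/y_train tensors from your script:

from torch.utils.data import TensorDataset, DataLoader

# Wrap the existing tensors; batching and shuffling are then handled for you
train_ds = TensorDataset(X_train.view(-1, 1, 50, 50), y_train)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)

for inputs, targets in train_loader:
    ...  # same training step as before

For the second tip, swapping nn.MSELoss() for nn.BCELoss() would work with your one-hot targets, since the softmax output is already a probability.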

Upvotes: 1
