Furkan Şenoğlu

Reputation: 1

PyTorch Convolutional Neural Network (CNN) Training Batch Size Mismatch Error

I have a dataset folder with 4 subfolders, one per scene context (forest, glacier, mountain, sea); each subfolder contains approximately 25,000 JPG images of that context. I'm training a Convolutional Neural Network (CNN) in PyTorch with Stochastic Gradient Descent, and I'm getting an "Expected input batch_size to match target batch_size" error in the training loop. How can I resolve the batch size mismatch? What changes should I make in the training loop to fix this error? Thank you in advance for your suggestions!

Here's the dataset:

https://drive.google.com/file/d/1vXbTtm-PlTQPO-zN443Ehr7EsYuIoer_/view?usp=share_link

(Screenshot of the exception omitted.)

Here's my code:

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self, num_classes=4):
        super(ConvNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)

        # Max-pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected (linear) layer
        self.fc = nn.Linear(16 * 64 * 64, num_classes)  # Adjust the input size based on your image dimensions

    def forward(self, X):
        # Convolutional layers with ReLU activations and max-pooling
        X = F.relu(self.conv1(X))
        X = self.pool(X)
        X = F.relu(self.conv2(X))
        X = self.pool(X)
        X = F.relu(self.conv3(X))
        X = self.pool(X)

        # Flatten the output for the fully connected layer
        X = X.view(-1, 16 * 64 * 64)  # Adjust the size based on your image dimensions

        # Fully connected layer
        X = self.fc(X)

        return X

class SceneDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_list, self.labels = self.load_dataset()

        # Create a mapping from class names to indices
        self.class_to_index = {class_name: idx for idx, class_name in enumerate(set(self.labels))}

    def load_dataset(self):
        image_list = []
        labels = []

        for class_name in os.listdir(self.root_dir):
            class_path = os.path.join(self.root_dir, class_name)
            if os.path.isdir(class_path):
                label = class_name
                for filename in os.listdir(class_path):
                    if filename.endswith(".jpg"):
                        image_list.append(os.path.join(class_path, filename))
                        labels.append(label)

        return image_list, labels

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        img_path = self.image_list[index]
        label = self.labels[index]

        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Get the class index
        label_index = self.class_to_index[label]

        # Convert label to tensor
        label_tensor = torch.tensor(label_index, dtype=torch.long)

        return image, label_tensor

def get_dataloaders(root, train_batchsize, test_batchsize):
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

    dataset = SceneDataset(root, transform=transform)

    # Split the dataset into train, validation, and test sets
    train_size = int(0.7 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size])

    # Create data loaders
    train_dataloader = DataLoader(train_dataset, batch_size=train_batchsize, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=test_batchsize, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=test_batchsize, shuffle=False)

    return train_dataloader, val_dataloader, test_dataloader

# Example usage
root_directory = "data"
train_batchsize = 32
test_batchsize = 1
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(root_directory, train_batchsize, test_batchsize)

# Helper for visualization
def img_show(image, label):
    plt.figure()
    plt.title(f'This is a {label}')
    im = np.moveaxis(np.array(image), [0,1,2], [2, 0, 1])
    plt.imshow(im)
    plt.show()

# Visualize first 4 samples
for count, (image, label) in enumerate(train_dataloader):
    img_show(image[0], label[0])
    if count == 3:
        break

max_epoch = 300
train_batch = 32
test_batch = 1
learning_rate = 0.01

# Create train, validation, and test dataset loaders
train_loader, val_loader, test_loader = get_dataloaders(root_directory, train_batch, train_batch)  # Use the same batch size for validation

# Initialize your network
model = ConvNet()

# Define your loss function
criterion = nn.CrossEntropyLoss()

# Initialize optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=5e-04)

# Placeholder for best validation accuracy
best_val_accuracy = 0.0

# Placeholder for the best model state
best_model_state = None

# Placeholder for training and validation statistics
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Start training
for epoch in range(max_epoch):
    model = model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Ensure labels have the correct shape
        if labels.size(0) != outputs.size(0):
            labels = labels[:outputs.size(0)]

        loss = criterion(outputs, labels.squeeze().long())  # Adjusted for label size

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        print(f"Predicted shape: {predicted.shape}, Labels shape: {labels[:predicted.size(0)].squeeze().shape}")
        total_train += labels.size(0)
        batch_size = min(labels.size(0), predicted.size(0))
        correct_train += (predicted[:batch_size] == labels[:batch_size].squeeze()).sum().item()

    # Calculate training accuracy and loss
    train_accuracy = correct_train / total_train
    train_losses.append(total_train_loss / len(train_loader))
    train_accuracies.append(train_accuracy)

    # Validation
    model = model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels.squeeze().long()) # Convert labels to long tensor
            total_val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels[:predicted.size(0)].squeeze()).sum().item()

    # Calculate validation accuracy and loss
    val_accuracy = correct_val / total_val
    val_losses.append(total_val_loss / len(val_loader))
    val_accuracies.append(val_accuracy)

    # Save the best model based on validation accuracy
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_model_state = model.state_dict()

# Save the best model state to a file
best_model_path = "best_cnn_sgd.pth"
torch.save(best_model_state, best_model_path)

# Plot losses vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss vs. Epoch')
plt.legend()
plt.show()

# Plot accuracies vs epoch
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies, label='Training Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy vs. Epoch')
plt.legend()
plt.show()


I noticed that there is an issue in the training loop that leads to a mismatch between the predicted batch size and the target batch size. I tried to work around it by adjusting the data loading and the model's input dimensions, but the issue persists, and I'm looking for guidance on how to handle the batch size correctly in the training loop to resolve the error.

Upvotes: 0

Views: 131

Answers (1)

Chih-Hao Liu

Reputation: 466

The primary issue lies within your model architecture. I suggest replacing your current architecture with the following:

class ConvNet(nn.Module):
    def __init__(self, num_classes=4):
        super(ConvNet, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)

        # Max-pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected (linear) layer
        self.fc = nn.Linear(16*32*32, num_classes)  # Adjust the input size based on your image dimensions

    def forward(self, X):
        # Convolutional layers with ReLU activations and max-pooling
        X = F.relu(self.conv1(X))
        X = self.pool(X)
        X = F.relu(self.conv2(X))
        X = self.pool(X)
        X = F.relu(self.conv3(X))
        X = self.pool(X)
        # Flatten the output for the fully connected layer
        X = X.view(X.size(0),-1)   # Adjust the size based on your image dimensions
        # Fully connected layer
        X = self.fc(X)
        return X

I modified the flattening step to X.view(X.size(0), -1) and the input size of the fully connected layer self.fc. With 256x256 inputs (from your Resize transform), the three MaxPool2d(kernel_size=2, stride=2) layers halve the spatial dimensions three times (256 -> 128 -> 64 -> 32), so each sample reaches the linear layer as 16 * 32 * 32 = 16384 values, not 16 * 64 * 64. Your original X.view(-1, 16 * 64 * 64) forced the larger flattened size by merging samples, which shrank the batch dimension from 32 to 8 and caused the mismatch with the 32 labels.
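As a quick sanity check (a minimal sketch, assuming the 256x256 Resize from your transform), you can push a dummy batch through the convolutional stack and inspect the flattened shape:

import torch
import torch.nn as nn
import torch.nn.functional as F

# Dummy batch: 32 RGB images at 256x256, matching transforms.Resize((256, 256))
x = torch.randn(32, 3, 256, 256)

conv1 = nn.Conv2d(3, 4, kernel_size=3, stride=1, padding=1)
conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=1)
conv3 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)

x = pool(F.relu(conv1(x)))  # (32, 4, 128, 128)
x = pool(F.relu(conv2(x)))  # (32, 8, 64, 64)
x = pool(F.relu(conv3(x)))  # (32, 16, 32, 32)

print(x.view(x.size(0), -1).shape)  # torch.Size([32, 16384]) -- 16 * 32 * 32 per sample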

Furthermore, you should also change the loss computation from:

loss = criterion(outputs, labels.squeeze().long())

to:

loss = criterion(outputs, labels.long())
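The labels from your SceneDataset are already 1-D (one class index per sample), so squeeze() is unnecessary, and it becomes harmful whenever a batch holds a single sample (e.g., your test_batchsize = 1): squeezing a shape-[1] tensor yields a 0-d scalar, whose missing batch dimension no longer lines up with the model's (1, num_classes) output, and the loss raises a shape error. A minimal illustration:

import torch

labels = torch.tensor([2])     # one label in the batch, shape: torch.Size([1])
print(labels.squeeze().shape)  # torch.Size([]) -- the batch dimension is gone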

Additionally, in your validation step you increment total_train and correct_train inside the for images, labels in val_loader: loop, so correct_val and total_val stay at zero and val_accuracy = correct_val / total_val raises a ZeroDivisionError. Update the validation counters instead.
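For reference, here is a corrected sketch of that validation loop (same logic as yours, with the counters fixed and the squeeze() removed):

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        total_val_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_val += labels.size(0)                        # count validation samples
        correct_val += (predicted == labels).sum().item()  # count correct predictions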

Upvotes: 0
