Reputation: 207
I want to train a classifier based on a pretrained network with PyTorch. What I need to do is to take a pretrained model (I tried with ResNet50), add some layers at the end (I need to do this as it is required by the project specifications) and train only those layers I add. I tried this:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
from guitar_dataset import GuitarDataset
from tqdm import tqdm
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Transform pipeline handed to both GuitarDataset instances below.
transformations = transforms.Compose([transforms.Resize((200, 200))])

# Training hyper-parameters and DataLoader settings.
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

# 80/20 train/validation split of the training folder.
dataset = GuitarDataset("../chords_data/cropped/train", transform=transformations)
train_size = int(0.8 * len(dataset))
train_set, validation_set = torch.utils.data.random_split(
    dataset, [train_size, len(dataset) - train_size]
)

train_loader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
validation_loader = DataLoader(
    dataset=validation_set,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

# Held-out test data lives in its own folder.
testset = GuitarDataset("../chords_data/cropped/test", transform=transformations)
test_loader = DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
# Load ResNet50 with pretrained weights and switch off gradient tracking for
# every parameter it currently owns.
model = models.resnet50(pretrained=True)
model.requires_grad_(False)
# Replace the stock `fc` layer with a custom head that ends in 7 outputs.
# NOTE(review): the traceback reported for this script shows `fc` receiving a
# 2-D tensor of size [32, 2048], which the leading Conv2d (4-D input) rejects.
model.fc = nn.Sequential(
    # Convolutional stage.
    nn.Conv2d(512, 64, kernel_size=(3, 3)),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(64, 64, kernel_size=(3, 3)),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.5),
    # Fully connected stage.
    nn.Flatten(),
    nn.Linear(147456, 512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(512, 64),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(64, 7),
)
model.to(device)

# The head ends in a bare nn.Linear, so the network emits unbounded logits.
# nn.BCELoss only accepts probabilities in [0, 1]; BCEWithLogitsLoss applies
# the sigmoid itself and works with one-hot float targets.
criterion = nn.BCEWithLogitsLoss()

# Only the parameters of the replacement head are handed to the optimizer.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=learning_rate)

# Destination file for the trained weights.
PATH = "./saved_models/mynet.pth"
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    The model is evaluated in eval mode without gradient tracking, and its
    previous train/eval mode is restored before returning so that a caller
    in the middle of training keeps training with dropout enabled.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches. Targets are
            compared with the arg-max over dimension 1 of the model output.
        model: The network to evaluate.

    Returns:
        The accuracy as a percentage, formatted as a string with two
        decimals. An empty loader yields ``"0.00"``.
    """
    if loader is train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")

    num_correct = 0
    num_samples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)
                scores = model(x)
                predictions = scores.argmax(1)
                # .item() keeps the counter a plain int instead of a tensor.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)

    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
    return f"{accuracy:.2f}"
def train():
    """Train the model for ``num_epochs`` epochs, then save its weights to ``PATH``.

    Validation accuracy is measured before every epoch and once more after
    the last one; that final pass through the outer loop only validates.
    """
    for epoch in range(num_epochs + 1):
        loop = tqdm(train_loader, total=len(train_loader), leave=True)
        loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
        if epoch == num_epochs:
            break

        # check_accuracy() switches the model to eval mode, so training mode
        # must be re-enabled for every epoch rather than once up front.
        model.train()
        for imgs, labels in loop:
            # Encode the integer class labels as one-hot float vectors of length 7.
            labels = torch.nn.functional.one_hot(labels, num_classes=7).float()
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loop.set_description(f"Epoch [{epoch + 1}/{num_epochs}]")
            loop.set_postfix(loss=loss.item())

    torch.save(model.state_dict(), PATH)
def test():
    """Load the weights stored at ``PATH`` and print accuracy on ``test_loader``."""
    model.load_state_dict(torch.load(PATH, map_location=device))
    # Evaluation must run with dropout disabled.
    model.eval()

    correct = 0
    total = 0
    # No gradients are needed because nothing is being trained here.
    with torch.no_grad():
        for images, labels in test_loader:
            # The model was moved to `device`; the batch has to follow it.
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # The class with the highest score is taken as the prediction.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty test loader reports 0 instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0
    print('Accuracy of the network on the test images: %d %%' % accuracy)
if __name__ == "__main__":
    # NOTE(review): `data_type` is not defined in the visible part of this
    # script - confirm it is set elsewhere.
    print(f"Working on {data_type}")
    # Fit the new head, then evaluate the saved weights on the test set.
    train()
    test()
but I get the error in the title as soon as I start the training phase. Shouldn't the downloaded model be ready-to-use?
Full stack trace:
Traceback (most recent call last):
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 139, in <module>
train()
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 99, in train
loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 83, in check_accuracy
scores = model(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torchvision/models/resnet.py", line 249, in forward
return self._forward_impl(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torchvision/models/resnet.py", line 244, in _forward_impl
x = self.fc(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/container.py", line 119, in forward
input = module(input)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 395, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 512, 3, 3], but got 2-dimensional input of size [32, 2048] instead
Upvotes: 0
Views: 852
Reputation: 2278
Your network design is wrong.
You are not supposed to add convolutional layers at the end of ResNet50's feature extractor. Put some `Linear` layers there instead:
model.fc = nn.Sequential(
    # It has to start from 2048, the size of ResNet50's feature vector.
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    # Input size must match the 256 features produced by the layer above.
    nn.Linear(256, 7),  # 7 is the number of classes
)
The `model.fc` has to start from 2048 units because that's what ResNet50's feature extractor produces.
The error is basically saying that it was expecting 4D input (because of your `Conv2d` layer at the beginning of `model.fc`) but got `(batch_size, 2048)`, because that's what ResNet50 produced.
Upvotes: 1
Reputation: 40728
You can't replace resnet50's `fc` with a convolutional network. ResNet's feature extractor is a CNN which outputs a flat 2048-long tensor; as such, the layers following it should be fully connected layers.
Upvotes: 1