Reputation: 114
I'm new to PyTorch and I'm trying to build a convolutional neural network to classify a set of images (a personal iris-recognition problem). My issue is that I have a small number of images (10 classes and 20 images per class). I tried to do data augmentation (random transforms for every epoch), but I'm not sure that these are applied at each epoch as I intended. Here's my code. Can anyone confirm that I'm doing it right? If it's not OK, is there a way to apply the transforms inside the loop?
import torch
import torchvision.transforms as transforms
from torch import utils, nn, optim, no_grad
from torch.utils.data import DataLoader

from ConvNet import ConvNet
from ImagesDataset import ImagesDataset, AddGaussianNoise
# 5-fold cross-validation training script for the iris classifier.
#
# For every fold a fresh ConvNet is trained on the other four folds and then
# evaluated on the held-out fold; the mean test accuracy over the five folds
# is printed at the end.
DATABASE_PATH = "C://Users//Maria//Downloads//ees//CASIA-IrisV2"
MODEL_PATH = "entire_model.pt"
dataArray = []

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data augmentation: random transforms that are meant to be re-drawn every
# time a training sample is loaded.
# NOTE(review): RandomCrop(5) produces 5x5 crops while the evaluation images
# below keep their full size — confirm ConvNet accepts both, or align the crop.
# NOTE(review): AddGaussianNoise runs before ToTensor here; if it expects a
# tensor it has to be moved after ToTensor — confirm against ImagesDataset.
transform = transforms.Compose([
    transforms.RandomCrop(5),
    transforms.RandomHorizontalFlip(p=0.1),
    transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0, hue=0),
    AddGaussianNoise(0.1, 0.05),
    transforms.ToTensor(),
])

# Two views over the same files: `train_dataset` carries the random
# augmentation, `dataset` only converts to tensors and is used for evaluation.
# The original script built `transform` but never handed it to any dataset,
# so no augmentation was ever applied.
# Assumes ImagesDataset applies `transform` on every item access — TODO confirm.
train_dataset = ImagesDataset(
    csv_file="generate_csv//generate_csv_correctly_detected.csv",
    root_dir=DATABASE_PATH,
    transform=transform,
)
dataset = ImagesDataset(
    csv_file="generate_csv//generate_csv_correctly_detected.csv",
    root_dir=DATABASE_PATH,
    transform=transforms.ToTensor(),
)

num_epochs = 300
num_classes = 10
batch_size = 100
learning_rate = 0.01

# the dataset is partitioned in 5 subsets to perform cross validation
sum_percents = 0
data_set = utils.data.random_split(dataset, [40, 40, 40, 40, 40])

for fold in range(5):
    test_set = data_set[fold]

    # Build the training set lazily from the indices of the other folds.
    # Concatenating the subsets into a Python list (as before) would load and
    # transform every sample exactly once, freezing any random augmentation
    # for all epochs; Subset/ConcatDataset fetch the sample again on each
    # access instead.
    train_set = utils.data.ConcatDataset([
        utils.data.Subset(train_dataset, data_set[j].indices)
        for j in range(5)
        if j != fold
    ])

    train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

    model = ConvNet(0).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    total_step = len(train_loader)
    loss_list = []
    acc_list = []

    # delete contents of loss1 file ("w" truncates and also creates the file
    # when it does not exist yet, where "r+" would raise)
    with open("loss1.txt", "w"):
        pass

    for epoch in range(num_epochs):
        print("Epoch: " + str(epoch))
        # No batch index here: the old `i` shadowed the fold index.
        for images, labels in train_loader:
            # Run the forward pass
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # set the gradients to zero
            optimizer.zero_grad()
            # compute gradients
            loss.backward()
            # update the parameters
            optimizer.step()

            # Track the accuracy
            total = labels.size(0)
            predicted = outputs.detach().argmax(dim=1)
            correct = (predicted == labels).sum().item()
            acc_list.append(correct / total)

    # Save
    torch.save(model, MODEL_PATH)

    # Test the model
    model.eval()
    with no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            # The test batches must live on the same device as the model;
            # without this the forward pass fails when running on CUDA.
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report the real number of evaluated images instead of a hard-coded count.
    print('Test Accuracy of the model on the {} test images: {} %'.format(total, (correct / total) * 100))
    sum_percents += (correct / total) * 100

print('Average accuracy is {}%'.format((sum_percents/5)))
Upvotes: 0
Views: 1182
Reputation: 979
Hi, what I meant wasn't that, but the following. I cannot completely reproduce it, since I don't have your AddGaussianNoise function.
import torchvision.transforms as T
import numpy as np
# Augmentation pipeline demo. The input array is converted to a PIL image
# first, because the crop / flip / colour-jitter steps below are fed that image.
pipeline_steps = [
    T.ToPILImage(),
    T.RandomCrop(5),
    T.RandomHorizontalFlip(p=0.1),
    T.ColorJitter(brightness=0.1, contrast=0.2, saturation=0, hue=0),
    T.ToTensor(),
]
transforms = T.Compose(pipeline_steps)

# Run the pipeline once on a random 224x224x3 uint8 array.
sample = np.random.randn(224, 224, 3).astype(np.uint8)
transforms(sample)
>>>tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0039],
[0.0000, 0.0000, 0.9882, 0.0000, 0.9882],
[0.0039, 0.9882, 0.9882, 0.0000, 0.9882],
[0.0000, 0.0039, 0.0000, 0.0000, 0.0000]],
[[0.0039, 0.0000, 0.0000, 0.0039, 0.9882],
[0.9882, 0.0000, 0.0000, 0.0000, 0.9882],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.9882, 0.0000, 0.0000, 0.0000],
[0.0000, 0.9882, 0.0000, 0.0000, 0.0039]],
[[0.0000, 0.9882, 0.0000, 0.9882, 0.0000],
[0.0000, 0.0039, 0.0000, 0.0000, 0.0000],
[0.0039, 0.0000, 0.0000, 0.0000, 0.0039],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0039, 0.0039, 0.0000, 0.0000, 0.0000]]])
So this is another assumption, but a transform should work like this, no? Since I don't have any of your code, here it is:
import torchvision.transforms as T
# Same pipeline as a Compose, handed to the dataset through its `transform`
# argument. The input is converted to a PIL image before the random steps.
pipeline_steps = [
    T.ToPILImage(),
    T.RandomCrop(5),
    T.RandomHorizontalFlip(p=0.1),
    T.ColorJitter(brightness=0.1, contrast=0.2, saturation=0, hue=0),
    T.ToTensor(),
    # A Gaussian-noise augmentation could be appended here, after ToTensor().
]
transforms = T.Compose(pipeline_steps)

dataset = ImagesDataset(
    csv_file="generate_csv//generate_csv_correctly_detected.csv",
    root_dir=DATABASE_PATH,
    transform=transforms,
)
Upvotes: 1