Reputation: 51
I'm trying to implement a Generative Adversarial Network (GAN) for the MNIST dataset, using PyTorch. My problem is that after one epoch the Discriminator's and the Generator's losses stop changing.
I have already tried two other ways of building the network, but they all lead to the same problem :/
import os
import torch
import matplotlib.pyplot as plt
import matplotlib.gridspec as grd
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision #Datasets
from torchvision.utils import save_image
import torchvision.transforms as transforms
from torch.autograd import Variable
import pylab
# Parameters
batch_size = 64
epochs = 50000
image_size = 784
hidden_size = 392
sample_dir = 'samples'
save_dir = 'save'
noise_size = 100
lr = 0.001
# Image processing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])
# Discriminator
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, 1),
    nn.Sigmoid()
)
# Generator
G = nn.Sequential(
    nn.Linear(noise_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, image_size),
    nn.Sigmoid()
)
# Loss function and optimizer (sigmoid cross entropy with logits and Adam)
criterion = nn.BCEWithLogitsLoss()
# Move the models to the GPU, since the training loop feeds them .cuda() tensors
D = D.cuda()
G = G.cuda()
d_optimizer = torch.optim.Adam(D.parameters(), lr=lr)
g_optimizer = torch.optim.Adam(G.parameters(), lr=lr)
def reset_grad():
    d_optimizer.zero_grad()
    g_optimizer.zero_grad()
# Statistics to be saved
d_losses = np.zeros(epochs)
g_losses = np.zeros(epochs)
real_scores = np.zeros(epochs)
fake_scores = np.zeros(epochs)
# MNIST dataset and data loader
mnist = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
data_loader = torch.utils.data.DataLoader(dataset=mnist, batch_size=batch_size, shuffle=True)
# Start training
total_step = len(data_loader)
for epoch in range(epochs):
    for i, (images, _) in enumerate(data_loader):
        # Skip the last, smaller batch so the label tensors always match
        if images.shape[0] != batch_size:
            continue
        images = images.view(batch_size, -1).cuda()
        images = Variable(images)

        # Create the labels which are later used as input for the BCE loss
        real_labels = torch.ones(batch_size, 1).cuda()
        real_labels = Variable(real_labels)
        fake_labels = torch.zeros(batch_size, 1).cuda()
        fake_labels = Variable(fake_labels)

        # Train discriminator
        # Compute BCE_WithLogitsLoss using real images
        outputs = D(images)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs

        # Compute BCE_WithLogitsLoss using fake images
        # First term of the loss is always zero since fake_labels == 0
        z = torch.randn(batch_size, noise_size).cuda()
        z = Variable(z)
        fake_images = G(z)
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs

        # Backprop and optimize
        # If D is trained so well, then don't update
        d_loss = d_loss_real + d_loss_fake
        reset_grad()
        d_loss.backward()
        d_optimizer.step()

        # Train generator
        # Compute loss with fake images
        z = torch.randn(batch_size, noise_size).cuda()
        z = Variable(z)
        fake_images = G(z)
        outputs = D(fake_images)
        # We train G to maximize log(D(G(z))) instead of minimizing log(1 - D(G(z)))
        # For the reason, see the last paragraph of section 3: https://arxiv.org/pdf/1406.2661.pdf
        g_loss = criterion(outputs, real_labels)

        # Backprop and optimize
        # If G is trained so well, then don't update
        reset_grad()
        g_loss.backward()
        g_optimizer.step()

        # Update statistics (running averages over the epoch)
        d_losses[epoch] = d_losses[epoch]*(i/(i+1.)) + d_loss.item()*(1./(i+1.))
        g_losses[epoch] = g_losses[epoch]*(i/(i+1.)) + g_loss.item()*(1./(i+1.))
        real_scores[epoch] = real_scores[epoch]*(i/(i+1.)) + real_score.mean().item()*(1./(i+1.))
        fake_scores[epoch] = fake_scores[epoch]*(i/(i+1.)) + fake_score.mean().item()*(1./(i+1.))

    # Print results once per epoch
    print('Epoch [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
          .format(epoch, epochs, d_loss.item(), g_loss.item(),
                  real_score.mean().item(), fake_score.mean().item()))
The Generator's and Discriminator's losses should change from epoch to epoch, but they don't:
Epoch [0/50000], d_loss: 1.0069, g_loss: 0.6927, D(x): 1.00, D(G(z)): 0.00
Epoch [1/50000], d_loss: 1.0065, g_loss: 0.6931, D(x): 1.00, D(G(z)): 0.00
Epoch [2/50000], d_loss: 1.0064, g_loss: 0.6931, D(x): 1.00, D(G(z)): 0.00
Epoch [3/50000], d_loss: 1.0064, g_loss: 0.6931, D(x): 1.00, D(G(z)): 0.00
Epoch [4/50000], d_loss: 1.0064, g_loss: 0.6931, D(x): 1.00, D(G(z)): 0.00
Epoch [5/50000], d_loss: 1.0064, g_loss: 0.6931, D(x): 1.00, D(G(z)): 0.00
Thanks for your help.
Upvotes: 1
Views: 2915
Reputation: 51
I found the solution to the problem. BCEWithLogitsLoss() and Sigmoid() don't work together, because BCEWithLogitsLoss() already includes the sigmoid activation. So you can either use BCEWithLogitsLoss() without the final Sigmoid(), or keep the Sigmoid() and use BCELoss() instead.
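For example, here is a minimal sketch of the first option, reusing the names from the code above (image_size, hidden_size). Only D changes; the Sigmoid() at the end of G is unaffected, because G's output is an image, not a logit fed into the loss:

import torch
import torch.nn as nn

image_size = 784
hidden_size = 392

# Discriminator without the final Sigmoid(): it now outputs raw logits
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, 1)
)

# BCEWithLogitsLoss() applies the sigmoid internally before computing the loss
criterion = nn.BCEWithLogitsLoss()

# D's outputs are now logits, so to log D(x) / D(G(z)) as probabilities,
# apply the sigmoid explicitly, e.g.:
# real_score = torch.sigmoid(D(images))

The second option is the mirror image: keep Sigmoid() as the last layer of D and use criterion = nn.BCELoss() instead.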
Upvotes: 3