Reputation: 14501
I am trying to train a GAN on the MNIST dataset. The code trains right now with mixed results. The issue seems to be the generated images are actually all the same:
You can find my full code below. I tried looking around to see if there was a solution to this and the only thing I found mentioned using randn
instead of rand
but I am not using rand
right now.
import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle
# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))
# Import databases
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset
def choose_dataset(dataset_type):
"""Select dataset based on string variable."""
if dataset_type == "nlp":
return get_imdb_dataset(dir=DB_DIR)
elif dataset_type == "computer_vision":
(X_train, y_train), (X_test, y_test) = mnist.load_data()
elif dataset_type == "speech_recognition":
# (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
(X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)
else:
raise ValueError("Couldn't find dataset.")
(X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)
(X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)
return (X_train, y_train), (X_test, y_test)
def normalize_dataset(string, X_train, X_test):
"""Normalize speech recognition and computer vision datasets."""
if string == "computer vision":
X_train = X_train / 255
X_test = X_test / 255
else:
mean = np.mean(X_train)
std = np.std(X_train)
X_train = (X_train-std)/mean
X_test = (X_test-std)/mean
return (X_train, X_test)
def reshape_dataset(X_train, y_train, X_test, y_test):
"""Reshape Computer Vision and Speech datasets."""
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
return (X_train, y_train), (X_test, y_test)
class GAN:
"""Generative Adversarial Network for digit generation (MNIST)."""
def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
# Input sizes
self.img_shape = input_shape
self.input_size = rand_vector_shape
# optimizer
self.opt = tf.keras.optimizers.Adam(lr, beta)
# Create Generator model
self.generator = self.generator_model()
self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
# Create Discriminator model
self.discriminator = self.discriminator_model()
self.discriminator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
# Set the Discriminator as non trainable in the combined GAN model
self.discriminator.trainable = False
# Define model input and output
input = tf.keras.Input(self.input_size)
generated_img = self.generator(input)
output = self.discriminator(generated_img)
# Define and compile combined GAN model
self.GAN = tf.keras.Model(input, output, name="GAN")
self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])
return None
def discriminator_model(self):
"""Create discriminator model."""
model = tf.keras.models.Sequential(name='Discriminator')
model.add(layers.Flatten())
model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))
return model
def generator_model(self):
"""Create generator model."""
model = tf.keras.models.Sequential(name='Generator')
model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
model.add(layers.Dense(units=1024, kernel_initializer='normal', activation='relu'))
model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='relu'))
model.add(layers.Reshape((28,28)))
return model
def plot_imgs(self, epoch):
r,c = 4,4
fig, axs = plt.subplots(r, c)
count = 0
for i in range(r):
for j in range(c):
noise = np.random.normal(0, 1, (1, self.input_size[0]))
img = self.generator.predict(noise)[0, :]
axs[i,j].imshow(img, cmap='gray')
axs[i,j].axis('off')
count += 1
fig.savefig("img/img_epoch_{0}.png".format(epoch))
plt.title("Epoch " + str(epoch))
# plt.show()
return None
def train(self, X_train, batch_size=128, epochs=2000, save_interval=200):
half_batch = batch_size//2
y_pos_train_dis = np.ones((half_batch, 1))
y_neg_train_dis = np.zeros((half_batch, 1))
y_train_GAN = np.ones((batch_size, 1))
for epoch in range(epochs):
# Generate training data for Discriminator
# random half_batch amount of real images
X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
# random half_batch amount of generated fake images
X_neg_train_dis = self.generator.predict(np.random.normal(0, 1, (half_batch, self.input_size[0])))
# Shuffle and append data using sklearn shuffle function
# X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
X_train_dis, y_train_dis = tf.random.shuffle(tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0)), tf.random.shuffle(tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0))
# Generate training data for combined GAN model
X_train_GAN = np.random.normal(0, 1, (batch_size, self.input_size[0]))
# Train Discriminator
loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
# Train Generator
loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)
# Print results
if epoch%save_interval == 0:
print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
self.plot_imgs(epoch)
return 0
def main():
gan_model = GAN()
(X_train, _), (_, _) = choose_dataset("computer_vision")
# Switch from grayscale to (-1,1)
# X_train = X_train/127.5 - 1.0
gan_model.train(X_train)
if __name__ == '__main__':
main()
Here are the accuracies and losses which might give a clue as to the issue:
Discriminator loss: 1.0392613410949707, Generator loss: 0.7247573137283325
Discriminator acc.: 0.5078125, Generator acc.: 0.125
Discriminator loss: 0.7155331969261169, Generator loss: 0.7227296829223633
Discriminator acc.: 0.484375, Generator acc.: 0.0
Discriminator loss: 0.7079681158065796, Generator loss: 0.6722699403762817
Discriminator acc.: 0.4609375, Generator acc.: 1.0
Discriminator loss: 0.6883177757263184, Generator loss: 0.7037044763565063
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.7039847373962402, Generator loss: 0.6718121767044067
Discriminator acc.: 0.453125, Generator acc.: 1.0
Discriminator loss: 0.7004268169403076, Generator loss: 0.6409173607826233
Discriminator acc.: 0.4765625, Generator acc.: 1.0
Discriminator loss: 0.6883779168128967, Generator loss: 0.7788660526275635
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.6933140754699707, Generator loss: 0.6169038414955139
Discriminator acc.: 0.53125, Generator acc.: 1.0
Discriminator loss: 0.6910691261291504, Generator loss: 0.6194907426834106
Discriminator acc.: 0.5625, Generator acc.: 1.0
Discriminator loss: 0.692711353302002, Generator loss: 0.6367968320846558
Discriminator acc.: 0.5078125, Generator acc.: 1.0
Upvotes: 2
Views: 1755
Reputation: 26698
I think you would get better results if you simply change your activation functions in your models and add a few dropout layers:
def discriminator_model(self):
"""Create discriminator model."""
model = tf.keras.models.Sequential(name='Discriminator')
model.add(layers.Flatten())
model.add(layers.Dense(units=1024, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=512, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=256, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))
return model
def generator_model(self):
"""Create generator model."""
model = tf.keras.models.Sequential(name='Generator')
model.add(layers.Dense(units=256, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dense(units=512, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dense(units=1024, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dense(units=np.prod(self.img_shape),
kernel_initializer='normal', activation='tanh'))
model.add(layers.Reshape((28,28)))
return model
Also remove the tf.random.shuffle
as it is making it too difficult for your generator to learn anything plausible:
X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)
The results are ok, but you would be way better off using a CNN network for
Update, make sure your discriminator is set to trainable=False
accordingly:
self.discriminator.trainable = True
loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
self.discriminator.trainable = False
# Train Generator
loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)
Here the whole model:
import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle
def choose_dataset(dataset_type):
"""Select dataset based on string variable."""
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)
(X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)
return (X_train, y_train), (X_test, y_test)
def normalize_dataset(string, X_train, X_test):
"""Normalize speech recognition and computer vision datasets."""
if string == "computer vision":
X_train = X_train / 255
X_test = X_test / 255
else:
mean = np.mean(X_train)
std = np.std(X_train)
X_train = (X_train-std)/mean
X_test = (X_test-std)/mean
return (X_train, X_test)
def reshape_dataset(X_train, y_train, X_test, y_test):
"""Reshape Computer Vision and Speech datasets."""
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
return (X_train, y_train), (X_test, y_test)
class GAN:
"""Generative Adversarial Network for digit generation (MNIST)."""
def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
# Input sizes
self.img_shape = input_shape
self.input_size = rand_vector_shape
# optimizer
self.opt = tf.keras.optimizers.Adam(lr, beta)
# Create Generator model
self.generator = self.generator_model()
self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
# Create Discriminator model
self.discriminator = self.discriminator_model()
self.discriminator.compile(loss='binary_crossentropy', optimizer = tf.keras.optimizers.Adam(0.001, beta), metrics = ['accuracy'])
# Set the Discriminator as non trainable in the combined GAN model
self.discriminator.trainable = False
# Define model input and output
input = tf.keras.Input(self.input_size)
generated_img = self.generator(input)
output = self.discriminator(generated_img)
# Define and compile combined GAN model
self.GAN = tf.keras.Model(input, output, name="GAN")
self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])
return None
def discriminator_model(self):
"""Create discriminator model."""
model = tf.keras.models.Sequential(name='Discriminator')
model.add(layers.Flatten())
model.add(layers.Dense(units=1024, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=512, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=256, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))
return model
def generator_model(self):
"""Create generator model."""
model = tf.keras.models.Sequential(name='Generator')
model.add(layers.Dense(units=256, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=512, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=1024, kernel_initializer='normal'))
model.add(layers.LeakyReLU(alpha=0.02))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='tanh'))
model.add(layers.Reshape((28,28)))
return model
def plot_imgs(self, epoch):
seed = tf.random.normal((16, 100))
predictions = self.generator(seed, training=False)
fig = plt.figure(figsize=(4, 4))
for i in range(predictions.shape[0]):
plt.subplot(4, 4, i+1)
plt.imshow(predictions[i, :, :] * 127.5 + 127.5, cmap='gray')
plt.axis('off')
fig.savefig("img/img_epoch_{0}.png".format(epoch))
plt.title("Epoch " + str(epoch))
# plt.show()
return None
def train(self, X_train, batch_size=128, epochs=4000, save_interval=200):
half_batch = batch_size//2
y_pos_train_dis = tf.ones((half_batch, 1))
y_neg_train_dis = tf.zeros((half_batch, 1))
y_train_GAN = tf.ones((batch_size, 1))
for epoch in range(epochs):
# Generate training data for Discriminator
# random half_batch amount of real images
X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
# random half_batch amount of generated fake images
X_neg_train_dis = self.generator.predict(tf.random.normal((half_batch, self.input_size[0])))
# Shuffle and append data using sklearn shuffle function
# X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)
# Generate training data for combined GAN model
X_train_GAN = tf.random.normal((batch_size, self.input_size[0]))
# Train Discriminator
self.discriminator.trainable = True
loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
self.discriminator.trainable = False
# Train Generator
loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)
# Print results
if epoch%save_interval == 0:
print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
self.plot_imgs(epoch)
return 0
gan_model = GAN()
(X_train, _), (_, _) = choose_dataset("computer_vision")
gan_model.train(X_train)
Upvotes: 2