logankilpatrick
logankilpatrick

Reputation: 14501

GAN result images are the same during the training process

I am trying to train a GAN on the MNIST dataset. The code trains right now with mixed results. The issue seems to be the generated images are actually all the same:

GAN Output after 1400 epochs

You can find my full code below. I tried looking around to see if there was a solution to this and the only thing I found mentioned using randn instead of rand but I am not using rand right now.

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle

# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))

# Import databases
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset

def choose_dataset(dataset_type):
    """Select dataset based on string variable."""
    if dataset_type == "nlp":
        return get_imdb_dataset(dir=DB_DIR)
    elif dataset_type == "computer_vision":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
    elif dataset_type == "speech_recognition":
        # (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
        (X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)        

    else:
        raise ValueError("Couldn't find dataset.")

    (X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)

    (X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)

    return (X_train, y_train), (X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize speech recognition and computer vision datasets."""
    if string == "computer vision":
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        mean = np.mean(X_train)
        std = np.std(X_train)
        X_train = (X_train-std)/mean
        X_test = (X_test-std)/mean

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """Reshape Computer Vision and Speech datasets."""

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    return (X_train, y_train), (X_test, y_test)


class GAN:
    """Generative Adversarial Network for digit generation (MNIST)."""
    def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
        
        # Input sizes
        self.img_shape = input_shape
        self.input_size = rand_vector_shape
        
        # optimizer
        self.opt = tf.keras.optimizers.Adam(lr, beta)

        # Create Generator model
        self.generator = self.generator_model()
        self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Create Discriminator model
        self.discriminator = self.discriminator_model()
        self.discriminator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Set the Discriminator as non trainable in the combined GAN model
        self.discriminator.trainable = False
        
        # Define model input and output
        input = tf.keras.Input(self.input_size)
        generated_img = self.generator(input)
        output = self.discriminator(generated_img)
        
        # Define and compile combined GAN model
        self.GAN = tf.keras.Model(input, output, name="GAN")
        self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])

        return None
        
    def discriminator_model(self):
        """Create discriminator model."""
        model = tf.keras.models.Sequential(name='Discriminator')
        model.add(layers.Flatten())
        model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

        return model

    def generator_model(self):
        """Create generator model."""
        model = tf.keras.models.Sequential(name='Generator')
        model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=1024, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='relu'))
        model.add(layers.Reshape((28,28)))
        
        return model
    
    def plot_imgs(self, epoch):
        r,c = 4,4

        fig, axs = plt.subplots(r, c)
        count = 0
        for i in range(r):
            for j in range(c):
                    noise = np.random.normal(0, 1, (1, self.input_size[0]))
                    img = self.generator.predict(noise)[0, :]
                    axs[i,j].imshow(img, cmap='gray')
                    axs[i,j].axis('off')
                    count += 1

        fig.savefig("img/img_epoch_{0}.png".format(epoch))
        plt.title("Epoch " + str(epoch))
        # plt.show()
        return None

    def train(self, X_train, batch_size=128, epochs=2000, save_interval=200):
        half_batch = batch_size//2
        y_pos_train_dis = np.ones((half_batch, 1))
        y_neg_train_dis = np.zeros((half_batch, 1))
        y_train_GAN = np.ones((batch_size, 1))
        
        for epoch in range(epochs):
            # Generate training data for Discriminator

            #   random half_batch amount of real images
            X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
            
            #   random half_batch amount of generated fake images
            X_neg_train_dis = self.generator.predict(np.random.normal(0, 1, (half_batch, self.input_size[0])))

            #   Shuffle and append data using sklearn shuffle function
            # X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
            X_train_dis, y_train_dis = tf.random.shuffle(tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0)), tf.random.shuffle(tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0))

            # Generate training data for combined GAN model
            X_train_GAN = np.random.normal(0, 1, (batch_size, self.input_size[0]))
            
            # Train Discriminator
            loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
            
            # Train Generator
            loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

            # Print results
            if epoch%save_interval == 0:
                print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
                print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
                self.plot_imgs(epoch)
                
        return 0

def main():

    gan_model = GAN()
    (X_train, _), (_, _) = choose_dataset("computer_vision")
   
    # Switch from grayscale to (-1,1)
    # X_train = X_train/127.5 - 1.0

    gan_model.train(X_train)

if __name__ == '__main__':
    main()

Here are the accuracies and losses which might give a clue as to the issue:

Discriminator loss: 1.0392613410949707, Generator loss: 0.7247573137283325
Discriminator acc.: 0.5078125, Generator acc.: 0.125
Discriminator loss: 0.7155331969261169, Generator loss: 0.7227296829223633
Discriminator acc.: 0.484375, Generator acc.: 0.0
Discriminator loss: 0.7079681158065796, Generator loss: 0.6722699403762817
Discriminator acc.: 0.4609375, Generator acc.: 1.0
Discriminator loss: 0.6883177757263184, Generator loss: 0.7037044763565063
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.7039847373962402, Generator loss: 0.6718121767044067
Discriminator acc.: 0.453125, Generator acc.: 1.0
Discriminator loss: 0.7004268169403076, Generator loss: 0.6409173607826233
Discriminator acc.: 0.4765625, Generator acc.: 1.0
Discriminator loss: 0.6883779168128967, Generator loss: 0.7788660526275635
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.6933140754699707, Generator loss: 0.6169038414955139
Discriminator acc.: 0.53125, Generator acc.: 1.0
Discriminator loss: 0.6910691261291504, Generator loss: 0.6194907426834106
Discriminator acc.: 0.5625, Generator acc.: 1.0
Discriminator loss: 0.692711353302002, Generator loss: 0.6367968320846558
Discriminator acc.: 0.5078125, Generator acc.: 1.0

Upvotes: 2

Views: 1755

Answers (1)

AloneTogether
AloneTogether

Reputation: 26698

I think you would get better results if you simply change your activation functions in your models and add a few dropout layers:

def discriminator_model(self):
  """Create discriminator model."""
  model = tf.keras.models.Sequential(name='Discriminator')
  model.add(layers.Flatten())
  model.add(layers.Dense(units=1024, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=512, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=256, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

  return model

def generator_model(self):
  """Create generator model."""
  model = tf.keras.models.Sequential(name='Generator')
  model.add(layers.Dense(units=256, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=512, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=1024, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=np.prod(self.img_shape), 
  kernel_initializer='normal', activation='tanh'))
  model.add(layers.Reshape((28,28)))
        
  return model

Also remove the tf.random.shuffle as it is making it too difficult for your generator to learn anything plausible:

X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)

The results are ok, but you would be way better off using a CNN network for enter image description here

Update, make sure your discriminator is set to trainable=False accordingly:

self.discriminator.trainable = True

loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)

self.discriminator.trainable = False
            # Train Generator
loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

                                          enter image description here

Here the whole model:

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle


def choose_dataset(dataset_type):
    """Select dataset based on string variable."""

    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    (X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)

    (X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)

    return (X_train, y_train), (X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize speech recognition and computer vision datasets."""
    if string == "computer vision":
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        mean = np.mean(X_train)
        std = np.std(X_train)
        X_train = (X_train-std)/mean
        X_test = (X_test-std)/mean

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """Reshape Computer Vision and Speech datasets."""

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    return (X_train, y_train), (X_test, y_test)


class GAN:
    """Generative Adversarial Network for digit generation (MNIST)."""
    def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
        
        # Input sizes
        self.img_shape = input_shape
        self.input_size = rand_vector_shape
        
        # optimizer
        self.opt = tf.keras.optimizers.Adam(lr, beta)

        # Create Generator model
        self.generator = self.generator_model()
        self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Create Discriminator model
        self.discriminator = self.discriminator_model()
        self.discriminator.compile(loss='binary_crossentropy', optimizer = tf.keras.optimizers.Adam(0.001, beta), metrics = ['accuracy'])
        
        # Set the Discriminator as non trainable in the combined GAN model
        self.discriminator.trainable = False
        
        # Define model input and output
        input = tf.keras.Input(self.input_size)
        generated_img = self.generator(input)
        output = self.discriminator(generated_img)
        
        # Define and compile combined GAN model
        self.GAN = tf.keras.Model(input, output, name="GAN")
        self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])

        return None
        
    def discriminator_model(self):
        """Create discriminator model."""
        model = tf.keras.models.Sequential(name='Discriminator')
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1024, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=512, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=256, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

        return model

    def generator_model(self):
        """Create generator model."""
        model = tf.keras.models.Sequential(name='Generator')
        model.add(layers.Dense(units=256, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=512, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=1024, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='tanh'))
        model.add(layers.Reshape((28,28)))
        
        return model
    
    def plot_imgs(self, epoch):
        seed = tf.random.normal((16, 100))
        predictions = self.generator(seed, training=False)

        fig = plt.figure(figsize=(4, 4))
        for i in range(predictions.shape[0]):
            plt.subplot(4, 4, i+1)
            plt.imshow(predictions[i, :, :] * 127.5 + 127.5, cmap='gray')
            plt.axis('off')

        fig.savefig("img/img_epoch_{0}.png".format(epoch))
        plt.title("Epoch " + str(epoch))
        # plt.show()
        return None

    def train(self, X_train, batch_size=128, epochs=4000, save_interval=200):
        half_batch = batch_size//2
        y_pos_train_dis = tf.ones((half_batch, 1))
        y_neg_train_dis = tf.zeros((half_batch, 1))
        y_train_GAN = tf.ones((batch_size, 1))

        for epoch in range(epochs):
            # Generate training data for Discriminator

            #   random half_batch amount of real images
            X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
            
            #   random half_batch amount of generated fake images
            X_neg_train_dis = self.generator.predict(tf.random.normal((half_batch, self.input_size[0])))

            #   Shuffle and append data using sklearn shuffle function
            # X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
            X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)

            # Generate training data for combined GAN model
            X_train_GAN = tf.random.normal((batch_size, self.input_size[0]))
            
            # Train Discriminator
            self.discriminator.trainable = True

            loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)

            self.discriminator.trainable = False
            # Train Generator
            loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

            # Print results
            if epoch%save_interval == 0:
                print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
                print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
                self.plot_imgs(epoch)
                
        return 0


gan_model = GAN()
(X_train, _), (_, _) = choose_dataset("computer_vision")

gan_model.train(X_train)

Upvotes: 2

Related Questions