Reputation: 33
I am trying to make a basic GAN that tries to learn a simple 3 by 3 matrix with a plus in it.
However, for some reason, the discriminator loss doesn't change.
For example:
[[0.0, 0.98, 0,01] [0.95, 0.97, 0.99] [0.02, 0.99, 0.02]]
Here is the code:
GENERATOR AND DISCRIMINATOR:
def make_generator():
model = keras.Sequential()
model.add(keras.layers.Dense(10, activation='relu', input_shape=(5, )))
model.add(keras.layers.Dense(20, activation='relu'))
model.add(keras.layers.Dense(9, activation='relu'))
model.add(keras.layers.Reshape((3, 3)))
return model
def make_discriminator():
model = keras.Sequential()
model.add(keras.layers.Dense(10, activation='relu', input_shape=[3, 3]))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(20, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(9, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(1, activation='softmax'))
return model
generator = make_generator()
discriminator = make_discriminator()
I think that the problem lies in the training, but I'm not sure.
The training program:
generator_optimizer = tf.keras.optimizers.SGD(learning_rate = 0.1)
discriminator_optimizer = tf.keras.optimizers.SGD(learning_rate = 0.1)
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
def generator_loss(generated_im):
loss = cross_entropy(tf.ones_like(generated_im), generated_im)
return loss
def discriminator_loss(real_im_pred, generated_im_pred):
loss_on_real = cross_entropy(tf.ones_like(real_im_pred), real_im_pred)
loss_on_generated = cross_entropy(tf.zeros_like(generated_im_pred), generated_im_pred)
loss = loss_on_generated + loss_on_real
return loss
@tf.function
def train_step(images, batch_size):
noise = tf.random.normal([batch_size, 5])
with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
generated_images = generator(noise, training=True)
classification_on_real = discriminator(images, training=True)
classification_on_fake = discriminator(generated_images, training=True)
gen_loss = generator_loss(generated_images)
disc_loss = discriminator_loss(classification_on_real, classification_on_fake)
gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
return gen_loss, disc_loss
def train(data, epochs, batch_size):
for epoch in range(epochs):
start = time.time()
# Keep track of the total loss and accuracy
total_gen_loss = 0
total_disc_loss = 0
for image_batch in data:
gen_loss, disc_loss = train_step(image_batch, batch_size)
total_gen_loss += gen_loss
total_disc_loss += disc_loss
print ('Time for epoch {} is {} sec, generator loss: {}, discriminator loss: {}'
.format(epoch + 1, round(time.time()-start), round(float(total_gen_loss), 2), round(float(total_disc_loss), 2)))
The output I get when running this code is the following:
Time for epoch 1 is 3 sec, generator loss: 346.15, discriminator loss: 3252.97
Time for epoch 2 is 2 sec, generator loss: 308.61, discriminator loss: 3252.97
Time for epoch 3 is 2 sec, generator loss: 308.33, discriminator loss: 3252.97
Time for epoch 4 is 2 sec, generator loss: 308.24, discriminator loss: 3252.97
Time for epoch 5 is 2 sec, generator loss: 308.19, discriminator loss: 3252.97
Time for epoch 6 is 2 sec, generator loss: 308.16, discriminator loss: 3252.97
Time for epoch 7 is 2 sec, generator loss: 308.14, discriminator loss: 3252.97
Time for epoch 8 is 2 sec, generator loss: 308.13, discriminator loss: 3252.97
Time for epoch 9 is 2 sec, generator loss: 308.12, discriminator loss: 3252.97
Time for epoch 10 is 2 sec, generator loss: 308.11, discriminator loss: 3252.97
Time for epoch 11 is 2 sec, generator loss: 308.11, discriminator loss: 3252.97
Time for epoch 12 is 2 sec, generator loss: 308.11, discriminator loss: 3252.97
Time for epoch 13 is 2 sec, generator loss: 308.1, discriminator loss: 3252.97
Time for epoch 14 is 2 sec, generator loss: 308.1, discriminator loss: 3252.97
Time for epoch 15 is 2 sec, generator loss: 308.1, discriminator loss: 3252.97
Time for epoch 16 is 2 sec, generator loss: 308.1, discriminator loss: 3252.97
Time for epoch 17 is 2 sec, generator loss: 308.09, discriminator loss: 3252.97
Time for epoch 18 is 2 sec, generator loss: 308.09, discriminator loss: 3252.97
Time for epoch 19 is 2 sec, generator loss: 308.09, discriminator loss: 3252.97
Time for epoch 20 is 2 sec, generator loss: 308.09, discriminator loss: 3252.97
If you're interested in the code for making the data, here it is:
def plus():
array = np.array([[np.random.normal(0.05, 0.01, 1)[0], np.random.normal(0.95, 0.01, 1)[0], np.random.normal(0.05, 0.01, 1)[0]],
[np.random.normal(0.95, 0.01, 1)[0], np.random.normal(0.95, 0.01, 1)[0], np.random.normal(0.95, 0.01, 1)[0]],
[np.random.normal(0.05, 0.01, 1)[0], np.random.normal(0.95, 0.01, 1)[0], np.random.normal(0.05, 0.01, 1)[0]]])
return array
def dataset(size):
X = []
for _ in range(size):
x = plus()
X.append(x)
return np.array(X)
def get_batches(x, batch_size):
batches = []
for i in range(0, x.shape[0], batch_size):
batch = x[i:i + batch_size]
batches.append(batch)
random.shuffle(batches)
return np.array(batches)
BATCH_SIZE = 10
data = dataset(20000)
data = get_batches(data, BATCH_SIZE)
I hope you can help! Thanks a lot.
Upvotes: 1
Views: 418
Reputation:
From the comments:
The Activation function to be used in the last layer of the
Discriminator
should beSigmoid
, instead ofSoftmax
, as theFinal Dense Layer
of theDiscriminator
has only 1Node/Neuron/Unit
. (paraphrased from xdurch0).
Upvotes: 1