Reputation: 260
I am trying to implement a convolutional autoencoder in TensorFlow. I first trained my model on the MNIST dataset and everything worked perfectly: the loss decreased, and at inference time the model produced good output images. But when I tested the network on the CelebA dataset, the model fails and the loss never decreases. Training runs fast, and I tried decreasing the learning rate; even with a lower learning rate, there is not much difference in how long training takes.
Here is all the code that I use.
**Note:** I've set up a GitHub repository as well, in case it's easier for you to read the code there.
self.batch_size = 64
self.shape = shape
self.output_height = 64
self.output_width = 64
self.gf_dim = 64
self.c_dim = 3
self.strides_size = 2
self.kernel_size = 2
self.padding = 'SAME'
def encoder_conv_net(self, input_):
    self.conv1 = Model.batch_norm(self, Model.conv_2d(self, input_, [3, 3, self.c_dim, 32], name='conv1'))
    self.conv2 = Model.batch_norm(self, Model.conv_2d(self, self.conv1, [3, 3, 32, 64], name='conv2'))
    self.conv3 = Model.batch_norm(self, Model.conv_2d(self, self.conv2, [3, 3, 64, 128], name='conv3'))
    self.conv4 = Model.batch_norm(self, Model.conv_2d(self, self.conv3, [3, 3, 128, 128], name='conv4'))
    fc = tf.reshape(self.conv4, [-1, 512])
    dropout1 = tf.nn.dropout(fc, keep_prob=0.5)
    fc1 = Model.fully_connected(self, dropout1, 512)
    return tf.nn.tanh(fc1)
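One thing I am not sure about in the encoder: with 64x64 inputs and four stride-2 convolutions, conv4 should come out as 4x4x128, i.e. 2048 values per image, so the reshape to [-1, 512] quadruples the leading dimension instead of flattening one image per row. A shape check like this would confirm it (just a sketch; the expected shapes are my own arithmetic, not verified against the repo):

# Hypothetical sanity check: for 64x64 inputs and four stride-2 convs,
# conv4 should be [batch, 4, 4, 128] = 2048 values per image, so
# tf.reshape(self.conv4, [-1, 512]) makes the leading dim 4x the batch.
print(self.conv4.shape)  # conv4 shape before flattening
print(fc.shape)          # leading dimension is 4x the batch size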
def decoder_conv_net(self, input_, shape):
    g_width, g_height = shape[1], shape[0]
    g_width2, g_height2 = np.ceil(shape[1] / 2), np.ceil(shape[0] / 2)
    g_width4, g_height4 = np.ceil(shape[1] / 4), np.ceil(shape[0] / 4)
    g_width8, g_height8 = np.ceil(shape[1] / 8), np.ceil(shape[0] / 8)
    input_ = tf.reshape(input_, [-1, 4, 4, 128])
    print(input_.shape, g_width8, self.gf_dim)
    deconv1 = Model.deconv_2d(self, input_, [self.batch_size, g_width8, g_height8, self.gf_dim * 2],
                              [5, 5], name='deconv_1')
    deconv2 = Model.deconv_2d(self, deconv1, [self.batch_size, g_width4, g_height4, self.gf_dim * 2],
                              [5, 5], name='deconv_2')
    deconv3 = Model.deconv_2d(self, deconv2, [self.batch_size, g_width2, g_height2, self.gf_dim],
                              [5, 5], name='deconv_3')
    deconv4 = Model.deconv_2d(self, deconv3, [self.batch_size, g_width, g_height, self.c_dim],
                              [5, 5], name='deconv_4', relu=False)
    return tf.nn.tanh(deconv4)
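Another detail I am unsure about: np.ceil returns NumPy floats, and if Model.deconv_2d feeds the shape list straight into the output_shape of tf.nn.conv2d_transpose (my assumption, since the wrapper isn't shown here), those entries need to be ints. A minimal sketch of the cast:

# Hypothetical adjustment: cast the ceil results to int, because an
# output_shape containing floats is rejected by tf.nn.conv2d_transpose.
g_width2, g_height2 = int(np.ceil(shape[1] / 2)), int(np.ceil(shape[0] / 2))
g_width4, g_height4 = int(np.ceil(shape[1] / 4)), int(np.ceil(shape[0] / 4))
g_width8, g_height8 = int(np.ceil(shape[1] / 8)), int(np.ceil(shape[0] / 8))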
These are the encoder and decoder functions of the model. The main function looks like this:
dataset = tf.data.Dataset.from_tensor_slices(filenames)
dataset = dataset.shuffle(len(filenames))
dataset = dataset.map(parse_function, num_parallel_calls=4)
#dataset = dataset.map(train_preprocess, num_parallel_calls=4)
dataset = dataset.repeat().batch(batch_size)
#dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
dataset = dataset.prefetch(1)
iterator = tf.data.Iterator.from_structure(dataset.output_types,
                                           dataset.output_shapes)
next_element = iterator.get_next()
init_op = iterator.make_initializer(dataset)
#print(next_element)
x = next_element
#plt.imshow(x)
#x = tf.reshape(x, [64, 64, 64, 3])
ENC = Encoder(shape)
DEC = Decoder(shape)
encoding = ENC.encoder_conv_net(x)
print("Encoding output shape " + str(encoding.shape))
output = DEC.decoder_conv_net(encoding, [64,64])
print(output.shape)
loss = tf.reduce_mean(tf.squared_difference(x, output))
opt = tf.train.AdamOptimizer(learning_rate=0.1e-5)
train = opt.minimize(loss)
saver = tf.train.Saver()
init = tf.global_variables_initializer()
I run this training session in the usual way:
with tf.Session(graph=graph) as sess:
    #saver.restore(sess, '')
    sess.run(init)
    sess.run(init_op)
    a = sess.run(next_element)
    for ind in tqdm(range(nb_epoch)):
        loss_acc, outputs, _ = sess.run([loss, output, train])
        print(loss_acc)
        if ind % 40 == 0:
            print(loss_acc)
            saver.save(sess, save_path="./checkpoints/"
                                       "/model_face.ckpt", global_step=ind)
After all of this, training starts without an error, but my loss does not decrease. Here are the utility functions as well:
def parse_function(filename):
    image_string = tf.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize_images(image, [64, 64])
    return image

def train_preprocess(image):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=32.0 / 255.0)
    image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image
Upvotes: 3
Views: 1202
Reputation: 2370
By changing the final activation function to sigmoid, which is more suitable for your image encoding since the inputs are clipped to [0, 1] (image = tf.clip_by_value(image, 0.0, 1.0)), the loss starts at 0.14066154. Increasing the number of training epochs, the loss gets as low as ~0.08216808, which is reasonable given that I've only trained the model for a couple of minutes on a single Titan Xp.
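Concretely, the change is the decoder's last line (a sketch against the question's decoder_conv_net, using the deconv4 tensor from the code above):

# Replace the final tanh, whose [-1, 1] range can never match inputs
# clipped to [0, 1], with sigmoid, whose range is exactly [0, 1].
return tf.nn.sigmoid(deconv4)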
Upvotes: 3
Reputation: 51
Can you print the values of x, outputs, and the gradients? My first thoughts about the unchanged loss: (1) if x is always zero, then the output stays the same and so does the loss; (2) if x is nonzero but identical at every step, and the gradients are always zero (i.e. the weights don't update), then the output and loss also stay the same. But since you can successfully run the model on MNIST, the model itself is OK, so I suspect the problem is more likely the data.
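For example, something like this inside the session (a sketch against the question's graph; the tf.gradients op has to be created before the loop starts, and I'm assuming every trainable variable receives a gradient):

# Hypothetical debugging snippet: fetch input, output, and gradients in
# one run call so they all come from the same batch.
grads = tf.gradients(loss, tf.trainable_variables())
x_val, out_val, grad_vals = sess.run([x, output, grads])
print("input mean:", x_val.mean())    # near zero would point to a data problem
print("output mean:", out_val.mean())
print("max |grad|:", max(abs(g).max() for g in grad_vals))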
Upvotes: 1