thebeancounter

Reputation: 4839

Python TensorFlow - ReLU not learning in autoencoder task

I am using Python 2.7 and trying to get to know TensorFlow a bit better.

I am using the following code to train an autoencoder over the MNIST data. When I use sigmoid activations it generalizes OK (about 90%), but when I try ReLU the result is just about random.

This is the closest thing I found, but I did not find a solution for my issue there.

What am I doing wrong? Should I add dropout? Maybe the cost function or the optimizer does not work well with ReLU?

from __future__ import print_function

import numpy as np
import scipy.spatial
import tensorflow as tf
from scipy.spatial.distance import pdist

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)




# Parameters
learning_rate = 0.01
training_epochs = 60
batch_size = 256
display_step = 1
examples_to_show = 10

# Network Parameters
n_hidden_1 = 256 # 1st layer num features
#n_hidden_1 = 400
n_hidden_2 = 128 # 2nd layer num features
#n_hidden_2 = 250
n_hidden_3 = 60

#n_hidden_2 = 30
n_input = 784 # MNIST data input (img shape: 28*28)

# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])

keep_prob = tf.placeholder("float", None)
#keep_prob = tf.placeholder(tf.float32)

weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b3': tf.Variable(tf.random_normal([n_input])),
}




# Building the encoder
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1

    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder hidden layer with sigmoid activation #2
    # NOTE: dropout1 is computed but never used; layer_2 below is fed
    # layer_1 directly, so this dropout has no effect on the model.
    dropout1 = tf.nn.dropout(layer_1, keep_prob)

    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))

    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
                                   biases['encoder_b3']))

    return layer_3


# Building the decoder
def decoder(x):
    # Decoder hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))

    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
                                   biases['decoder_b3']))

    return layer_3

# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction
y_pred = decoder_op
x_encode = encoder_op 

# Targets (Labels) are the input data.
y_true = X

# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))

optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
#optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()



# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    print("num examples are ", mnist.train.num_examples, mnist.validation.num_examples, mnist.test.num_examples)
    total_batch = int(mnist.train.num_examples/batch_size)
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),
                  "cost=", "{:.9f}".format(c))

    print("Optimization Finished!")




    # Applying encode and decode over test set
    encode_decode = sess.run(
        y_pred, feed_dict={X: mnist.test.images[:examples_to_show]})

    encoded_data = sess.run(x_encode, feed_dict={X: mnist.test.images})


distance_matrix = scipy.spatial.distance.squareform(pdist(encoded_data))

d_m_2 = distance_matrix[:, :]  # note: a view of distance_matrix, not a copy
np.fill_diagonal(d_m_2, np.inf)

labels = np.argmax(mnist.test.labels, 1)  # these are the labels!
predictions = labels[np.argmin(d_m_2, 1)]  # label of the nearest encoded sample for each test sample
print("this is the amount of correct classifications in the test set", np.sum(labels == predictions))  # count how many matches there are

Thanks!

Upvotes: 0

Views: 508

Answers (1)

ml4294

Reputation: 2629

Maybe the ReLU is struggling with negative input values, since it is defined as R(x) := max(0, x). If the input is negative, R(x) = 0, and the gradient will be zero as well, so your optimizer has no idea how to update the parameters. You could try initializing your weights with something mainly positive, like tf.random_normal(shape=..., mean=0.5, stddev=0.2). Maybe this will reduce the issue.
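For illustration, here is a minimal sketch of that suggestion applied to the encoder weights from the question (the helper name positive_normal is hypothetical, and the mean/stddev values are the example numbers above, not tuned constants):

import tensorflow as tf

# Hypothetical helper: draw weights from a normal distribution with a
# positive mean, so most initial pre-activations are positive and the
# ReLU gradient is nonzero at the start of training.
def positive_normal(shape, mean=0.5, stddev=0.2):
    return tf.Variable(tf.random_normal(shape, mean=mean, stddev=stddev))

# Replacing the question's encoder weights (the decoder weights would
# be changed the same way):
weights = {
    'encoder_h1': positive_normal([784, 256]),
    'encoder_h2': positive_normal([256, 128]),
    'encoder_h3': positive_normal([128, 60]),
}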

Upvotes: 2
