Garovat Adrian

Reputation: 31

TensorFlow, binary classification

Is it possible to do binary classification in TensorFlow, specifically for pedestrian detection: deciding whether an image contains a pedestrian or not? I couldn't find anything in the API docs or any good tutorials for this. I tried to adapt the code from the deep MNIST tutorial, which was written for multi-class classification: I labeled the images containing pedestrians with 1 and the negatives with 0, and used 3 channels (for colour, which shouldn't be a problem, right?), but the accuracy just jumps all over the place.
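
For context, the 0/1 labels get expanded into the one-hot rows that y_ expects, roughly like this (a minimal sketch of the idea; to_one_hot is an illustrative helper, not the actual code of my dataset module):

    import numpy as np

    def to_one_hot(labels, num_classes=2):
        # label 1 (pedestrian) -> [0, 1], label 0 (negative) -> [1, 0]
        one_hot = np.zeros((len(labels), num_classes), dtype=np.float32)
        one_hot[np.arange(len(labels)), labels] = 1.0
        return one_hot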

The code

    import dataset as input_data
    import tensorflow as tf


    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)


    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)


    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME')


    data = input_data.read_data_sets()

    sess = tf.InteractiveSession()

    x = tf.placeholder("float", shape=[None, input_data.HEIGHT * input_data.WIDTH * 3])
    y_ = tf.placeholder("float", shape=[None, 2])

    W_conv1 = weight_variable([5, 5, 3, 64])
    b_conv1 = bias_variable([64])

    x_image = tf.reshape(x, [-1, input_data.WIDTH, input_data.HEIGHT, 3])

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    W_conv2 = weight_variable([5, 5, 64, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_norm1, W_conv2) + b_conv2)
    h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    h_pool2 = max_pool_2x2(h_norm2)

    W_fc1 = weight_variable([input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

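    # keep_prob is fed at run time: 0.5 during training, 1.0 for evaluation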
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 2])
    b_fc2 = bias_variable([2])

    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

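    # cross-entropy summed over the whole batch, computed by hand from the softmax output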
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    sess.run(tf.initialize_all_variables())
    for i in range(20000):
        batch = data.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print "step %d, training accuracy %g" % (i, train_accuracy)
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print "test accuracy %g" % accuracy.eval(feed_dict={
        x: data.test.images, y_: data.test.labels, keep_prob: 1.0})

The output

    step 0, training accuracy 0.14
    step 100, training accuracy 0.54
    step 200, training accuracy 0.28
    step 300, training accuracy 0.46
    step 400, training accuracy 0.32
    step 500, training accuracy 0.52
    step 600, training accuracy 0.56
    step 700, training accuracy 0.76
    step 800, training accuracy 0.66

Help would be appreciated, thanks.

Upvotes: 3

Views: 4226

Answers (1)

mathetes

Reputation: 12077

You should definitely use TensorBoard to visualize the cross-entropy, bias, and weight summaries. I think it will give you a much better view of what is going on.

Try this code and then run TensorBoard (the launch command is shown after the code):

    import dataset as input_data
    import tensorflow as tf


    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)


    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)


    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME')


    data = input_data.read_data_sets()

    sess = tf.InteractiveSession()

    x = tf.placeholder("float", shape=[None, input_data.HEIGHT * input_data.WIDTH * 3])
    y_ = tf.placeholder("float", shape=[None, 2])

    W_conv1 = weight_variable([5, 5, 3, 64])
    b_conv1 = bias_variable([64])

    x_image = tf.reshape(x, [-1, input_data.WIDTH, input_data.HEIGHT, 3])

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    W_conv2 = weight_variable([5, 5, 64, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_norm1, W_conv2) + b_conv2)
    h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    h_pool2 = max_pool_2x2(h_norm2)

    W_fc1 = weight_variable([input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 2])
    b_fc2 = bias_variable([2])

    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Add summary ops to collect data
    w_fc2_hist = tf.histogram_summary("weights_fc2", W_fc2)
    b_fc2_hist = tf.histogram_summary("bias_fc2", b_fc2)
    w_in_hist = tf.histogram_summary("weights_in", W_conv1)
    b_in_hist = tf.histogram_summary("bias_in", b_conv1)
    y_hist = tf.histogram_summary("y_conv", y_conv)

    cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
    ce_summ = tf.scalar_summary("cross entropy", cross_entropy)
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_summary = tf.scalar_summary("train accuracy", accuracy)
    # Merge all the summaries and write them out to /tmp/tf
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/tf", sess.graph_def)
    sess.run(tf.initialize_all_variables())
    for i in range(20000):
        batch = data.train.next_batch(50)
        feed = {x: batch[0], y_: batch[1], keep_prob: 0.5}
        result = sess.run([merged, accuracy, train_step], feed_dict=feed)
        if i % 100 == 0:  # Record summary data, and the accuracy
            summary_str = result[0]
            acc = result[1]
            writer.add_summary(summary_str, i)
            print("Accuracy at step {0}/{1}: {2}%".format(i, 20000, int(acc * 100)))

    print "test accuracy %g" % accuracy.eval(feed_dict={
        x: data.test.images, y_: data.test.labels, keep_prob: 1.0})
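
Once the script is writing summaries, launch TensorBoard pointing at the log directory used above (assuming the tensorboard command is on your PATH) and open http://localhost:6006 in a browser:

    tensorboard --logdir=/tmp/tf

The cross-entropy curve and the weight/bias histograms should make it much easier to see whether the network is actually learning or just oscillating.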

Upvotes: 1
