Neil
Neil

Reputation: 794

Tensorflow Slim - model trains but always predicts the same when evaluating

https://kwotsin.github.io/tech/2017/02/11/transfer-learning.html I followed the above link to make a image classifier

Training code:

slim = tf.contrib.slim

dataset_dir = './data'
log_dir = './log'
checkpoint_file = './inception_resnet_v2_2016_08_30.ckpt'
image_size = 299
num_classes = 21
vlabels_file = './labels.txt'
labels = open(labels_file, 'r')
labels_to_name = {}
for line in labels:
    label, string_name = line.split(':')
    string_name = string_name[:-1]
    labels_to_name[int(label)] = string_name

file_pattern = 'test_%s_*.tfrecord'

items_to_descriptions = {
    'image': 'A 3-channel RGB coloured product image',
    'label': 'A label that from 20 labels'
}

num_epochs = 10
batch_size = 16
initial_learning_rate = 0.001
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 4

def get_split(split_name, dataset_dir, file_pattern=file_pattern, file_pattern_for_counting='products'):
    if split_name not in ['train', 'validation']:
        raise ValueError(
            'The split_name %s is not recognized. Please input either train or validation as the split_name' % (
            split_name))

    file_pattern_path = os.path.join(dataset_dir, file_pattern % (split_name))

    num_samples = 0
    file_pattern_for_counting = file_pattern_for_counting + '_' + split_name
    tfrecords_to_count = [os.path.join(dataset_dir, file) for file in os.listdir(dataset_dir) if
                          file.startswith(file_pattern_for_counting)]
    for tfrecord_file in tfrecords_to_count:
        for record in tf.python_io.tf_record_iterator(tfrecord_file):
            num_samples += 1

    test = num_samples

    reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
        'image/class/label': tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

    labels_to_name_dict = labels_to_name

    dataset = slim.dataset.Dataset(
        data_sources=file_pattern_path,
        decoder=decoder,
        reader=reader,
        num_readers=4,
        num_samples=num_samples,
        num_classes=num_classes,
        labels_to_name=labels_to_name_dict,
        items_to_descriptions=items_to_descriptions)

    return dataset

def load_batch(dataset, batch_size, height=image_size, width=image_size, is_training=True):
    '''
    Loads a batch for training.

    INPUTS:
    - dataset(Dataset): a Dataset class object that is created from the get_split function
    - batch_size(int): determines how big of a batch to train
    - height(int): the height of the image to resize to during preprocessing
    - width(int): the width of the image to resize to during preprocessing
    - is_training(bool): to determine whether to perform a training or evaluation preprocessing

    OUTPUTS:
    - images(Tensor): a Tensor of the shape (batch_size, height, width, channels) that contain one batch of images
    - labels(Tensor): the batch's labels with the shape (batch_size,) (requires one_hot_encoding).

    '''
    # First create the data_provider object
    data_provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        common_queue_capacity=24 + 3 * batch_size,
        common_queue_min=24)

    # Obtain the raw image using the get method
    raw_image, label = data_provider.get(['image', 'label'])

    # Perform the correct preprocessing for this image depending if it is training or evaluating
    image = inception_preprocessing.preprocess_image(raw_image, height, width, is_training)

    # As for the raw images, we just do a simple reshape to batch it up
    raw_image = tf.expand_dims(raw_image, 0)
    raw_image = tf.image.resize_nearest_neighbor(raw_image, [height, width])
    raw_image = tf.squeeze(raw_image)

    # Batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
    images, raw_images, labels = tf.train.batch(
        [image, raw_image, label],
        batch_size=batch_size,
        num_threads=4,
        capacity=4 * batch_size,
        allow_smaller_final_batch=True)

    return images, raw_images, labels


def run():
    # Create the log directory here. Must be done here otherwise import will activate this unneededly.
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # ======================= TRAINING PROCESS =========================
    # Now we start to construct the graph and build our model
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)  # Set the verbosity to INFO level

        # First create the dataset and load one batch
        dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
        images, _, labels = load_batch(dataset, batch_size=batch_size)

        # Know the number steps to take before decaying the learning rate and batches per epoch
        num_batches_per_epoch = int(dataset.num_samples / batch_size)
        num_steps_per_epoch = num_batches_per_epoch  # Because one step is one batch processed
        decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)

        # Create the model inference
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=True)

        # Define the scopes that you want to exclude for restoration
        exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

        # Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
        total_loss = tf.losses.get_total_loss()  # obtain the regularization losses as well

        # Create the global step for monitoring the learning_rate and training.
        global_step = get_or_create_global_step()

        # Define your exponentially decaying learning rate
        lr = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=learning_rate_decay_factor,
            staircase=True)

        # Now we can define the optimizer that takes on the learning rate
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        # Create the train_op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # State the metrics that you want to predict. We get a predictions that is not one_hot_encoded.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # Now finally create all the summaries you need to monitor and group them into one summary op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        # Now we need to create a training step function that runs both the train_op, metrics_op and updates the global_step concurrently.
        def train_step(sess, train_op, global_step):
            '''
            Simply runs a session for the three arguments provided and gives a logging on the time elapsed for each global step
            '''
            # Check the time for each sess run
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            # Run the logging to print some results
            logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)

            return total_loss, global_step_count

        # Now we create a saver function that actually restores the variables from a checkpoint file in a sess
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        # Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
        sv = tf.train.Supervisor(logdir=log_dir, summary_op=None, init_fn=restore_fn)

        # Run the managed session
        with sv.managed_session() as sess:
            for step in xrange(num_steps_per_epoch * num_epochs):
                # At the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch %s/%s', step / num_batches_per_epoch + 1, num_epochs)
                    learning_rate_value, accuracy_value = sess.run([lr, accuracy])
                    logging.info('Current Learning Rate: %s', learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s', accuracy_value)

                    # optionally, print your logits and predictions for a sanity check that things are going fine.
                    logits_value, probabilities_value, predictions_value, labels_value = sess.run(
                        [logits, probabilities, predictions, labels])
                    print 'logits: \n', logits_value
                    print 'Probabilities: \n', probabilities_value
                    print 'predictions: \n', predictions_value
                    print 'Labels:\n:', labels_value

                # Log the summaries every 10 step.
                if step % 10 == 0:
                    loss, _ = train_step(sess, train_op, sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                # If not, simply run the training step
                else:
                    loss, _ = train_step(sess, train_op, sv.global_step)

            # We log the final training loss and accuracy
            logging.info('Final Loss: %s', loss)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            # Once all the training has been done, save the log files and checkpoint model
            logging.info('Finished training! Saving model to disk now.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

This code seems to work an I have ran training on some sample data and Im getting 94% accuracy

Evaluation code:

log_dir = './log'
log_eval = './log_eval_test'
dataset_dir = './data'
batch_size = 10
num_epochs = 1

checkpoint_file = tf.train.latest_checkpoint('./')


def run():
    if not os.path.exists(log_eval):
        os.mkdir(log_eval)
    with tf.Graph().as_default() as graph:
        tf.logging.set_verbosity(tf.logging.INFO)
        dataset = get_split('train', dataset_dir)
        images, raw_images, labels = load_batch(dataset, batch_size=batch_size, is_training=False)

        num_batches_per_epoch = dataset.num_samples / batch_size
        num_steps_per_epoch = num_batches_per_epoch

        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=False)

        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, checkpoint_file)

        predictions = tf.argmax(end_points['Predictions'], 1)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update)

        global_step = get_or_create_global_step()
        global_step_op = tf.assign(global_step, global_step + 1)

        def eval_step(sess, metrics_op, global_step):
            '''
            Simply takes in a session, runs the metrics op and some logging information.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run([metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)', global_step_count, accuracy_value,
                         time_elapsed)

            return accuracy_value

        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()

        sv = tf.train.Supervisor(logdir=log_eval, summary_op=None, saver=None, init_fn=restore_fn)

        with sv.managed_session() as sess:
            for step in xrange(num_steps_per_epoch * num_epochs):
                sess.run(sv.global_step)
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s', step / num_batches_per_epoch + 1, num_epochs)
                    logging.info('Current Streaming Accuracy: %.4f', sess.run(accuracy))

                if step % 10 == 0:
                    eval_step(sess, metrics_op=metrics_op, global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)


                else:
                    eval_step(sess, metrics_op=metrics_op, global_step=sv.global_step)

            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            raw_images, labels, predictions = sess.run([raw_images, labels, predictions])
            for i in range(10):
                image, label, prediction = raw_images[i], labels[i], predictions[i]
                prediction_name, label_name = dataset.labels_to_name[prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s' % (prediction_name, label_name)
                img_plot = plt.imshow(image)

                plt.title(text)
                img_plot.axes.get_yaxis().set_ticks([])
                img_plot.axes.get_xaxis().set_ticks([])
                plt.show()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')

So after training the model and getting 94% accuracy i tried to evaluate the model. On evaluation I get 0-1% accuracy the whole time. I investigated this only to find that it is predicting the same class every time

labels: [7, 11, 5, 1, 20, 0, 18, 1, 0, 7]
predictions: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]

Can anyone help in where i may be going wrong?

EDIT:

TensorBoard accuracy and loss form training

enter image description here enter image description here

TensorBoard accuracy from evaluation

enter image description here

EDIT:

Ive still not been able to solve this issues. I thought there might be a problem with how I am restoring the graph in the eval script so I tried using this to restore the model instead

saver = tf.train.import_meta_graph('/log/model.ckpt.meta')

def restore_fn(sess):
    return saver.restore(sess, checkpoint_file)

instead of

variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

def restore_fn(sess):
    return saver.restore(sess, checkpoint_file)

and just just takes a very long time to start and finally errors. I then tried using V1 of the writer in the saver (saver = tf.train.Saver(variables_to_restore, write_version=saver_pb2.SaveDef.V1)) and retrained and was unable to load this checkpoint at all as it said variables was missing.

I also attempted to run my eval script with the same data it trained on just to see if this may give different results yet I get the same.

Finally I re-cloned the repo from the url and ran a train using the same dataset in the tutorial and I get 0-3% accuracy when I evaluate even after getting it to 84% whilst training. Also my checkpoints must have the correct information as when I restart training the accuracy continues from where it left of. It feels like i'm not doing something correctly when I restore the model. Would really appreciate any suggestions on this as im at a dead end currently :(

Upvotes: 1

Views: 1756

Answers (3)

Elendil
Elendil

Reputation: 9

Actually, this problem is caused by the BN update method. tf will not update the mean & var params by default.

From the official API docs:

Note: when training, the moving_mean and moving_variance need to be updated. By default the update ops are placed in tf.GraphKeys.UPDATE_OPS, so they need to be added as a dependency to the train_op. Also, be sure to add any batch_normalization ops before getting the update_ops collection. Otherwise, update_ops will be empty, and training/inference will not work properly.

Solution:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)

Upvotes: 0

Neil
Neil

Reputation: 794

I finally managed to fix my issue. And it sounds strange but the is_training parameter when loading the model needs to either be set to False on both the training script and the eval script or it needs to be True on both. This is due to the BatchNormalisation that gets removed when is_training is False.

This can be validated by this thread in the tensorflow/tensorflow github https://github.com/tensorflow/models/issues/391#issuecomment-247392028

Also on this slim walkthrough Jupyter notebook https://github.com/tensorflow/models/blob/master/slim/slim_walkthrough.ipynbenter link description here

if you scroll to the end of the page to the section titled 'Apply fine tuned model to some images' you will see a code block showing how to reload a fine tuned, pre trained model. When they load the model you will see this line and the comment explains

# Create the model, use the default arg scope to configure the batch norm parameters.
with slim.arg_scope(inception.inception_v1_arg_scope()):
logits, _ = inception.inception_v1(images, 
num_classes=dataset.num_classes, is_training=True)

Even though this is Inception_v1 the principle is the same and this shows that setting both to either False or True will work but you cant set one different than the other without editing the inception_resnet_v2.py code in slim

Upvotes: 2

Michail N
Michail N

Reputation: 3855

First of all you want to do 21-Class Classifier if I understand correctly. Maybe your code is right but you don't split the data correctly. You should check in you training data if all classes are represented.

If your training data consist only from one class (maybe you took a very small data sample to experiment and you took images only from class 10) you would achieve similar results, high accuracy in training but at prediction the classifier would predict only class 10, giving near zero testing accuracy

Upvotes: 0

Related Questions