Saddam
Saddam

Reputation: 1

Tensorflow Inception resnet v2 input tensor

I'm trying to run this code:

import os
import tensorflow as tf
from datasets import imagenet
from nets import inception_resnet_v2
from preprocessing import inception_preprocessing

# Question snippet: builds Inception-ResNet-v2 with TF-Slim, restores a
# checkpoint, and prints the 15 highest-probability ImageNet labels.
# The code is kept exactly as posted; the asker reports that it fails with
# "ValueError: rank of shape must be at least 4 not: 1".

# Directory that holds the checkpoint file joined below.
checkpoints_dir = 'model'

slim = tf.contrib.slim

# Neither constant is referenced again in this snippet.
batch_size = 3
image_size = 299

# NOTE(review): as posted, the statements below are not indented under this
# `with`, so the snippet is not valid Python in this form.
with tf.Graph().as_default():

with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
    # NOTE(review): the first argument is a plain Python list of four ints,
    # not an image tensor. The asker suspects this argument is the source of
    # the ValueError.
    logits, _ = inception_resnet_v2.inception_resnet_v2([1, 299, 299, 3], num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    # Callable that assigns the InceptionResnetV2 model variables from the
    # checkpoint file when invoked with a session.
    init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(checkpoints_dir, 'inception_resnet_v2_2016_08_30.ckpt'),
    slim.get_model_variables('InceptionResnetV2'))

    with tf.Session() as sess:
        init_fn(sess)

        # The image is read and decoded only here, after the network has
        # already been built; `testImage` is never passed to the network.
        imgPath = '.../image_3.jpeg'
        testImage_string = tf.gfile.FastGFile(imgPath, 'rb').read()
        testImage = tf.image.decode_jpeg(testImage_string, channels=3)

        # `probabilities` is rebound from the graph tensor to the fetched value.
        np_image, probabilities = sess.run([testImage, probabilities])
        # Keep the first (only) row of the batch.
        probabilities = probabilities[0, 0:]
        # Class indices ordered by descending probability (ascending sort of
        # the negated values).
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]

        names = imagenet.create_readable_names_for_imagenet_labels()
        # Print the 15 most probable classes as (probability, label) tuples.
        for i in range(15):
            index = sorted_inds[i]
            print((probabilities[index], names[index]))

But TF displays an error: ValueError: rank of shape must be at least 4 not: 1

I believe the problem is the input tensor shape [1, 299, 299, 3]. How do I build the input tensor for a 3-channel JPEG image?

There is also a similar question (Using pre-trained inception_resnet_v2 with Tensorflow). I saw input_tensor in its code, but unfortunately there is no explanation of what input_tensor is. Maybe I'm asking something self-evident, but I'm stuck! Thanks a lot in advance for any advice!

Upvotes: 0

Views: 3370

Answers (2)

Selina
Selina

Reputation: 1

You could use tf.expand_dims(your_tensor_3channel, axis=0) to expand it into batch format.

Upvotes: -1

eluk
eluk

Reputation: 93

You have to preprocess your image. Here is the code:

import os
import tensorflow as tf
from datasets import imagenet
from nets import inception_resnet_v2
from preprocessing import inception_preprocessing

# Classify a single JPEG with a pre-trained Inception-ResNet-v2 (TF-Slim)
# and print the 15 most probable ImageNet labels.

# Directory that holds the checkpoint file joined below.
checkpoints_dir = 'model'

slim = tf.contrib.slim

batch_size = 3  # not used below; kept from the original example
image_size = 299

with tf.Graph().as_default():
    # Read the raw JPEG bytes; the context manager closes the file handle
    # instead of leaving it open.
    imgPath = '.../cat.jpg'
    with tf.gfile.GFile(imgPath, 'rb') as image_file:
        testImage_string = image_file.read()

    # Decode to a 3-channel image, apply the Inception evaluation-time
    # preprocessing, then add a leading batch dimension: the network
    # requires an input of rank 4, not a bare image or a shape list.
    testImage = tf.image.decode_jpeg(testImage_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(
        testImage, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Only the network construction needs the model's arg_scope.
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
        logits, _ = inception_resnet_v2.inception_resnet_v2(
            processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    # Callable that assigns the InceptionResnetV2 model variables from the
    # checkpoint file when invoked with a session.
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_resnet_v2_2016_08_30.ckpt'),
        slim.get_model_variables('InceptionResnetV2'))

    with tf.Session() as sess:
        init_fn(sess)

        # Fetch only what is needed and keep the tensor name `probabilities`
        # intact; row 0 is the single image in the batch.
        probs = sess.run(probabilities)[0]

        # Class indices ordered by descending probability (stable sort, so
        # ties keep their original index order).
        sorted_inds = sorted(range(len(probs)), key=lambda i: -probs[i])

        names = imagenet.create_readable_names_for_imagenet_labels()
        for index in sorted_inds[:15]:
            print((probs[index], names[index]))

The output is:

(0.1131034, 'tiger cat')
(0.079478227, 'tabby, tabby cat')
(0.052777905, 'Cardigan, Cardigan Welsh corgi')
(0.030195976, 'laptop, laptop computer')
(0.027841948, 'bathtub, bathing tub, bath, tub')
(0.026694898, 'television, television system')
(0.024981709, 'carton')
(0.024039172, 'Egyptian cat')
(0.018425584, 'tub, vat')
(0.018221909, 'Pembroke, Pembroke Welsh corgi')
(0.015066789, 'skunk, polecat, wood pussy')
(0.01377619, 'screen, CRT screen')
(0.012509955, 'monitor')
(0.012224807, 'mouse, computer mouse')
(0.012188354, 'refrigerator, icebox')

Upvotes: 3

Related Questions