Hossein
Hossein

Reputation: 25934

Trying to to use Caffe classifier causes "sequence argument must have length equal to input rank "error

I am trying to use Caffe.Classifier class and its predict() method on my Imagenet trained caffemodel.
Images were resized to 256x256 and crops of 227x227 were used to train the net.
Everything is simple and straight forward, yet I keep getting weird errors such as the following :

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
     17                        image_dims=(256, 256))
     18 
---> 19     out = net.predict([image_caffe], oversample=True)
     20     print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
     21     plabel = int(labels[out[0].argmax()].strip())

<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
     65         for ix, in_ in enumerate(inputs):
     66             print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67             input_[ix] = caffe.io.resize_image(in_, self.image_dims)
     68 
     69         if oversample:

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
    335         # ndimage interpolates anything but more slowly.
    336         scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337         resized_im = zoom(im, scale + (1,), order=interp_order)
    338     return resized_im.astype(np.float32)
    339 

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
    588     else:
    589         filtered = input
--> 590     zoom = _ni_support._normalize_sequence(zoom, input.ndim)
    591     output_shape = tuple(
    592             [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
     63         if len(normalized) != rank:
     64             err = "sequence argument must have length equal to input rank"
---> 65             raise RuntimeError(err)
     66     else:
     67         normalized = [input] * rank

RuntimeError: sequence argument must have length equal to input rank

And here is the snippets of code I'm using :

import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools

def flat_shape(x):
    "Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
    return x.reshape(x.shape)

def db_reader(fpath, type='lmdb'):
    if type == 'lmdb':
        return lmdb_reader(fpath)
    else:
        return leveldb_reader(fpath)

def lmdb_reader(fpath):
    import lmdb
    lmdb_env = lmdb.open(fpath)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()

    for key, value in lmdb_cursor:
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        yield (key, flat_shape(image), label)

def leveldb_reader(fpath):
    import leveldb
    db = leveldb.LevelDB(fpath)

    for key, value in db.RangeIter():
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        yield (key, flat_shape(image), label)

Classifier class (copied form Caffe's python directory):

    import numpy as np
    import caffe

    class Classifier(caffe.Net):
        """
        Classifier extends Net for image class prediction
        by scaling, center cropping, or oversampling.

        Parameters
        ----------
        image_dims : dimensions to scale input for cropping/sampling.
            Default is to scale to net input size for whole-image crop.
        mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        """
        def __init__(self, model_file, pretrained_file, image_dims=None,
                     mean=None, input_scale=None, raw_scale=None,
                     channel_swap=None):
            caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

            # configure pre-processing
            in_ = self.inputs[0]
            print('inputs[0]',self.inputs[0])
            self.transformer = caffe.io.Transformer(
                {in_: self.blobs[in_].data.shape})
            self.transformer.set_transpose(in_, (2, 0, 1))

            if mean is not None:
                self.transformer.set_mean(in_, mean)
            if input_scale is not None:
                self.transformer.set_input_scale(in_, input_scale)
            if raw_scale is not None:
                self.transformer.set_raw_scale(in_, raw_scale)
            if channel_swap is not None:
                self.transformer.set_channel_swap(in_, channel_swap)

            print('crops: ',self.blobs[in_].data.shape[2:]) 
            self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
            if not image_dims:
                image_dims = self.crop_dims
            self.image_dims = image_dims

        def predict(self, inputs, oversample=True):
            """
            Predict classification probabilities of inputs.

            Parameters
            ----------
            inputs : iterable of (H x W x K) input ndarrays.
            oversample : boolean
                average predictions across center, corners, and mirrors
                when True (default). Center-only prediction when False.

            Returns
            -------
            predictions: (N x C) ndarray of class probabilities for N images and C
                classes.
            """
            # Scale to standardize input dimensions.
            input_ = np.zeros((len(inputs),
                               self.image_dims[0],
                               self.image_dims[1],
                               inputs[0].shape[2]),
                              dtype=np.float32)
            for ix, in_ in enumerate(inputs):
                print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
                input_[ix] = caffe.io.resize_image(in_, self.image_dims)

            if oversample:
                # Generate center, corner, and mirrored crops.
                input_ = caffe.io.oversample(input_, self.crop_dims)
            else:
                # Take center crop.
                center = np.array(self.image_dims) / 2.0
                crop = np.tile(center, (1, 2))[0] + np.concatenate([
                    -self.crop_dims / 2.0,
                    self.crop_dims / 2.0
                ])
                input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

            # Classify
            caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
                                dtype=np.float32)
            for ix, in_ in enumerate(input_):
                caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
            out = self.forward_all(**{self.inputs[0]: caffe_in})
            predictions = out[self.outputs[0]]

            # For oversampling, average predictions across crops.
            if oversample:
                predictions = predictions.reshape((len(predictions) / 10, 10, -1))
                predictions = predictions.mean(1)

            return predictions

Main section :

proto ='deploy.prototxt'
model='snap1.caffemodel'
mean='imagenet_mean.binaryproto'
db_path='G:/imagenet/ilsvrc12_val_lmdb'

# Extract mean from the mean image file
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu() 
reader = lmdb_reader(db_path)

i = 0
for i, image, label in reader:
    image_caffe = image.reshape(1, *image.shape)
    print(image_caffe.shape, mu.shape)

    net = Classifier(proto, model,
                     mean= mu,
                     channel_swap=(2,1,0),
                     raw_scale=255,
                     image_dims=(256, 256))

    out = net.predict([image_caffe], oversample=True)
    print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
    i+=1

What is wrong here?

Upvotes: 0

Views: 2662

Answers (1)

Hossein
Hossein

Reputation: 25934

I found the cause, I had to feed the image in the form of 3D tensor not a 4D one! so our 4d tensor:

image_caffe = image.reshape(1, *image.shape) 

needed to be changed to a 3D one:

image_caffe = image.transpose(2,1,0)

As a side note, try using python2 for running any caffe related. python3 might work at first but will definitely cause a lot of headaches. for instance, predict method with oversample set to True, will crash under python3 but works just fine under python2!

Upvotes: 1

Related Questions