
Reputation: 101

Keras error when finetuning InceptionV3

I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on the cats/dogs dataset. I keep getting an error, which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano, numpy 1.12.0 and python 3.5.

from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

# Dataset root, plus the validation and training directories derived from it.
# Training images are read from the "sample/train" sub-directory.
data_root_dir = "/home/ubuntu/ML/data/dogscats/"
valid_dir = os.path.join(data_root_dir, "valid")
train_dir = os.path.join(data_root_dir, "sample", "train")

from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

# Pre-trained base network: InceptionV3 with ImageNet weights.
# include_top=True keeps the network's own top layers, so the layers added
# below are stacked directly on base_model.output.
base_model = InceptionV3(include_top=True, weights='imagenet')

# The global average pooling step is left disabled here:
#x = GlobalAveragePooling2D()(x)
# New head: a 1024-unit fully-connected ReLU layer ...
x = Dense(1024, activation='relu')(base_model.output)
# ... followed by a 2-way softmax output layer.
predictions = Dense(2, activation='softmax')(x)

# The model to train maps the base network's input to the new predictions.
model = Model(input=base_model.input, output=predictions)

# Freeze the first 172 layers; every layer from index 172 on stays trainable.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 172

from keras.optimizers import SGD
# Compile with SGD (learning rate 0.0001, momentum 0.9) and a categorical
# cross-entropy loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(momentum=0.9, lr=0.0001)
)

from sklearn.preprocessing import OneHotEncoder
def get_data(path, target_size=(299,299)):
    """Load the images found under `path` into one concatenated array.

    Args:
        path: Directory handed to `get_batches`.
        target_size: Size passed through to `get_batches`; defaults to
            (299, 299).

    Returns:
        The result of `np.concatenate` over one batch per sample.
    """
    # batch_size=1 and shuffle=False: one image per batch, in a fixed order.
    # class_mode=None is assumed to make the iterator yield image data only
    # (no labels) -- confirm against the installed Keras version.
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    # Pull exactly `nb_sample` batches; the loop index itself is not needed.
    return np.concatenate([batches.next() for _ in range(batches.nb_sample)])

def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
                target_size=(299,299)):
    """Create a directory iterator over the images under `dirname`.

    Args:
        dirname: Directory passed to `gen.flow_from_directory`.
        gen: Generator object providing `flow_from_directory`. The default
            instance is created once, when the function is defined, and is
            shared by every call that does not pass its own.
        shuffle: Forwarded to `flow_from_directory`.
        batch_size: Forwarded to `flow_from_directory`.
        class_mode: Forwarded to `flow_from_directory`.
        target_size: Forwarded to `flow_from_directory`. Defaults to
            (299, 299), matching the default used by `get_data`.

    Returns:
        Whatever `gen.flow_from_directory` returns for these settings.
    """
    return gen.flow_from_directory(dirname, target_size=target_size,
            class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

def onehot(x):
    """Return a dense one-hot encoded array for the values in `x`.

    `x` is reshaped into a single column, passed through a freshly fitted
    `OneHotEncoder`, and the encoder output is densified with `.todense()`
    and wrapped in `np.array`.
    """
    column = x.reshape(-1, 1)
    encoded = OneHotEncoder().fit_transform(column)
    return np.array(encoded.todense())

# Build the validation and training iterators (in that order): batches of 10
# images with shuffling turned off.
val_batches, train_batches = (
    get_batches(directory, shuffle=False, batch_size=10)
    for directory in (valid_dir, train_dir)
)

# Class assignments exposed by each iterator, and their one-hot encodings.
val_classes, trn_classes = val_batches.classes, train_batches.classes
val_labels, trn_labels = onehot(val_classes), onehot(trn_classes)

# Train for 10 epochs; each epoch consumes `train_batches.n` training samples
# and validates on `val_batches.n` samples.
model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.n,
    nb_epoch=10,
    validation_data=val_batches,
    nb_val_samples=val_batches.n
)

The exception is: padding must be zero for average_exc_pad

Here is the full stack-trace:

ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
    883             outputs =\
--> 884                 self.fn() if output_subset is None else\
    885                 self.fn(output_subset=output_subset)

ValueError: padding must be zero for average_exc_pad

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
     35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10, 
---> 36                         validation_data=val_batches, nb_val_samples=val_batches.n)

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/ in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
   1551                     outs = self.train_on_batch(x, y,
   1552                                                sample_weight=sample_weight,
-> 1553                                                class_weight=class_weight)
   1555                     if not isinstance(outs, list):

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/ in train_on_batch(self, x, y, sample_weight, class_weight)
   1314             ins = x + y + sample_weights
   1315         self._make_train_function()
-> 1316         outputs = self.train_function(ins)
   1317         if len(outputs) == 1:
   1318             return outputs[0]

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/ in __call__(self, inputs)
    957     def __call__(self, inputs):
    958         assert isinstance(inputs, (list, tuple))
--> 959         return self.function(*inputs)

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
    896                     node=self.fn.nodes[self.fn.position_of_error],
    897                     thunk=thunk,
--> 898                     storage_map=getattr(self.fn, 'storage_map', None))
    899             else:
    900                 # old-style linkers raise their own exceptions

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/ in raise_with_op(node, thunk, exc_info, storage_map)
    323         # extra long error message in that case.
    324         pass
--> 325     reraise(exc_type, exc_value, exc_trace)

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ in reraise(tp, value, tb)
    683             value = tp()
    684         if value.__traceback__ is not tb:
--> 685             raise value.with_traceback(tb)
    686         raise value

/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
    882         try:
    883             outputs =\
--> 884                 self.fn() if output_subset is None else\
    885                 self.fn(output_subset=output_subset)
    886         except Exception:

ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]

Upvotes: 0

Views: 851

Answers (1)


Reputation: 1980

Fine-tuning in that situation possibly means using the convolutional layers as pre-trained feature extractors. So you don't really want the top layers (densely connected layers) of the Inception network.


Changing

base_model = InceptionV3(weights='imagenet', include_top=True)

to

base_model = InceptionV3(weights='imagenet', include_top=False)

should work.

Also, if you have 200 classes you should change

# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)

to

predictions = Dense(200, activation='softmax')(x)

so that your last layer has the desired 200 elements.

Upvotes: 2

Related Questions