Reputation: 5797
I'm using a pretrained ResNet50 model as the building block for a U-Net:
# Builds the ResNet50 convolutional stack (stages 2-5) and wraps it in a Keras
# Model named 'resnet50'.
# NOTE(review): this snippet appears to be abridged as posted -- the signature is
# never closed, `classes` is used below but not declared in the visible lines,
# several `else:` branches seem to be missing, and the `get_file(` calls are cut
# off. Confirm against the complete source before relying on it.
def ResNet50(include_top=True, weights='imagenet',
input_tensor=None, input_shape=None,
# Reject any `weights` value other than 'imagenet' or None.
if weights not in {'imagenet', None}:
raise ValueError('The `weights` argument should be either '
'`None` (random initialization) or `imagenet` '
'(pre-training on ImageNet).')
# ImageNet weights combined with the top classifier require exactly 1000 classes.
if weights == 'imagenet' and include_top and classes != 1000:
raise ValueError('If using `weights` as imagenet with `include_top`'
' as true, `classes` should be 1000')
# Pick the model input: a fresh Input, or one wrapping the caller's tensor.
# NOTE(review): the `else:` lines that presumably separate these assignments
# are not visible here -- verify.
if input_tensor is None:
img_input = Input(shape=input_shape)
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor, shape=input_shape)
img_input = input_tensor
# Batch-norm axis follows the backend's image data format.
# NOTE(review): presumably `bn_axis = 1` belongs to a missing `else:` -- verify.
if K.image_data_format() == 'channels_last':
bn_axis = 3
bn_axis = 1
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 3x3 stride-2 max pooling.
x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)
# Stage 2: the only conv_block that passes strides=(1, 1) explicitly.
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
# Stage 3.
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
# Stage 4.
x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
# Stage 5.
x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
# NOTE(review): `inputs = img_input` presumably sits in a missing `else:` -- verify.
if input_tensor is not None:
inputs = get_source_inputs(input_tensor)
inputs = img_input
# Create model.
# The stage-5 output is used directly as the model output; no pooling or
# classifier layers are added in the visible lines, even when include_top is set.
model = Model(inputs, x, name='resnet50')
# load weights
# NOTE(review): both `get_file(` calls are truncated and no call that loads
# `weights_path` into `model` is visible -- confirm the weights are really loaded.
if weights == 'imagenet':
if include_top:
weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5',
weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
return model
Creating the Unet:
def conv_block_simple(prevlayer, filters, prefix, strides=(1, 1)):
    """Apply a 3x3 convolution, batch normalization and ReLU to `prevlayer`.

    The three layers are named `<prefix>_conv`, `<prefix>_bn` and
    `<prefix>_activation`.

    Args:
        prevlayer: Tensor the convolution is applied to.
        filters: Number of convolution filters.
        prefix: Name prefix shared by the layers of this block.
        strides: Convolution strides.

    Returns:
        The output tensor of the ReLU activation.
    """
    features = Conv2D(
        filters,
        (3, 3),
        strides=strides,
        padding="same",
        kernel_initializer="he_normal",
        name=prefix + "_conv",
    )(prevlayer)
    normalized = BatchNormalization(name=prefix + "_bn")(features)
    return Activation('relu', name=prefix + "_activation")(normalized)
def conv_block_simple_no_bn(prevlayer, filters, prefix, strides=(1, 1)):
    """Apply a 3x3 convolution followed by ReLU, without batch normalization.

    The two layers are named `<prefix>_conv` and `<prefix>_activation`.

    Args:
        prevlayer: Tensor the convolution is applied to.
        filters: Number of convolution filters.
        prefix: Name prefix shared by the layers of this block.
        strides: Convolution strides.

    Returns:
        The output tensor of the ReLU activation.
    """
    features = Conv2D(
        filters,
        (3, 3),
        strides=strides,
        padding="same",
        kernel_initializer="he_normal",
        name=prefix + "_conv",
    )(prevlayer)
    return Activation('relu', name=prefix + "_activation")(features)
def get_unet_resnet(input_shape):
    """Build a U-Net whose encoder is a frozen ResNet50 without its top.

    Five encoder activations are tapped as skip connections. The decoder
    upsamples step by step, concatenating each upsampled tensor with the
    matching skip tensor and applying two `conv_block_simple` blocks. A final
    upsampling stage, spatial dropout and a 1x1 sigmoid convolution named
    "prediction" produce the single-channel output.

    Args:
        input_shape: Input shape forwarded to `ResNet50`.

    Returns:
        A Keras `Model` mapping the ResNet50 input to the prediction tensor.
    """
    resnet_base = ResNet50(input_shape=input_shape, include_top=False)

    # Freeze every encoder layer.
    # NOTE(review): this also freezes BatchNormalization layers; depending on the
    # Keras version that may make training-time and inference-time statistics
    # disagree -- confirm against the Keras documentation.
    for layer in resnet_base.layers:
        layer.trainable = False

    # NOTE(review): these look like auto-generated layer names, which would depend
    # on how many Activation layers were created earlier in the session -- verify.
    skip_layer_names = (
        "activation_1",
        "activation_10",
        "activation_22",
        "activation_40",
        "activation_49",
    )
    conv1, conv2, conv3, conv4, conv5 = [
        resnet_base.get_layer(layer_name).output for layer_name in skip_layer_names
    ]

    # Each decoder stage: (skip tensor, filters, first block name, second block name).
    decoder_stages = (
        (conv4, 256, "conv6_1", "conv6_2"),
        (conv3, 192, "conv7_1", "conv7_2"),
        (conv2, 128, "conv8_1", "conv8_2"),
        (conv1, 64, "conv9_1", "conv9_2"),
    )
    decoded = conv5
    for skip, filters, first_name, second_name in decoder_stages:
        upsampled = UpSampling2D()(decoded)
        merged = concatenate([upsampled, skip], axis=-1)
        decoded = conv_block_simple(merged, filters, first_name)
        decoded = conv_block_simple(decoded, filters, second_name)

    # Last stage has no skip connection.
    decoded = UpSampling2D()(decoded)
    decoded = conv_block_simple(decoded, 32, "conv10_1")
    decoded = conv_block_simple(decoded, 32, "conv10_2")
    decoded = SpatialDropout2D(0.2)(decoded)

    prediction = Conv2D(1, (1, 1), activation="sigmoid", name="prediction")(decoded)
    return Model(resnet_base.input, prediction)
I froze the pretrained ResNet50 layers, as proposed in several papers:
for l in resnet_base.layers:
l.trainable = False
Without the freezing, the network works fine but tends to overfit heavily, which I reduced by using a higher SpatialDropout2D() rate.
When I freeze it, however, the training loss decreases, but the validation loss oscillates around some weirdly high value and effectively stagnates.
I can't figure out why the frozen network works on the training set while it doesn't on the validation set. I see no logical reason for it (at my current level of knowledge).
I tried playing with learning rate, but no success.
What could be the problem? Any help would be highly appreciated. Thank you.
Upvotes: 1
Views: 1347
Reputation: 13
There is a certain problem with BN (batch normalization) layers in Keras (the Keras creators maintain that it is by design). The problem is that during training, the BN layers are actually learning the parameters of your new dataset, while during the validation phase the parameters of the dataset ResNet was originally trained on (e.g. ImageNet or CIFAR) are used. This results in good training accuracy, but the validation accuracy will be practically nonexistent.
You can find more here
Upvotes: 1