Reputation: 128
I have created a Siamese network using tensorflow 2.4.
def create_encoder_siamese(pairs, cfg):
    """Build a Siamese similarity model and its single-image encoder.

    Based on https://keras.io/examples/vision/siamese_network/
    and https://keras.io/examples/vision/siamese_contrastive/

    Args:
        pairs: array of image pairs shaped (num_pairs, 2, H, W, C); used
            for the input shape and for one debug batch to print
            intermediate tensor sizes.
        cfg: config dict; reads cfg['train']['batch_size'] and
            cfg['train']['margin'].

    Returns:
        (model, encoder):
            model   -- trainable two-image Siamese model emitting a
                       similarity score in (0, 1),
            encoder -- sub-model mapping one image to its 1-D embedding.
    """

    def euclidean_distance(vects):
        # Euclidean distance along the feature axis, clamped away from
        # zero so the sqrt gradient stays finite.
        x, y = vects
        sum_square = tf.math.reduce_sum(tf.math.square(x - y),
                                        axis=1, keepdims=True)
        return tf.math.sqrt(tf.math.maximum(sum_square,
                                            tf.keras.backend.epsilon()))

    img_shape = pairs.shape[2:]
    print('Image shape:', img_shape)

    # Pre-trained MobileNetV2 feature extractor, frozen for transfer
    # learning (only the layers added below will train).
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = False

    # Shared layer objects, created ONCE so that the two towers (and any
    # later sub-model, e.g. an embedding comparer) reuse the same weights.
    global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
    encoded_layer = tf.keras.layers.Dense(EMBEDDING_SIZE, activation='relu')
    output_dense = tf.keras.layers.Dense(1, activation="sigmoid")

    # Debug aid only: report intermediate shapes on one batch.
    feature_batch = base_model(pairs[0:cfg['train']['batch_size'], 0, :, :, :])
    print('batch feature shape:', feature_batch.shape)
    feature_batch_average = global_average_layer(feature_batch)
    print('batch average pooling shape:', feature_batch_average.shape)
    encoded_batch = encoded_layer(feature_batch_average)
    print('batch encoded shape:', encoded_batch.shape)

    # Encoder: image -> 1-D embedding.
    inputs = tf.keras.Input(shape=img_shape)
    # training=False keeps the BatchNormalization layers inside the base
    # model in inference mode even while the outer model trains.
    x = base_model(inputs, training=False)
    x = global_average_layer(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    encoded = encoded_layer(x)
    encoder = tf.keras.Model(inputs, encoded, name='encoder')
    print(encoder.summary())
    print()

    # Siamese model: link the two towers through the shared encoder.
    input_1 = tf.keras.layers.Input(img_shape)
    input_2 = tf.keras.layers.Input(img_shape)
    tower_1 = encoder(input_1)
    tower_2 = encoder(input_2)
    # NOTE: `distance` and `output` are output TENSORS, not layers; the
    # layer objects themselves (output_dense above) are what must be
    # reused to build a sub-model that shares trained weights, e.g.:
    #   e1 = tf.keras.layers.Input(EMBEDDING_SIZE)
    #   e2 = tf.keras.layers.Input(EMBEDDING_SIZE)
    #   d = tf.keras.layers.Lambda(euclidean_distance)([e1, e2])
    #   comparer = tf.keras.Model([e1, e2], output_dense(d))
    distance = tf.keras.layers.Lambda(euclidean_distance)([tower_1, tower_2])
    output = output_dense(distance)
    model = tf.keras.Model(inputs=[input_1, input_2], outputs=output)
    print(model.summary())
    print()

    model.compile(optimizer='Adam',
                  loss=tfa.losses.ContrastiveLoss(margin=cfg['train']['margin']),
                  metrics=['accuracy'])
    print()
    print('Number of layers to train:', len(model.trainable_variables))
    return model, encoder
Which I can successfully train and use to compute distances between desired image pairs. However for efficiency purposes at deployment I want to first transform all images into their 1D embeddings (which I can do with the encoder model) and then compute distances directly between embeddings with a submodel.
But when I try to create the comparer model with something like:
comparer = tf.keras.Model(inputs=[(EMBEDDING_SIZE), (EMBEDDING_SIZE)], outputs=output_layer, name='comparer')
I get the error:
ValueError Traceback (most recent call last)
<ipython-input-22-dfe3ad813151> in <module>
88 return model, encoder, output_layer
89
---> 90 model, encoder, comparer = create_encoder_siamese(train_pairs, cfg)
<ipython-input-22-dfe3ad813151> in create_encoder_siamese(pairs, cfg)
64
65 # comparer model
---> 66 comparer = tf.keras.Model(inputs=[(EMBEDDING_SIZE), (EMBEDDING_SIZE)], outputs=output_layer, name='comparer')
67
68 # full model
/usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
515 self._self_setattr_tracking = False # pylint: disable=protected-access
516 try:
--> 517 result = method(self, *args, **kwargs)
518 finally:
519 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py in __init__(self, inputs, outputs, name, trainable, **kwargs)
118 generic_utils.validate_kwargs(kwargs, {})
119 super(Functional, self).__init__(name=name, trainable=trainable)
--> 120 self._init_graph_network(inputs, outputs)
121
122 @trackable.no_automatic_dependency_tracking
/usr/local/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
515 self._self_setattr_tracking = False # pylint: disable=protected-access
516 try:
--> 517 result = method(self, *args, **kwargs)
518 finally:
519 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py in _init_graph_network(self, inputs, outputs)
155 base_layer_utils.create_keras_history(self._nested_outputs)
156
--> 157 self._validate_graph_inputs_and_outputs()
158
159 # A Network does not create weights of its own, thus it is already
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py in _validate_graph_inputs_and_outputs(self)
680 'is redundant. '
681 'All inputs should only appear once.'
--> 682 ' Found: ' + str(self.inputs))
683
684 for x in self.inputs:
ValueError: The list of inputs passed to the model is redundant. All inputs should only appear once. Found: [64, 64]
So how can I adapt this network to have a full model for training, an encoder to convert a single image to the embedding, and a final model to compare the 2 embeddings directly?
Note: I know everybody uses the encoder to transform the images into the embeddings and then computes the Euclidean distance to measure the similarity between samples. However the Euclidean distance and the distance computed by the entire model are not the same; they follow this relationship:
And who doesn't want a nice distance metric bounded in the range 0 to 1?
Upvotes: 1
Views: 363
Reputation: 1508
Regarding the error: the Model's 'inputs' argument takes a tensor or a list of tensors, not a list of sizes:
# Embedding inputs: Model(inputs=...) needs Input TENSORS, not sizes.
tower_1 = tf.keras.layers.Input(EMBEDDING_SIZE)
tower_2 = tf.keras.layers.Input(EMBEDDING_SIZE)
# Compute distance between embeddings
distance_layer = tf.keras.layers.Lambda(euclidean_distance)([tower_1, tower_2])
# NOTE(review): this creates a FRESH Dense layer with untrained weights;
# to match the trained Siamese model, reuse the trained model's Dense
# layer object here instead — confirm against the training code.
output_layer = tf.keras.layers.Dense(1, activation="sigmoid")(distance_layer)
comparer = tf.keras.Model(inputs=[tower_1, tower_2], outputs=output_layer)
By the way, in your code, distance_layer and output_layer are tensors, not layers.
Upvotes: 1