Reputation: 4152
So I already have a working Siamese network that takes 3 inputs: the Anchor branch takes an image, the Positive branch takes text, and the Negative branch takes text. I pass them to my model and train the network without problems, as in the network given below:
def model(vocab_size, lr=0.0001):
    """Build and compile the three-branch triplet network.

    The anchor branch embeds an image with ResNet50; the positive and
    negative branches embed token sequences through one shared
    Embedding -> Bidirectional GRU -> Dense stack.  Each branch output is
    L2-normalised, and the three vectors are returned as separate model
    outputs.

    ``vec_dim`` and ``triplet_loss`` are read from the enclosing scope
    (they are not defined in this function).

    Args:
        vocab_size: Input dimension passed to the text ``Embedding`` layer.
        lr: Learning rate handed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Model`` with inputs
        ``[anchor image, positive text, negative text]``.
    """
    anchor_in = Input(shape=(None, None, 3))  # anchor: image
    positive_in = Input(shape=(None,))  # positive: text
    negative_in = Input(shape=(None,))  # negative: text

    # Image branch: ResNet50 features -> global max pool -> linear projection.
    backbone = ResNet50(weights='imagenet', include_top=False)
    image_features = backbone(anchor_in)
    image_features = GlobalMaxPool2D()(image_features)
    image_head = Dense(vec_dim, activation="linear", name="dense_image_1")
    anchor_vec = image_head(image_features)

    # Text layers are created once so both text branches share their weights.
    token_embedding = Embedding(vocab_size, 50, name="embed")
    sequence_encoder = Bidirectional(GRU(256, return_sequences=True), name="gru_1")
    text_head = Dense(vec_dim, activation="linear", name="dense_text_1")

    def encode_text(tokens):
        # Dropout and pooling layers hold no weights; a fresh instance is
        # created per branch, exactly one of each per call.
        sequence = token_embedding(tokens)
        sequence = SpatialDropout1D(0.1)(sequence)
        sequence = sequence_encoder(sequence)
        pooled = GlobalMaxPool1D()(sequence)
        return text_head(pooled)

    positive_vec = encode_text(positive_in)
    negative_vec = encode_text(negative_in)

    # A single normalisation layer is applied to every branch output.
    unit_norm = Lambda(lambda t: K.l2_normalize(t, axis=-1))
    outputs = [
        unit_norm(branch)
        for branch in (anchor_vec, positive_vec, negative_vec)
    ]

    # The loss function receives all three outputs.
    siamese = Model([anchor_in, positive_in, negative_in], outputs)
    siamese.compile(loss=triplet_loss, optimizer=Adam(lr))
    return siamese
Now I want to try something like this: 1 text and 1 image go to a GRU and a ResNet respectively and produce vectors. We concatenate / add those vectors and then pass THAT combined vector to the Siamese network's Anchor branch, and the same happens for all the branches.
Upvotes: 0
Views: 317
Reputation: 419
Not sure about the need, but if you want to make something like that:
...the "extra network" before the anchor can be made following principles in:
import tensorflow as tf
from tensorflow import keras
from keras.layers import *
import keras.backend as K
from tensorflow.keras.applications.resnet50 import ResNet50
import numpy as np
# Demo script: builds an "extra" two-input network (one image + one text)
# whose concatenated output could be fed to a Siamese branch.

# Just for demonstrating...
vocab_size = 10
lr = 0.0001
vec_dim = 2

# Let's make an extra network having the ability to take one image and one
# text in (applying the same ResNet50 structure as a base in image
# analysis... remember to think about whether you would like to have the
# same or a separate base model).

# Need to put in one extra image and one extra text...
# These samples are only created here; nothing in this snippet feeds them
# to the network.
one_image = np.random.random((64, 64, 3))
one_text = 'experience in ai'

# NOTE(review): these input names contain a space; some TensorFlow/Keras
# versions may reject such layer names — confirm on the target version.
input_for_one_image = Input(shape=(None, None, 3), name='extra image')
input_for_one_text = Input(shape=(None,), name='extra text')

base_model = ResNet50(weights='imagenet', include_top=False)
# Wrap the same ResNet50 graph under a distinct model name.
base_model_extra = keras.models.Model(
    inputs=base_model.input,
    outputs=base_model.output,
    name='resnet50_extra',
)

# Layers of the main network; created here but not used by the extra
# network below.
dense_1 = Dense(vec_dim, activation="linear", name="dense_image_1")
embed = Embedding(vocab_size, 50, name="embed")
gru = Bidirectional(GRU(256, return_sequences=True), name="gru_1")
dense_2 = Dense(vec_dim, activation="linear", name="dense_text_1")

# Image branch of the extra network.
x1_extra = base_model_extra(input_for_one_image)
x1_extra = GlobalMaxPool2D()(x1_extra)
dense_1_extra = Dense(vec_dim, activation="linear", name="dense_image_1_extra")
x1_extra = dense_1_extra(x1_extra)

# Text branch of the extra network, built from its own "_extra" layers.
embed_extra = Embedding(vocab_size, 50, name="embed_extra")
gru_extra = Bidirectional(GRU(256, return_sequences=True), name="gru_1_extra")
dense_2_extra = Dense(vec_dim, activation="linear", name="dense_text_1_extra")
x2_extra = embed_extra(input_for_one_text)
x2_extra = SpatialDropout1D(0.1)(x2_extra)
# Use the dedicated "_extra" GRU and Dense layers here: calling `gru` and
# `dense_2` instead would leave `gru_extra` / `dense_2_extra` unused and
# make this branch share weights with the main network's text layers.
x2_extra = gru_extra(x2_extra)
x2_extra = GlobalMaxPool1D()(x2_extra)
x2_extra = dense_2_extra(x2_extra)

# Concatenate the image and text vectors into a single output vector.
temp = Concatenate()([x1_extra, x2_extra])
extra_network = keras.models.Model(
    inputs=[input_for_one_image, input_for_one_text],
    outputs=temp,
)
# Writes a diagram of the extra network to 'extra.png'.
tf.keras.utils.plot_model(extra_network, to_file='extra.png')
...but if you apply that, remember to adapt the anchor branch accordingly so that it is able to take in the "strictly-non-image-format" anchor input.
Upvotes: 1