Reputation: 410
I have trouble making my neural network train. I have defined the neural network as follows:
# Shared embedding layer, applied once to all three inputs after concatenation.
shared = embedding_layer
inputA = keras.Input(shape=(40, ), name="anchor") # length-40 sequence of token ids
inputP = keras.Input(shape=(40, ), name="positive") # length-40 sequence of token ids
inputN = keras.Input(shape=(40, ), name="negative") # length-40 sequence of token ids
# concatenated is (batch, 120).
concatenated = layers.concatenate([inputA, inputP, inputN])
# BUG SOURCE: the Embedding layer maps (batch, 120) -> (batch, 120, 300),
# a 3-D tensor.  Every Dense layer below acts only on the LAST axis, so the
# 120 dimension is carried all the way through to the outputs.
embedded_A = shared(concatenated)
encoded = Dense(900, activation = "relu")(embedded_A)
encoded = Dense(600, activation = "relu")(encoded)
encoded = Dense(300, activation = "relu")(encoded)
encoded = Dense(100, activation = "relu")(encoded)
decoded = Dense(100, activation = "relu")(encoded)
decoded = Dense(300, activation = "relu")(decoded)
decoded = Dense(600, activation = "relu")(decoded)
decoded = Dense(900, activation = "relu")(decoded)
# Each head is therefore (batch, 120, 40), while the fit() targets below are
# (batch, 40) — this mismatch is exactly the reported MSE error
# ([32,120,40] vs [32,40]).
predictionsA = Dense(40, activation="sigmoid", name ='outA')(decoded)
predictionsP = Dense(40, activation="sigmoid", name ='outB')(decoded)
predictionsN = Dense(40, activation="sigmoid", name ='outC')(decoded)
ml_model = keras.Model(
inputs=[inputA, inputP, inputN],
outputs=[predictionsA, predictionsP, predictionsN]
)
ml_model.compile(
optimizer='adam',
loss='mse'
)
# Targets equal the inputs: the network is trained as an autoencoder.
ml_model.fit(
{"anchor": anchor, "positive": positive, "negative": negative},
{"outA": anchor, "outB": positive, 'outC': negative},
epochs=2)
Which schematically looks like
The embedding layer is defined as follows:
# Pre-trained embedding matrix; syn0 looks like a gensim word2vec weight
# matrix of shape (vocab_size, 300) — presumably `model` is a word2vec
# instance; verify against where it is built.
embedding_m = model.syn0
embedding_layer = Embedding(len(vocab),
300,  # embedding dimension — this adds a third axis to whatever goes in
weights=[embedding_m],  # initialize from the pre-trained matrix
input_length=40,  # NOTE(review): layer is later applied to a length-120 concat, not 40
trainable=True)  # embeddings are fine-tuned during training
What I feed into the network is three numpy arrays of shape (120000, 40) which look like this:
array([[ 2334, 23764, 7590, ..., 3000001, 3000001, 3000001],
[3000000, 1245, 1124, ..., 3000001, 3000001, 3000001],
[ 481, 491, 5202, ..., 3000001, 3000001, 3000001],
...,
[3000000, 125, 20755, ..., 3000001, 3000001, 3000001],
[1217971, 168575, 239, ..., 9383, 1039, 87315],
[ 12990, 91, 258231, ..., 3000001, 3000001, 3000001]])
And the input is the same as the output, as I am building an autoencoder.
The error I get is:
Dimensions must be equal, but are 120 and 32 for '{{node mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model_3/outA/Sigmoid, mean_squared_error/Cast)' with input shapes: [32,120,40], [32,40].
But I can't seem to find out why, or how to fix it... Any ideas? I can provide more examples if needed. I suspect that there is some dimension error, as I would ideally like my output to be of shape (120000,40) exactly as my input.
Upvotes: 0
Views: 74
Reputation: 419
Fixed version of the problematic encoder-decoder:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Import Dense from tensorflow.keras, NOT the standalone `keras` package:
# mixing layers from both libraries in one model raises errors on recent
# TensorFlow versions.
from tensorflow.keras.layers import Dense

#shared = embedding_layer
# Simulate the embedding with a layer whose output stays 2-D: Dense maps
# (batch, 120) -> (batch, 1).  The original Embedding produced a 3-D
# (batch, 120, 300) tensor, which is what made the heads (batch, 120, 40)
# instead of the required (batch, 40).
shared = Dense(1, activation="relu")

# Three length-40 inputs.
inputA = keras.Input(shape=(40,), name="anchor")
inputP = keras.Input(shape=(40,), name="positive")
inputN = keras.Input(shape=(40,), name="negative")

# (batch, 120) after concatenation.
concatenated = layers.concatenate([inputA, inputP, inputN])
embedded_A = shared(concatenated)

# Encoder: 900 -> 600 -> 300 -> 100.
encoded = Dense(900, activation="relu")(embedded_A)
encoded = Dense(600, activation="relu")(encoded)
encoded = Dense(300, activation="relu")(encoded)
encoded = Dense(100, activation="relu")(encoded)

# Decoder: 300 -> 600 -> 900.  The duplicate Dense(100) that followed the
# encoder bottleneck was removed — it added nothing to the structure.
decoded = Dense(300, activation="relu")(encoded)
decoded = Dense(600, activation="relu")(decoded)
decoded = Dense(900, activation="relu")(decoded)

# One reconstruction head per input; each output is (batch, 40), matching
# the (batch, 40) targets passed to fit().
predictionsA = Dense(40, activation="sigmoid", name="outA")(decoded)
predictionsP = Dense(40, activation="sigmoid", name="outB")(decoded)
predictionsN = Dense(40, activation="sigmoid", name="outC")(decoded)

ml_model = keras.Model(
    inputs=[inputA, inputP, inputN],
    outputs=[predictionsA, predictionsP, predictionsN],
)
ml_model.compile(
    optimizer="adam",
    loss="mse",
)

# Simulated data standing in for the real (120000, 40) arrays.
anchor = tf.random.uniform((100, 40))
positive = tf.random.uniform((100, 40))
negative = tf.random.uniform((100, 40))
ml_model.fit(
    {"anchor": anchor, "positive": positive, "negative": negative},
    {"outA": anchor, "outB": positive, "outC": negative},
    epochs=2,
)
Upvotes: 1
Reputation: 419
Remove the redundant "decoded" layer to fix your network structure:
Upvotes: 0