Reputation: 1
I am working on creating a model definition. The model takes a sequence of images,a sequence of detected object's bounding boxes and associated classes.
I am using mobilenetv2 as a feature extractor and flattening the bounding box input and classes input. Concatenating all the 3 outputs and reshaping the output data and passing it through a lstm layer.
from keras_tuner import HyperModel
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, GlobalAveragePooling2D, Flatten, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2
import tensorflow as tf
class xyHyperModel(HyperModel):
def __init__(self, input_shape, batch_size):
self.input_shape = input_shape
self.batch_size = batch_size
def build(self, hp):
# Define input layers with batch size
inputs = {
'image_path': Input(shape=(self.input_shape[0], 224, 224, 3), name='image_data', batch_size=self.batch_size),
'bboxes': Input(shape=(self.input_shape[0], 20, 4), name='bboxes', batch_size=self.batch_size),
'classes': Input(shape=(self.input_shape[0], 20, 8), name='classes', batch_size=self.batch_size)
}
# Image processing using MobileNetV2
base_model = MobileNetV2(include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False # Freeze the MobileNetV2 layers
# Process each image in the sequence
image_features = []
for i in range(self.input_shape[0]):
img = inputs['image_path'][:, i, ...] # Extract one image from the sequence
img_features = base_model(img)
img_features = GlobalAveragePooling2D()(img_features)
image_features.append(img_features)
# Debug print for each image feature
print(f"Shape of img_features at step {i}: {img_features.shape}")
# Concatenate image features along the sequence length axis
x = Concatenate(axis=1)(image_features)
print("Shape after concatenating image features:", x.shape)
# Flatten bounding boxes and classes
bboxes_flat = Flatten()(inputs['bboxes'])
print("Shape of bboxes_flat:", bboxes_flat.shape)
classes_flat = Flatten()(inputs['classes'])
print("Shape of classes_flat:", classes_flat.shape)
# Concatenate all features
combined = Concatenate(axis=-1)([x,bboxes_flat, classes_flat])
print("Shape after concatenation:", combined.shape)
# Reshape combined features to be suitable for LSTM
lstm_input = layers.Reshape((self.input_shape[0], -1))(combined)
print("Shape after reshaping for LSTM:", lstm_input.shape)
# Ensure that only one dimension is -1 for Reshape layer
lstm_input = layers.Reshape((self.input_shape[0], -1))(lstm_input) # Adjust this line if necessary
print("Shape after reshaping for LSTM:", lstm_input.shape)
# LSTM layer
lstm_units = hp.Int('lstm_units', min_value=32, max_value=128, step=32)
lstm_out = LSTM(lstm_units, return_sequences=True)(lstm_input)
print("Shape after LSTM:", lstm_out.shape)
# Output layers for x and y
x_output = Dense(20, activation='linear', name='x')(lstm_out)
print("Shape of x_output:", x_output.shape)
y_output = Dense(20, activation='linear', name='y')(lstm_out)
print("Shape of y_output:", y_output.shape)
# Model definition
model = Model(inputs=[inputs['image_path'], inputs['bboxes'], inputs['classes']], outputs=[x_output, y_output])
# Compile the model
model.compile(optimizer=Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
loss={'x': 'mse', 'y': 'mse'},
metrics={'x': 'mae', 'y': 'mae'})
return model
# Define input shape (sequence length, height, width, channels)
input_shape = (3, 224, 224, 3)
batch_size = 3 # Adjust according to your needs
hypermodel = xyHyperModel(input_shape, batch_size)
I am getting this runtime error:
Only one input size may be -1, not both 0 and 1 [[{{node functional_1_1/flatten_1_2/Reshape}}]] [Op:__inference_one_step_on_iterator_56973]-
I tried to reshape the output of the concatenation layer so that shape of the data would be compatible with LSTM layer. When I start the tuning of the model using keras tuner's randomsearch I get the output of the print statements and the error.
Shape of img_features at step 1: (3, 1280)
Shape of img_features at step 2: (3, 1280)
Shape after concatenating image features: (3, 3840)
Shape of bboxes_flat: (3, 240)
Shape of classes_flat: (3, 480)
Shape after concatenation: (3, 4560)
Shape after reshaping for LSTM: (3, 3, 1520)
Shape after reshaping for LSTM: (3, 3, 1520)
Shape after LSTM: (3, 3, 64)
Shape of x_output: (3, 3, 20)
Shape of y_output: (3, 3, 20)```
Upvotes: 0
Views: 59