Madhup Singh Yadav

Reputation: 8114

ValueError: Shapes (None, 28) and (None, 28, 10) are incompatible

I am trying to train a model using the Keras Sequential API, and my code is below:

from tensorflow.keras import Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Example input and output sequences
input_sequence = ["Madhup is a good boy.", "I am a large language model."]
output_sequence = ["Madgboy", "Imllm"]

# Create tokenizers for input and output
# input_tokenizer = Tokenizer(oov_token='<START>', lower=True)
input_tokenizer = Tokenizer(char_level=True, lower=True)
input_tokenizer.fit_on_texts(input_sequence)
output_tokenizer = Tokenizer(char_level=True, lower=True)
output_tokenizer.fit_on_texts(output_sequence)

# Convert text to sequences of integers
input_sequence_int = input_tokenizer.texts_to_sequences(input_sequence)
output_sequence_int = output_tokenizer.texts_to_sequences(output_sequence)
print(f"Input Sequence : {input_sequence_int}\n Output Sequence : {output_sequence_int} \n")



# Pad sequences to the same length
max_input_length = max(len(seq) for seq in input_sequence_int)
max_output_length = max(len(seq) for seq in output_sequence_int)
max_sequence_length = max(max_input_length, max_output_length)
print("Max Input Length ",  max_input_length)
print("Max Output Length ",  max_output_length)
print("Max Seq Length ",  max_sequence_length)


input_sequence_padded = pad_sequences(input_sequence_int, maxlen=max_sequence_length, padding='post')
output_sequence_padded = pad_sequences(output_sequence_int, maxlen=max_sequence_length, padding='post')
print("Padded Input Sequence:", input_sequence_padded)
print("Padded Output Sequence:", output_sequence_padded)
print(output_tokenizer.word_index)


import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
embedding_dim = 256
units = max_input_length  # note: defined but not used below; the LSTM uses embedding_dim

# Calculate the input vocab size
input_vocab_size = len(input_tokenizer.word_index) + 1
print(input_vocab_size)
output_vocab_size = len(output_tokenizer.word_index) + 1
print(output_vocab_size)


# Define the model
model = tf.keras.Sequential([
    Embedding(input_vocab_size, embedding_dim, input_length=max_input_length),  # -> (batch, 28, 256)
    LSTM(embedding_dim, return_sequences=True),                                  # -> (batch, 28, 256)
    Dense(output_vocab_size, activation='softmax')                               # -> (batch, 28, 10)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(input_sequence_padded, output_sequence_padded, epochs=10)


import numpy as np
def generate_nickname(input_text):
    input_sequence = input_tokenizer.texts_to_sequences([input_text])
    print(f"Input Sequence: {input_sequence}")
#     input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_length, padding='post')
    input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_length, padding='post')
    print(f"Padded Input Sequence: {input_sequence}")
    predicted_sequence = model.predict(input_sequence)
    max_arg = np.argmax(predicted_sequence, axis=-1)
    print(max_arg)
    predicted_nickname = output_tokenizer.sequences_to_texts(max_arg)[0]
    return predicted_nickname

# Example usage
input_text = "Madhup is a good boy."
predicted_nickname = generate_nickname(input_text)
print(f"Input Phrase: {input_text}")
print(f"Generated Nickname: {predicted_nickname}")

Here are the padded input and output sequences:

Padded Input Sequence:
[[ 5  2  6 12  9 13  1 10 14  1  2  1  3  4  4  6  1 15  4 16 11  0  0  0  0  0  0  0]
 [10  1  2  5  1  2  1  7  2 17  3  8  1  7  2 18  3  9  2  3  8  1  5  4  6  8  7 11]]

Padded Output Sequence:
[[1 3 4 5 6 7 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [9 1 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

The above code throws an error at:

model.fit(input_sequence_padded, output_sequence_padded, epochs=10)

ValueError: Shapes (None, 28) and (None, 28, 10) are incompatible
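
For reference, the mismatch can be confirmed by comparing the model's output shape against the target array (a minimal check using the variables defined above):

# The model predicts a 10-way distribution for each of the 28 timesteps,
# but the padded targets are plain integer labels with no class dimension:
print(model.output_shape)            # (None, 28, 10)
print(output_sequence_padded.shape)  # (2, 28)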

I understand that I have to somehow reshape the output sequence before passing it to the model; I am just confused about how to do that.
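
By "reshape" I am guessing the targets need a class dimension, e.g. one-hot encoding them with tf.keras.utils.to_categorical, but I am not sure this is the right approach (untested sketch):

# One-hot encode the integer targets so each timestep becomes a
# 10-way distribution, matching the rank of the model's output:
output_onehot = tf.keras.utils.to_categorical(output_sequence_padded, num_classes=output_vocab_size)
print(output_onehot.shape)  # (2, 28, 10)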

Note: if I just pass 1 in place of output_vocab_size, the code executes, but that is of no use, as the trained model will not return any useful output.

All help is appreciated!

Upvotes: 0

Views: 57

Answers (1)

user2586955

Reputation: 399

You need to make these modifications to your model to make it run with the parameters you specified:

model = tf.keras.Sequential([
    Embedding(input_vocab_size, embedding_dim, input_length=max_input_length),
    LSTM(embedding_dim, return_sequences=False),
    Dense(max_input_length, activation='softmax')
])

So first you need to disable return_sequences on the LSTM, because you don't need the output from each LSTM cell; second, in your last Dense layer you must set the number of units equal to the length of your padded output data, which here is the same as the padded input length.
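
As a quick sanity check (a minimal sketch reusing the variables from the question), the modified model's output shape now matches the padded targets, so fit() no longer raises:

# With return_sequences=False the LSTM emits one vector per sample, and
# Dense(max_input_length) maps it to one value per output position:
print(model.output_shape)            # (None, 28)
print(output_sequence_padded.shape)  # (2, 28)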

Upvotes: 1
