phd Mom

Multi-head self-attention for sentiment analysis gives inaccurate results

I am trying to implement a sentiment-analysis model for text data using self-attention. In this example I use multi-head attention, but I cannot tell whether the results are accurate: the attention heatmap looks approximately the same for every example I try (a sketch for reproducing the heatmap check follows the evaluation code below).

[attention heatmap screenshot]

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Dense, MultiHeadAttention, Input, LayerNormalization,
                                     GlobalAveragePooling1D, Embedding, LSTM, Dropout)
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

# Hyperparameters
num_heads = 8         # number of attention heads
dropout_rate = 0.5
lstm_units = 64
attention_dim = 128
learning_rate = 0.0001
maxlen = 20           # sequence length
embedding_dim = 200   # embedding dimension
vocab_size = len(tokenizer.word_index) + 1  # tokenizer is fitted earlier (not shown)

# Define the input
inputs = Input(shape=(maxlen,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                            weights=[embedding_matrix],  # pre-trained embeddings, built earlier (not shown)
                            input_length=maxlen, trainable=False)(inputs)
lstm_layer = LSTM(lstm_units, return_sequences=True)(embedding_layer)
dropout_layer = Dropout(dropout_rate)(lstm_layer)

# Add the MultiHeadAttention layer
attention_layer = MultiHeadAttention(num_heads=num_heads, key_dim=attention_dim)
attention_output, attention_weights = attention_layer(query=dropout_layer, value=dropout_layer, key=dropout_layer, return_attention_scores=True)
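# Note (assumption): no padding mask is passed here, so the heads also attend
# to padded positions; with short post-padded sequences this can flatten the
# score maps. One option would be to build a mask from the zero-padded input
# and pass it explicitly, e.g.:
#   pad_mask = tf.cast(tf.not_equal(inputs, 0), tf.bool)[:, tf.newaxis, :]
#   attention_output, attention_weights = attention_layer(
#       query=dropout_layer, value=dropout_layer, key=dropout_layer,
#       attention_mask=pad_mask, return_attention_scores=True)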


# Normalize the output of the attention layer
attention_output = LayerNormalization(epsilon=1e-6)(attention_output)

# Add a global average pooling layer to reduce the output to a fixed size
pooled_output = GlobalAveragePooling1D()(attention_output)

# Add more layers as needed
dense_output = Dense(units=32, activation='relu')(pooled_output)
output_layer = Dense(1, activation='sigmoid')(dense_output)

# Create the model
model = Model(inputs=inputs, outputs=output_layer)

def lr_schedule(epoch, lr):
    if epoch % 5 == 0 and epoch > 0:
        return lr / 10
    return lr
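
# The schedule function has no effect on its own; it only runs if attached as
# a callback (assumption: this was the intent), so it is wired into fit below.
lr_scheduler = LearningRateScheduler(lr_schedule)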

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping, lr_scheduler])

# Evaluate the model
results = model.evaluate(X_test, y_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

