Reputation: 1194
I'm following a tutorial for POS Tagger for African Language, which uses LSTM-based classifier. When running the code:
import pandas as pd
import numpy as np
from keras.preprocessing import sequence
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, SimpleRNN, Flatten
from keras.layers.embeddings import Embedding
from tensorflow.keras.callbacks import EarlyStopping
#from keras.callbacks import EarlyStopping
# load English-Yemba dictionary as CSV file
df = pd.read_csv('dict_en_yb.csv')
# display few words pairs
df.sample(frac=.1).head(15)
nb_labels = len(df.word_type.unique())
nb_words = df.shape[0]
# Create letter to token dictionary
chars = sorted(list(set(' '.join(df.yb))))
letter2idx = dict((c, i+1) for i, c in enumerate(chars))
# Create token to letter dictionary
idx2letter = dict((i, c) for c, i in letter2idx.items())
vocabulary_size = len(letter2idx)+1
print("Vocabulary size: ", vocabulary_size)
X = []
Y = []
max_len = 0
for i, r in df.iterrows():
word_vector = [letter2idx[c] for c in set(r.yb)]
label = r.word_type
if len(word_vector)>max_len:
max_len = len(word_vector)
X_max = word_vector
Y_max = label
X.append(word_vector)
Y.append(label)
X = sequence.pad_sequences(X, maxlen=max_len)
Y = to_categorical(Y)
X.shape, Y.shape
X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
test_size=.2,
shuffle=True, random_state=999)
print("Training with {} words, validation with {} words".format(X_train.shape[0],
X_test.shape[0]))
# Building a 1-layer LSTM with 100 cells with embeddings
embedding_dim = 8
model_lstm = Sequential()
model_lstm.add(Embedding(vocabulary_size, embedding_dim, input_length=max_len))
model_lstm.add(LSTM(100))
model_lstm.add(Dense(nb_labels, activation='sigmoid'))
model_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_lstm.summary()
# Training the network
early_stopping = EarlyStopping(monitor='val_acc',
min_delta=0,patience=5,verbose=1, mode='auto')
history_lstm = model_lstm.fit(X_train, Y_train, epochs=50, batch_size=32,
validation_data=(X_test, Y_test),
callbacks=[early_stopping])
scores_lstm = model_lstm.evaluate(X_test, Y_test, verbose=0)
I am getting the error:
File "testing.py", line 64, in <module> callbacks=[early_stopping])
...
ValueError: logits and labels must have the same shape ((None, 10) vs (None, 12))
It seems it has to do with neurons in the final dense layer (12), which do not correspond to 10 classes, but don't how to fix it.
Upvotes: 0
Views: 330
Reputation: 1729
Change the value in nb_labels
to 10 and set you activation to 'softmax'. Sigmoid is for binary cases.
Upvotes: 1