Reputation: 1216
New to tf/python and have created a model that classifies text with a toxicity level (obscene, toxic, threat, etc). This is what I have so far and it does produce the summary, so I know it is loading correctly. How do I pass text to the model to return a prediction? Any help would be much appreciated.
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
# Check its architecture
new_model.summary()
inputs = [
"tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment "
]
predictions = new_model.predict(inputs)
print(predictions)
I get many error messages, some of the long winded ones are as follows:
WARNING:tensorflow:Model was constructed with shape (None, 150) for input KerasTensor(type_spec=TensorSpec(shape=(None, 150), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'"), but it was called on an input with incompatible shape (None, 1).
ValueError: Negative dimension size caused by subtracting 3 from 1 for '{{node model/conv1d/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](model/conv1d/conv1d/ExpandDims, model/conv1d/conv1d/ExpandDims_1)' with input shapes: [?,1,1,256], [1,3,256,64].
This is the py code used to create and test it/prediction which works perfectly:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
TRAIN_DATA = "datasets/train.csv"
GLOVE_EMBEDDING = "embedding/glove.6B.100d.txt"
train = pd.read_csv(TRAIN_DATA)
train["comment_text"].fillna("fillna")
x_train = train["comment_text"].str.lower()
y_train = train[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values
max_words = 100000
max_len = 150
embed_size = 100
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(x_train)
x_train = tokenizer.texts_to_sequences(x_train)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
embeddings_index = {}
with open(GLOVE_EMBEDDING, encoding='utf8') as f:
for line in f:
values = line.rstrip().rsplit(' ')
word = values[0]
embed = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = embed
word_index = tokenizer.word_index
num_words = min(max_words, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embed_size), dtype='float32')
for word, i in word_index.items():
if i >= max_words:
continue
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
embedding_matrix[i] = embedding_vector
input = tf.keras.layers.Input(shape=(max_len,))
x = tf.keras.layers.Embedding(max_words, embed_size, weights=[embedding_matrix], trainable=False)(input)
x = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(128, return_sequences=True, dropout=0.1,
recurrent_dropout=0.1))(x)
x = tf.keras.layers.Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
avg_pool = tf.keras.layers.GlobalAveragePooling1D()(x)
max_pool = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([avg_pool, max_pool])
preds = tf.keras.layers.Dense(6, activation="sigmoid")(x)
model = tf.keras.Model(input, preds)
model.summary()
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=1e-3), metrics=['accuracy'])
batch_size = 128
checkpoint_path = "tf_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
save_weights_only=True,
verbose=1)
callbacks = [
tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss'),
tf.keras.callbacks.TensorBoard(log_dir='logs'),
cp_callback
]
model.fit(x_train, y_train, validation_split=0.2, batch_size=batch_size,
epochs=1, callbacks=callbacks, verbose=1)
latest = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest)
# Save the entire model as a SavedModel.
model.save('tf_model/the_model')
predictions = model.predict(np.expand_dims(x_train[42], 0))
print(tokenizer.sequences_to_texts([x_train[42]]))
print(y_train[42])
print(predictions)
Final solution:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
max_words = 100000
max_len = 150
# Check its architecture
# new_model.summary()
inputs = ["tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment."]
# use same tokenizer used to build model
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(inputs)
# pass string to tokenizer and that 'array' is passed to predict
sequence = tokenizer.texts_to_sequences(inputs) # same tokenizer which is used on train data.
sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen = max_len)
predictions = new_model.predict(sequence)
print(predictions)
# [[0.0365479 0.01275077 0.02102855 0.00647011 0.02302513 0.00406089]]
Upvotes: 1
Views: 1742
Reputation: 5079
It needs to be processed in the same way. This can be done with:
inputs = [
"tenserflow seems like it fits the bill but there are zero tutorials that outline
how to reuse a model in a production environment"]
sequence = tokenizer.texts_to_sequences(inputs) # same tokenizer which is used on train data.
sequence = pad_sequences(sequence, maxlen = max_len)
predictions = new_model.predict(sequence)
Upvotes: 2