New to ML and TensorFlow in general. Im getting this issue when I try to run this line (t_loss = loss_object(labels, predictions)) in the train_step function.
I feel i'm missing something super small and stupid! Checked other solutions and from what i can gather its for older versions of TF or syntax and structure is different. Below snippet is executable. Just feel like i dont understand enough after googling. Any help is appreciated.
Error Received
ValueError: Shape mismatch: The shape of labels (received (30,)) should equal the shape of logits except for the last dimension (received (2, 10)).
I am following this write up and adding my own spin if possible. GCP TF sample writeup '
import tensorflow as tf
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from tensorflow.keras import backend as K;
import nltk
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
#try sklearn
from sklearn.model_selection import train_test_split
# staging and vars
data = data[pd.notnull(data['utext'])]
data=data[data.type != 'None']
# encode unique values of the types
le = LabelEncoder()
data['type'] = le.fit_transform(data['type'])
training_data = []
testTrain_data = []
# create a dictionary of data based on type
for index,row in data.iterrows():
training_data.append({"class":row["type"], "sentence":row["fulltext"]})
words = []
classes = []
documents= []
not_required= ['?']
# create our training data
training = []
output = []
lanStemmer = LancasterStemmer()
def stemDocWord(words=words, classes=classes):
# loop through each sentence in our training data
for pattern in training_data:
# tokenize each word in the sentence
w = nltk.word_tokenize(pattern['sentence'])
# add to our words list
documents.append((w, pattern['class']))
# add to our classes list
if pattern['class'] not in classes:
# stem and lower each word and remove duplicates
stemmer = PorterStemmer()
words = [stemmer.stem(w.lower()) for w in words if w not in not_required]
words = list(set(words))
# remove duplicates
classes = list(set(classes))
print(len(documents), "documents")
def listWordTokensForPattern():
# create an empty array for our output
output_empty = [0] * len(classes)
# training set, bag of words for each sentence
for doc in documents:
# initialize our bag of words
bag = []
# list of tokenized words for the pattern
pattern_words = doc[0]
# stem each word
pattern_words = [lanStemmer.stem(word.lower()) for word in pattern_words]
# create our bag of words array
for w in words:
bag.append(1) if w in pattern_words else bag.append(0)
# output is a '0' for each tag and '1' for current tag
output_row = list(output_empty)
output_row[classes.index(doc[1])] = 1
print("# output", len(output))
print("# training", len(training))
# og training function
X = np.array(training)
y = np.array(output)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=23)
x_train, x_test = x_train / 255.0, x_test / 255.0
# Add a channels dimension e.g. (60000, 28, 28) => (60000, 28, 28, 1)
x_train = x_train[..., tf.newaxis, tf.newaxis]
x_test = x_test[..., tf.newaxis, tf.newaxis]
train_ds =, y_train)).shuffle(100).batch(2)
test_ds =, y_test)).batch(2)
# inputs_ = tf.compat.v1.placeholder(tf.float32, [None, 32, 32, 3])
# inputs_ = tf.Variable(tf.ones(shape=(0 ,32, 32, 3)), name="inputs_")
class CustomModel(Model):
def __init__(self):
super(CustomModel, self).__init__()
self.conv1 = Conv2D(2, 1,activation='relu')#, input_shape=x_train.shape)#x_train.shape())
self.flatten = Flatten()
self.d1 = Dense(128, activation='relu')
self.d2 = Dense(10, activation='softmax')
def call(self, x):
x = self.conv1(x)
x = self.flatten(x)
x = self.d1(x)
return self.d2(x)
model = CustomModel()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none')
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_accuracy(labels, predictions)
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
test_accuracy(labels, predictions)
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(images, labels)
for test_images, test_labels in test_ds:
test_step(test_images, test_labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print (template.format(epoch+1,
# Save the weights
the CSV File looks similar to this
t1,"some random sentence",type1
t2,"some other random sentence",type2
t3,"some more random text",type3
Below is what i get regarding my first comment:
WARNING:tensorflow:Entity <function train_step at 0x000001E9480B11E0> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, 'export AUTOGRAPH_VERBOSITY=10') and attach the full output. Cause: converting <function train_step at 0x000001E9480B11E0>: AttributeError: module 'gast' has no attribute 'Str'
WARNING:tensorflow:Entity <bound method of <__main__.CustomModel object at 0x000001E947C9D358>> could not be transformed and will be executed as-is. Please report this to
the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, 'export AUTOGRAPH_VERBOSITY=10') and attach the full output. Cause: converting <bound method of <__main__.CustomModel object at 0x000001E947C9D358>>: AssertionError: Bad argument number for Name: 3, expecting 4
2020-02-29 12:14:46.018316: W tensorflow/core/framework/] Allocation of 2683371520 exceeds 10% of system memory.
2020-02-29 12:14:47.459793: W tensorflow/core/framework/] Allocation of 2683371520 exceeds 10% of system memory.
2020-02-29 12:14:47.869789: W tensorflow/core/framework/] Allocation of 2683371520 exceeds 10% of system memory.
Solved, the issue was my training x was one-hot encoded so my loss methond was incorrect. Just had to change the keras module to non sparse and bingo.
