Reputation: 81
def ml_1(epochs, lay1, lay2):
    """Train a small binary classifier for `epochs` passes over 99 data chunks.

    Args:
        epochs: number of full passes over the data chunks.
        lay1: width of the first hidden Dense layer.
        lay2: width of the second hidden Dense layer.
    """
    # 3 input features -> two ReLU hidden layers -> single sigmoid unit,
    # trained with plain SGD on binary cross-entropy.
    net = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(3,)),
        tf.keras.layers.Dense(lay1, activation='relu'),
        tf.keras.layers.Dense(lay2, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    sgd = tf.keras.optimizers.SGD(learning_rate=1e-3)
    bce = tf.keras.losses.BinaryCrossentropy()
    for _epoch in range(epochs):
        for chunk in range(1, 100):
            # get_data(chunk) returns an (n x 3) dataframe of digits for X and
            # a boolean series for y; both are converted to tensors via
            # tf.convert_to_tensor().  (get_data is defined elsewhere.)
            X_train, X_test, y_train, y_test = get_data(chunk)
            with tf.GradientTape() as tape:
                preds = net(X_train, training=True)
                step_loss = bce(y_train, preds)
                print(step_loss)
            grads = tape.gradient(step_loss, net.trainable_weights)
            sgd.apply_gradients(zip(grads, net.trainable_weights))
I have this function that should train a binary neural-network classifier. Each time I call get_data with a new i, it returns a new (X_train, X_test, y_train, y_test). But it's not working: print(loss_value) prints nan every time. What am I doing wrong — am I even choosing the right loss function?
Upvotes: 1
Views: 233
Reputation: 26718
The NaN loss values are probably caused by the data you are feeding to your model. Here is a simple working example:
import tensorflow as tf

# Same three-feature architecture as in the question, with fixed layer widths.
clf = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(3,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
opt = tf.keras.optimizers.SGD(learning_rate=1e-3)
criterion = tf.keras.losses.BinaryCrossentropy()

epochs = 5
for epoch in range(epochs):
    for step in range(1, 2):
        # Well-formed stand-in data: 100 rows of 3 normally-distributed
        # features and integer {0, 1} labels.
        features = tf.random.normal((100, 3))
        labels = tf.random.uniform((100,), maxval=2, dtype=tf.int32)
        with tf.GradientTape() as tape:
            probs = clf(features, training=True)
            step_loss = criterion(labels, probs)
            print(step_loss)
        grads = tape.gradient(step_loss, clf.trainable_weights)
        opt.apply_gradients(zip(grads, clf.trainable_weights))
tf.Tensor(0.69102806, shape=(), dtype=float32)
tf.Tensor(0.70286894, shape=(), dtype=float32)
tf.Tensor(0.68930304, shape=(), dtype=float32)
tf.Tensor(0.70442116, shape=(), dtype=float32)
tf.Tensor(0.69840324, shape=(), dtype=float32)
So try printing X_train and y_train, and check that they have the right shape and do not contain any NaN values.
Upvotes: 1