more on my thoughts

Reputation: 81

Loss function with GradientTape returns None

import tensorflow as tf

def ml_1(epochs, lay1, lay2):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(3,)),
        tf.keras.layers.Dense(lay1, activation='relu'),
        tf.keras.layers.Dense(lay2, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
    loss_fn = tf.keras.losses.BinaryCrossentropy()

    for epoch in range(epochs):
        for i in range(1, 100):
            # get_data(i) returns an (n x 3) dataframe of digits for X and a
            # boolean series for y; both are converted into tensors with
            # tf.convert_to_tensor()
            X_train, X_test, y_train, y_test = get_data(i)

            with tf.GradientTape() as tape:
                logits = model(X_train, training=True)
                loss_value = loss_fn(y_train, logits)
                print(loss_value)

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

I have this function that should train a binary classification neural network. Each time I call get_data with a new i, it returns a new (X_train, X_test, y_train, y_test). But it's not working: print(loss_value) prints nan every time. What am I doing wrong? Am I choosing the right loss function?

Upvotes: 1

Views: 233

Answers (1)

AloneTogether

Reputation: 26718

The NaN loss values are most likely caused by the data you are feeding to your model. Here is a simple working example:

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(3,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = tf.keras.losses.BinaryCrossentropy()
epochs = 5

for epoch in range(epochs):
    for i in range(1, 2):
        # well-behaved dummy data with the shapes your model expects
        X_train = tf.random.normal((100, 3))
        y_train = tf.random.uniform((100,), maxval=2, dtype=tf.int32)

        with tf.GradientTape() as tape:
            logits = model(X_train, training=True)
            loss_value = loss_fn(y_train, logits)
            print(loss_value)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
Running it prints finite loss values:

tf.Tensor(0.69102806, shape=(), dtype=float32)
tf.Tensor(0.70286894, shape=(), dtype=float32)
tf.Tensor(0.68930304, shape=(), dtype=float32)
tf.Tensor(0.70442116, shape=(), dtype=float32)
tf.Tensor(0.69840324, shape=(), dtype=float32)

So maybe try printing X_train and y_train and checking whether they have the right shapes and whether they contain NaN values.
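If you want to do that check programmatically, here is a minimal sketch (the assert_finite helper is hypothetical, not part of TensorFlow; it assumes X_train and y_train are already tensors, as in your code):

import tensorflow as tf

def assert_finite(name, tensor):
    # Cast to float so tf.math.is_nan also works on integer label tensors
    t = tf.cast(tensor, tf.float32)
    if bool(tf.reduce_any(tf.math.is_nan(t))):
        raise ValueError(f"{name} contains NaN values")
    print(f"{name}: shape={t.shape}, no NaNs")

# Inside the training loop, before the GradientTape block:
# assert_finite("X_train", X_train)
# assert_finite("y_train", y_train)

TensorFlow also ships tf.debugging.check_numerics(tensor, message), which raises an InvalidArgumentError if a float tensor contains any NaN or Inf values, and can be used instead of the manual check above.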

Upvotes: 1
