Wilson
Wilson

Reputation: 666

Training logistic regression with tf.GradientTape() can't converge

I trained a logistic regression model with tf.GradientTape, but it does not converge. The same model trained with Keras compile/fit works:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Toy dataset: three 2-D points, one per row.
xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
# One binary label per row of xs; this is a 1-D array of shape (3,).
ys = np.array([1, 0, 0])

def lr_model():
    """Build a logistic-regression model as a single sigmoid Dense unit.

    Returns:
        A ``keras.Model`` mapping a batch of 2-feature inputs to one
        sigmoid output per row.
    """
    # shape must be a tuple: ``(2)`` is just the integer 2, ``(2,)`` is the
    # one-element tuple describing a 2-feature input.
    inputs = keras.Input(shape=(2,))
    outputs = layers.Dense(1, activation='sigmoid')(inputs)

    return keras.Model(inputs=inputs, outputs=outputs)

# Reference run: let Keras drive the training via compile/fit.
model = lr_model()

model.compile(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.SGD(0.1),
    metrics=['accuracy'],
)

# All three samples form a single batch, so each epoch is one SGD step.
history = model.fit(xs, ys, batch_size=3, epochs=10)

# Print epoch index, loss and accuracy for each of the 10 epochs.
per_epoch = zip(history.history['loss'], history.history['accuracy'])
for epoch, (epoch_loss, epoch_acc) in enumerate(per_epoch):
    print(epoch, epoch_loss, epoch_acc)

It converges, and the results are:
0 1.04525887966156 0.0
1 0.9557339549064636 0.0
2 0.8753216862678528 0.0
3 0.8033372759819031 0.0
4 0.7390384674072266 0.0
5 0.6816689968109131 0.6666667
6 0.6304909586906433 1.0
7 0.5848075151443481 1.0
8 0.5439766049385071 1.0
9 0.5074175596237183 1.0

But I want to write my own training loop, as shown below:

# Metric objects that train_step feeds after every update.
train_loss = keras.metrics.Mean(name='train_loss')
train_acc = keras.metrics.BinaryAccuracy()

# A fresh model instance and a plain SGD optimizer constructed with 0.1,
# the same value passed to SGD in the compile/fit run.
model = lr_model()
optimizer = keras.optimizers.SGD(0.1)

def train_step(data, labels):
    """Run one gradient-descent update of the global ``model`` on a batch.

    Args:
        data: Batch of input features; cast to float32 before the forward pass.
        labels: Binary targets for the batch, passed unchanged to the loss
            and to the accuracy metric.

    Side effects:
        Updates ``model``'s trainable variables through ``optimizer`` and
        feeds the loss and predictions into ``train_loss`` / ``train_acc``.
    """
    # Only the forward pass and the loss are recorded on the tape.
    with tf.GradientTape() as tape:
        data = tf.cast(data, tf.float32)
        pred = model(data)
        # NOTE(review): the caller passes labels of shape (N,), while the
        # Dense(1) output has shape (N, 1). binary_crossentropy appears to
        # broadcast these against each other instead of pairing them row by
        # row, which would explain the loss stalling near 0.6365. Reshaping
        # labels to (N, 1) before this call should make them line up.
        loss = keras.losses.binary_crossentropy(labels, pred)

    grads = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    train_loss(loss)
    train_acc(labels, pred)

for i in range(100):
    # Clear both metrics so the values printed below reflect this step only.
    train_loss.reset_states()
    train_acc.reset_states()
    train_step(xs, ys)

    # Report loss and accuracy on every 10th step.
    if i % 10 == 0:
        print(i, train_loss.result().numpy(), train_acc.result().numpy())

It does not converge like the previous example, and I don't know why:
0 0.7586897 1.0
10 0.6607897 1.0
20 0.64341 0.6666667
30 0.63867164 0.6666667
40 0.63722247 0.6666667
50 0.63676286 0.6666667
60 0.63661444 0.6666667
70 0.636566 0.6666667
80 0.63654995 0.6666667
90 0.6365447 0.6666667

What is wrong with my code? And how should I modify my tf.GradientTape training code so that it converges the way Keras compile/fit does? Thanks.

Upvotes: 1

Views: 356

Answers (1)

mujjiga
mujjiga

Reputation: 16856

Try this, it works for me:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()

# Toy dataset: three 2-D points, one per row.
xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
# One float label per row of xs (1-D here; reshaped to a column before use
# in the GradientTape loop).
ys = np.array([1., 0., 0.])

def get_model():
    """Return a Keras model: 2-feature input feeding one sigmoid Dense unit."""
    features = keras.Input(shape=(2,))
    sigmoid_unit = layers.Dense(1, activation='sigmoid')
    return keras.Model(inputs=features, outputs=sigmoid_unit(features))


# Without Gradient Tape: Keras runs the training loop itself.
model = get_model()
model.compile(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.SGD(0.1),
    metrics=['accuracy'],
)
# The whole dataset is one batch of 3, trained for 50 epochs.
model.fit(x=xs, y=ys, batch_size=3, epochs=50)

# With Gradient Tape
optimizer = tf.train.GradientDescentOptimizer(0.1)
train_acc = keras.metrics.BinaryAccuracy()
model = get_model()

# The inputs never change, so cast them once instead of on every iteration.
# The labels are reshaped to a column, (N, 1), so they match the (N, 1)
# output of the Dense(1) layer element for element when the loss is computed.
xs_ = tf.cast(xs, tf.float32)
ys_ = tf.cast(ys.reshape(-1, 1), tf.float32)

for i in range(50):
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        pred = model(xs_)
        loss = keras.losses.binary_crossentropy(ys_, pred)

    # Differentiate and update the weights after leaving the tape context.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # train_acc is never reset here, so this prints the accuracy accumulated
    # over all steps so far rather than the accuracy of this step alone.
    print(train_acc(ys_, pred))
        #print (tf.math.reduce_sum(loss))

Upvotes: 1

Related Questions