Reputation: 666
I trained logistic regression with tf.GradientTape but it can't converge
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
ys = np.array([1, 0, 0])
def lr_model():
inputs = keras.Input(shape=(2))
outputs = layers.Dense(1, activation='sigmoid')(inputs)
return keras.Model(inputs=inputs, outputs=outputs)
model = lr_model()
model.compile(loss=keras.losses.BinaryCrossentropy(),
optimizer=keras.optimizers.SGD(0.1),
metrics=['accuracy'])
history = model.fit(xs, ys, batch_size=3, epochs=10)
for i in range(10):
print(i, history.history['loss'][i], history.history['accuracy'][i])
It converges and the results:
0 1.04525887966156 0.0
1 0.9557339549064636 0.0
2 0.8753216862678528 0.0
3 0.8033372759819031 0.0
4 0.7390384674072266 0.0
5 0.6816689968109131 0.6666667
6 0.6304909586906433 1.0
7 0.5848075151443481 1.0
8 0.5439766049385071 1.0
9 0.5074175596237183 1.0
But I wanna write my training flow listed as below:
train_loss = keras.metrics.Mean(name='train_loss')
train_acc = keras.metrics.BinaryAccuracy()
model = lr_model()
optimizer = keras.optimizers.SGD(0.1)
def train_step(data, labels):
with tf.GradientTape() as tape:
data = tf.cast(data, tf.float32)
pred = model(data)
loss = keras.losses.binary_crossentropy(labels, pred)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
train_loss(loss)
train_acc(labels, pred)
for i in range(100):
train_loss.reset_states()
train_acc.reset_states()
train_step(xs, ys)
if i % 10 == 0:
print(i, train_loss.result().numpy(),train_acc.result().numpy())
It can't converge as the previous example and I don't know why
0 0.7586897 1.0
10 0.6607897 1.0
20 0.64341 0.6666667
30 0.63867164 0.6666667
40 0.63722247 0.6666667
50 0.63676286 0.6666667
60 0.63661444 0.6666667
70 0.636566 0.6666667
80 0.63654995 0.6666667
90 0.6365447 0.6666667
What's the problem with my codes? And how should I modify my training codes by tf.GradientTape to converge as keras compile/fit does? Thanks
Upvotes: 1
Views: 356
Reputation: 16856
Try this, it works for me:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()
xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
ys = np.array([1., 0., 0.])
def get_model():
inputs = keras.Input(shape=(2,))
outputs = layers.Dense(1, activation='sigmoid')(inputs)
return keras.Model(inputs=inputs, outputs=outputs)
# Without Gradient Tape
model = get_model()
model.compile(loss=keras.losses.BinaryCrossentropy(),
optimizer=keras.optimizers.SGD(0.1),
metrics=['accuracy'])
model.fit(xs, ys, batch_size=3, epochs=50)
# With Gradient Tape
optimizer = tf.train.GradientDescentOptimizer(0.1)
train_acc = keras.metrics.BinaryAccuracy()
model = get_model()
for i in range(50):
with tf.GradientTape() as tape:
xs_ = tf.cast(xs, tf.float32)
ys_ = tf.cast(ys.reshape(-1,1), tf.float32)
pred = model(xs_)
loss = keras.losses.binary_crossentropy(ys_, pred)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
print (train_acc(ys_, pred))
#print (tf.math.reduce_sum(loss))
Upvotes: 1