Reputation: 6854
I am logging using the tf.summary.scalar
method AND with tf.train.LoggingTensorHook
for some tensors. This is with the tf.estimator.Estimator
framework.
The tf.train.LoggingTensorHook
stuff is not even showing up AFAIK. The other stuff is showing but apparently without time steps.
Graphs and everything else (weights) look ok in tensorboard.
UPDATE: it looks like calling train multiple times results in a graph. Is there something about steps
and every_n_iter
that do not interact as expected?
import numpy as np
import tensorflow as tf
m = 10000
n = 5
X = np.random.randn(m, n)
A = np.random.randn(n)
y = X.dot(A) + np.random.randn(m) * 0.1
batch_size = 1024
def input_fn(batch_size):
ds = tf.data.Dataset.from_tensor_slices(dict(X=X, y=y))
ds = ds.repeat(-1)
ds = ds.batch(batch_size)
return ds
def model_fn(features, labels, mode, params):
X = features['X']
y = features['y']
l = X
for i, k in enumerate([32, 16, 16]):
l = tf.layers.dense(inputs=l, units=k, name=f'l_{i}', activation=tf.nn.tanh)
some_thing = tf.reduce_sum(l, axis=1, name='some_thing')
l = tf.layers.dense(inputs=l, units=1, name='l_final')
predictions = tf.squeeze(l, axis=-1)
loss = tf.losses.mean_squared_error(y, predictions, weights=1.0)
metric_ops = {"mse": tf.metrics.mean_squared_error(labels=y, predictions=predictions)}
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, eval_metric_ops=metric_ops)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=metric_ops)
raise Exception('should not hit this')
model = tf.estimator.Estimator(
model_fn=model_fn,
model_dir='/tmp/junk',
config=None,
params=dict(),
warm_start_from=None
)
tensors_to_log = dict(some_thing='some_thing')
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=10)
train_input_fn = lambda: input_fn(batch_size)
test_input_fn = lambda: input_fn(batch_size)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, hooks=[logging_hook], max_steps=100)
eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn, hooks=[logging_hook])
out = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
UPDATE: This one does not show in tensorboard until the end of the run and then it only shows one point too.
import numpy as np
import tensorflow as tf
# tf.enable_eager_execution()
tf.logging.set_verbosity(tf.logging.INFO)
m = 10000
n = 5
X = np.random.randn(m, n)
A = np.random.randn(n)
y = X.dot(A) + np.random.randn(m) * 0.1
steps = 1000
batch_size = 1024
def input_fn(repeat, batch_size):
ds = tf.data.Dataset.from_tensor_slices(dict(X=X, y=y))
ds = ds.repeat(repeat)
ds = ds.batch(batch_size)
return ds
def model_fn(features, labels, mode, params):
X = features['X']
y = features['y']
l = X
for i, k in enumerate([32, 16, 16]):
l = tf.layers.dense(inputs=l, units=k, name=f'l_{i}', activation=tf.nn.tanh)
some_thing = tf.reduce_sum(l, axis=1, name='some_thing')
l = tf.layers.dense(inputs=l, units=1, name='l_final')
predictions = tf.squeeze(l, axis=-1)
loss = tf.losses.mean_squared_error(y, predictions, weights=1.0)
metric_ops = {"mse": tf.metrics.mean_squared_error(labels=y, predictions=predictions)}
tf.summary.scalar('summary_loss', loss) # plot a dist across the batch
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, eval_metric_ops=metric_ops)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=metric_ops)
raise Exception('should not hit this')
model = tf.estimator.Estimator(
model_fn=model_fn,
model_dir='/tmp/junk',
config=None,
params=dict(),
warm_start_from=None
)
tensors_to_log = dict(some_thing='some_thing')
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=10)
train_input_fn = lambda: input_fn(steps, batch_size)
test_input_fn = lambda: input_fn(steps, batch_size)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, hooks=[logging_hook], max_steps=None)
eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn, hooks=[logging_hook])
out = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
Upvotes: 4
Views: 1974
Reputation: 449
I ran into a very similar problem. and I was surprised that the solution is very simple. Close TensorBoard and initiated again, and wait for a few minutes. it takes time to catch up. For some reason, if you initiate TensorBoard during the training it will stuck. I hope this will help. I was running the code on google cloud
from google.datalab.ml import TensorBoard
TensorBoard().start('gs://{}/directoy_where_my_models_are'.format(BUCKET))
Upvotes: 2