pro

Reputation: 113

How to compile and save the model in a custom TensorFlow training loop

I tried to write a custom training loop following the TensorFlow tutorials, and it gives the following output:

Start of epoch 0
Training loss (for one batch) at step 0: 15.9249
Seen so far: 16 samples
Training loss (for one batch) at step 2: 14.9462
Seen so far: 48 samples
Training loss (for one batch) at step 4: 14.6554
Seen so far: 80 samples
Training loss (for one batch) at step 6: 14.1741
Seen so far: 112 samples
Training acc over epoch: 15.1999
Validation acc: 14.5266
Time taken: 8.02s

In the custom training loop I do not know how to compile the model, or how to save the best model based on a criterion such as "if the loss on the validation set fails to reduce, or remains constant, for 10 consecutive epochs, then the model will be saved to a model.h5 file and the training will be stopped". Moreover, I want to save the training loss and validation loss of each epoch to a CSV file, similar to what the following Keras commands do. I hope experts can help me incorporate a few lines of code for carrying out the tasks mentioned above. Thanks.

#save_model_name = 'model_name' + '.h5'
#early_stopping = EarlyStopping(monitor='val_loss', patience=30, verbose=1)
#model_checkpoint = ModelCheckpoint(save_model_name, monitor='val_R2_score',
#                                   save_best_only=True, verbose=1, mode='max')
#reduce_lr = ReduceLROnPlateau(factor=0.5, monitor='val_loss',
#                              patience=15, min_lr=0.000001, verbose=1)

#csv_logger = CSVLogger(model_name + ".csv", append=True)

My code is

import numpy as np
import tensorflow as tf


@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y, logits)
    return loss_value


@tf.function
def test_step(x, y):
    val_logits = model(x, training=False)
    val_acc_metric.update_state(y, val_logits)


optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = tf.keras.losses.MeanSquaredError()

batch_size = 16



# dataset.

x_train = np.load('x_train_data.npy') 
x_valid = np.load('x_valid_data.npy') 
y_train = np.load('y_train_data.npy') 
y_valid = np.load('y_valid_data.npy') 


#prepare the data for training
x_train = np.expand_dims(x_train, axis=2)
x_valid = np.expand_dims(x_valid, axis=2)
y_train = np.expand_dims(y_train, axis=2)
y_valid = np.expand_dims(y_valid, axis=2)


#prepare the training datasets based on tensorflow
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)



# Prepare the validation dataset based on tensorflow
val_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
val_dataset = val_dataset.batch(batch_size)


train_acc_metric = tf.keras.metrics.MeanSquaredError()
val_acc_metric = tf.keras.metrics.MeanSquaredError()


# model (test_model is my own model-building function, defined elsewhere)
model = test_model(im_width=1, im_height=80, neurons=16, kern_sz=20)
model.summary()

###### custom training loop ######

import time

epochs = 2
losses = []  # collect per-batch training losses
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)
        losses.append(float(loss_value))

        # Log every 2 batches.
        if step % 2 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %d samples" % ((step + 1) * batch_size))
    print(losses)
    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))
    train_acc_metric.reset_states()

     
    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        test_step(x_batch_val, y_batch_val)

    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))

Upvotes: 0

Views: 1022

Answers (1)

delirium78

Reputation: 614

I would suggest you have a look at this: https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit

As it says:

When you need to customize what fit() does, you should override the training step function of the Model class. This is the function that is called by fit() for every batch of data. You will then be able to call fit() as usual - and it will be running your own learning algorithm.

So you could implement a new class that subclasses tf.keras.Model to customize what happens during training. Just add your train_step() and test_step() to your class.
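For example, here is a minimal sketch of that pattern, assuming a TF 2.x version where compile() provides self.compiled_loss and self.compiled_metrics, as in the linked guide (the class name CustomModel is just illustrative):

class CustomModel(tf.keras.Model):
    def train_step(self, data):
        # fit() calls this once per batch of training data.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        # fit() calls this once per batch of validation data.
        x, y = data
        y_pred = self(x, training=False)
        self.compiled_loss(y, y_pred)
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}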

You can then use the compile() and fit() methods as usual, save the losses however you want, and reuse all the already written callbacks (EarlyStopping, ModelCheckpoint, CSVLogger), for instance as sketched below.
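A possible sketch, reusing the optimizer and loss from the question; the Dense layer is only a placeholder for your real architecture, and the patience value and file names are taken from the question's description:

# Build the architecture with the functional API, then wrap it in CustomModel.
inputs = tf.keras.Input(shape=(80, 1))      # input shape assumed from the question
outputs = tf.keras.layers.Dense(1)(inputs)  # placeholder for the actual layers
model = CustomModel(inputs, outputs)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              loss=tf.keras.losses.MeanSquaredError())

callbacks = [
    # Stop when val_loss has not improved for 10 consecutive epochs.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    # Keep only the best model (lowest val_loss) in model.h5.
    tf.keras.callbacks.ModelCheckpoint('model.h5', monitor='val_loss',
                                       save_best_only=True, verbose=1),
    # Append the loss and val_loss of every epoch to a CSV file.
    tf.keras.callbacks.CSVLogger('training_log.csv', append=True),
]

model.fit(train_dataset, validation_data=val_dataset,
          epochs=100, callbacks=callbacks)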

Upvotes: 1
