Lomi

Reputation: 1

Reproducibility between Ray Tune and PyTorch / Darts

I want to tune the hyperparameters of my TFT model. To ensure reproducibility, I first want to check that I obtain the same results with the script that uses Darts alone and the one that uses Ray Tune on top of Darts.

Since everything should be the same (hyperparameters, seed, etc.), I would have expected to obtain the same results.

In fact, if I call my train_model(model_args, callbacks) directly, without Ray Tune or any parameters, I get the same results as plain Darts. It looks like the process used by Ray Tune adds some steps I cannot reproduce in Darts (but these steps are not random, as I get consistent results).

Do you have any idea where I should look in order to reproduce the results between the two implementations?
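To narrow this down, the diagnostic I am thinking of is to print a small fingerprint of the RNG state right after the seeding calls inside train_model and to compare it between the plain run and the Ray Tune run. A minimal sketch of what I mean (rng_fingerprint is just an illustrative helper, not something provided by Darts or Ray):

import random
import numpy as np
import torch

def rng_fingerprint():
    # Draw one value from each RNG right after seeding; if the plain script
    # and the Ray Tune worker process are seeded identically, the printed
    # tuples must be identical as well.
    return (
        random.random(),          # Python RNG
        float(np.random.rand()),  # NumPy RNG
        float(torch.rand(1)),     # PyTorch CPU RNG
    )

If the tuples already differ at that point, the divergence comes from the seeding itself; if they match, it has to come from a later step (data loading order, callbacks, etc.). Since the call consumes one draw from each RNG, it would have to be done in both runs or in neither.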

My code:

import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import random 

from ray import tune, train
from ray.tune import Tuner
from ray.tune import CLIReporter
from ray.tune.integration.pytorch_lightning import TuneReportCallback, TuneReportCheckpointCallback
from pytorch_lightning.callbacks import EarlyStopping
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch

from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts import TimeSeries
from darts.metrics import mape
from pytorch_lightning import seed_everything
from torch.optim.lr_scheduler import ExponentialLR

from Custom_Loss import WeightedMSELoss

# Read data
XY_full = pd.read_csv("jour.csv")
XY_full["date"] = pd.to_datetime(XY_full["date"]).dt.tz_localize(None)
XY_ts = TimeSeries.from_dataframe(XY_full, time_col="date").astype(np.float32)
y = XY_ts[["y"]]
x = XY_ts.drop_columns("y")

# Scaling
transformer_y = Scaler()
transformer_x = Scaler()
train_x, val_x = x.split_before(pd.Timestamp("20210430"))
train_y, val_y = y.split_before(pd.Timestamp("20210430"))

scaler_y = transformer_y.fit(train_y)
scaler_x = transformer_x.fit(train_x)

train_y = scaler_y.transform(train_y)
train_x = scaler_x.transform(train_x)

val_y = scaler_y.transform(val_y)
val_x = scaler_x.transform(val_x)

def train_model(model_args, callbacks):
    # model_args (the Ray Tune config) is deliberately not used below: every
    # hyperparameter is hardcoded so that the Ray Tune run and the direct
    # call train exactly the same model.
    # Seed every RNG source so both execution paths start from the same state.
    # enable_reproducibility(1)
    torch.use_deterministic_algorithms(True)
    torch.manual_seed(1)
    seed_everything(1, workers=True)
    random.seed(1)
    np.random.seed(1)
    
    # Create the model
    model = TFTModel(
        lstm_layers=1, 
        n_epochs=2, 
        optimizer_kwargs={'lr':0.005}, 
        random_state=42, 
        num_attention_heads=4,
        batch_size=16,
        hidden_size=16,
        input_chunk_length=7,
        output_chunk_length=1,
        dropout=0.1,
        add_encoders={
            'cyclic': {"future": ["month", "day", "day_of_year", "day_of_week", "weekofyear"]},
            'datetime_attribute': {"future": ["month", "dayofyear", "day_of_week"]}
        },
        likelihood=None,
        lr_scheduler_cls=ExponentialLR,
        lr_scheduler_kwargs={"gamma": 0.95},
        loss_fn=WeightedMSELoss(threshold=0.75, high_weight=4.0, low_weight=1.0),
        pl_trainer_kwargs={"accelerator": "auto", "callbacks": callbacks, "enable_progress_bar": True},
    )
    
    model.fit(
        series=train_y,
        val_series=val_y,
        future_covariates=train_x,
        val_future_covariates=val_x,
    )

# Ray Tune configuration
tune_callback = TuneReportCheckpointCallback({"val_loss": "val_loss"}, on="validation_end")

# Search space (not actually consumed by train_model in this comparison,
# since the hyperparameters are hardcoded there)
config = {
    "num_attention_heads": tune.choice([2, 3, 4, 5, 6]), 
    "batch_size": tune.choice([16, 32]), 
    "hidden_size": tune.choice([2, 6, 10, 14, 16, 18, 20, 24]), 
    "output_chunk_length": tune.choice([1, 2, 3, 4, 5, 6, 12, 24, 48, 96, 200]),
    "input_chunk_length": tune.choice([2, 4, 6, 8, 10, 12, 48, 96, 100, 300]),
    "dropout": tune.uniform(0, 0.2)
}


tuner = Tuner(
    tune.with_parameters(train_model, callbacks=[tune_callback]),
    param_space=config,
    tune_config=tune.TuneConfig(
        # search_alg=OptunaSearch(),
        num_samples=1, 
        metric="val_loss", 
        mode="min", 
        scheduler=ASHAScheduler(max_t=1000, grace_period=3, reduction_factor=2),
        trial_dirname_creator=lambda trial: "trial_" + str(trial.trial_id)
    )
)

analysis = tuner.fit()  # Train / Val loss after 2 epochs => 0.00644 / 0.0118
train_model([], [])     # Train / Val loss after 2 epochs => 0.00718 / 0.0221
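
For completeness, this is the extra determinism setup I am considering adding at the top of train_model, in case the Ray Tune worker process does not pick up the same environment as the plain script (a sketch only; the environment variables are the standard PyTorch settings for deterministic CUDA, not something I have confirmed fixes the gap):

import os
import torch

def force_determinism(seed: int = 1):
    # Standard PyTorch determinism settings, applied inside the trainable so
    # that they take effect in the Ray Tune worker process as well.
    os.environ["PYTHONHASHSEED"] = str(seed)
    # Required by PyTorch when torch.use_deterministic_algorithms(True)
    # is combined with CUDA/cuBLAS operations.
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

As far as I understand, passing "deterministic": True in pl_trainer_kwargs would be another way to let Lightning enforce the same behaviour.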

Upvotes: 0

Views: 77

Answers (0)
