Hayaialaka
Hayaialaka

Reputation: 3

SARIMAX for demand forecasting

The code below runs SARIMAX to predict product demand by customer. First, the product types by customer are considered as a single combination, and then STL decomposition is performed on each combination, and the residuals from the decomposition results are used as dependent variables. Next, various variables are used as explanatory variables to run SARIMAX, and finally, the final demand is generated by multiplying the SARIMAX results by seasonality and trend. However, when I do this, the predicted value is set to be significantly higher than the actual value. Is there something wrong with the code? The data is normal, and there are no particular outliers

# SARIMAX 
import time
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.seasonal import STL

scaler = MinMaxScaler()
residual_logs_dict = {}
trend_logs_dict = {}
seasonal_logs_dict = {}
exog_train_dict = {}
exog_test_dict = {}

results_table = []

# Grouping
for group_key, train_group in train_grouped:
    test_group = test_grouped.get_group(group_key) if group_key in test_grouped.groups else None
    if test_group is None:
        continue

    industry, customer, box_type = group_key

    train_agg = train_group.groupby('ARV_REQ_DAT')['REQ_QTY'].sum().reindex(
        pd.date_range(start=train_data['ARV_REQ_DAT'].min(), end=train_data['ARV_REQ_DAT'].max(), freq='D'), fill_value=0
    ).sort_index()
    test_agg = test_group.groupby('ARV_REQ_DAT')['REQ_QTY'].sum().reindex(
        pd.date_range(start=test_data['ARV_REQ_DAT'].min(), end=test_data['ARV_REQ_DAT'].max(), freq='D'), fill_value=0
    ).sort_index()

    train_agg.fillna(0, inplace=True)
    test_agg.fillna(0, inplace=True)

    train_agg_log = np.log1p(train_agg)
    stl = STL(train_agg_log, period=90, seasonal=15, robust=True).fit()
    trend_logs_dict[group_key] = stl.trend
    seasonal_logs_dict[group_key] = stl.seasonal
    residual_logs_dict[group_key] = stl.resid

# explantory variables (train)
    exog_train = pd.DataFrame({
        'IS_WEEKDAY': train_group.groupby('ARV_REQ_DAT')['IS_WEEKDAY'].mean().reindex(train_agg.index, fill_value=0),
        'IS_HOLIDAY': train_group.groupby('ARV_REQ_DAT')['IS_HOLIDAY'].mean().reindex(train_agg.index, fill_value=0),
        'IS_HOLIDAY2_PERIOD': train_group.groupby('ARV_REQ_DAT')
    }).reindex(train_agg.index, fill_value=0)

# explantory variables (test)
    exog_test = pd.DataFrame({
        'Industry_Trend': industry_trend_scaled_test,
        'Industry_Seasonal': industry_seasonal_scaled_test,
        'IS_WEEKDAY': test_group.groupby('ARV_REQ_DAT')['IS_WEEKDAY'].mean().reindex(test_agg.index, fill_value=0),
        'IS_HOLIDAY': test_group.groupby('ARV_REQ_DAT')['IS_HOLIDAY'].mean().reindex(test_agg.index, fill_value=0),
    }).reindex(test_agg.index, fill_value=0)

    exog_train_dict[group_key] = exog_train
    exog_test_dict[group_key] = exog_test


# Training and forecasting
sarimax_models = {}
predicted_residuals_dict = {}

for group_key in residual_logs_dict.keys():
    residual_log = residual_logs_dict[group_key]
    exog_train = exog_train_dict[group_key]
    exog_test = exog_test_dict[group_key]

    try:
        sarimax_model = SARIMAX(
            residual_log, exog=exog_train, order=(1, 1, 1),
            seasonal_order=(1, 1, 1, 90), enforce_stationarity=False, enforce_invertibility=False
        ).fit(disp=False)
        sarimax_models[group_key] = sarimax_model
        predicted_residuals = sarimax_model.predict(
            start=len(residual_log), end=len(residual_log) + len(exog_test) - 1, exog=exog_test
        )
        predicted_residuals_dict[group_key] = predicted_residuals
    except Exception as e:
        print(f"Error in group {group_key}: {e}")

final_demand_dict = {}
for group_key, predicted_residuals in predicted_residuals_dict.items():
    trend_log = trend_logs_dict[group_key].reindex_like(predicted_residuals)
    seasonal_log = seasonal_logs_dict[group_key].reindex_like(predicted_residuals)
    
    log_final_forecast = trend_log + seasonal_log + predicted_residuals
    final_forecast = np.expm1(log_final_forecast)

Upvotes: 0

Views: 28

Answers (0)

Related Questions