Cooper Cardinale
Cooper Cardinale

Reputation: 11

Failure To Capture Different Hidden States

I am trying to detect different hidden market states with a hidden Markov model (HMM), but the fitted model only ever returns a single state, and I cannot figure out why.

I configured the model to capture 4 different states, but only 1 state is returned. This became apparent when my plots showed only one color.

How do I get the HMM to identify the different states?

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler
import logging
import sys

# Configure logging: records at DEBUG level and above are written both to
# "hmm_debug.log" (opened in 'w' mode, so it is overwritten on each run)
# and to standard output.
log_file_handler = logging.FileHandler("hmm_debug.log", mode='w')
console_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
    handlers=[log_file_handler, console_handler],
)

# Data Management

# Download settings for the AlphaVantage request and the date window
# that is applied to the result further down.
api_key = 'HIDDEN'  # Your AlphaVantage API key
start_date = "2017-01-01"
end_date = "2022-06-01"
symbol = "SPY"

logging.info("Fetching data from AlphaVantage...")
# Ask for the 'full' daily adjusted series, returned as a pandas object
ts = TimeSeries(key=api_key, output_format='pandas')
try:
    data, meta_data = ts.get_daily_adjusted(symbol=symbol, outputsize='full')
except Exception as e:
    # Any failure while fetching is fatal for the script: log it and stop.
    logging.error(f"Error fetching data: {e}")
    sys.exit(1)

# Stop early when the call succeeded but returned no rows
if data.empty:
    logging.error("No data fetched from AlphaVantage. Please check your API key and network connection.")
    sys.exit(1)

logging.info(f"Data for {symbol} fetched successfully.")

# Map AlphaVantage's numbered column labels to the names used by the
# rest of the script.
column_names = {
    '1. open': 'Open',
    '2. high': 'High',
    '3. low': 'Low',
    '4. close': 'Close',
    '5. adjusted close': 'Adj Close',
    '6. volume': 'Volume',
    '7. dividend amount': 'Dividend Amount',
    '8. split coefficient': 'Split Coefficient',
}
data = data.rename(columns=column_names)

# Parse the index as datetimes and put the rows in ascending date order
data.index = pd.to_datetime(data.index)
data = data.sort_index()

# Keep only rows whose date lies in [start_date, end_date] (both inclusive)
in_window = (data.index >= start_date) & (data.index <= end_date)
data = data.loc[in_window]

# Report what is left after filtering
logging.info(f"Data filtered from {start_date} to {end_date}. Number of data points: {len(data)}")
logging.debug(f"First few rows of the data:\n{data.head()}")

# Build the model features on a copy of the filtered price frame:
#   Returns    - percentage change of the adjusted close
#   Range      - (High - Low) / Low for each row
#   Volatility - rolling 5-row standard deviation of Returns
df = data.copy()
df["Returns"] = df["Adj Close"].pct_change()
df["Range"] = df["High"].sub(df["Low"]).div(df["Low"])
df['Volatility'] = df['Returns'].rolling(window=5).std()

# pct_change and the rolling window leave NaNs in the leading rows;
# discard every row that contains a NaN in any column.
df = df.dropna()
logging.info(f"Data after adding features and dropping NaNs. Number of data points: {len(df)}")
logging.debug(f"First few rows of the dataframe:\n{df.head()}")

# Sanity check: count remaining NaN / infinite entries per feature column
logging.info("Checking for NaNs or infinite values in features...")
features = ['Returns', 'Range', 'Volatility']
for column in features:
    nan_count = df[column].isna().sum()
    inf_count = np.isinf(df[column]).sum()
    logging.info(f"{column} - NaNs: {nan_count}, Infs: {inf_count}")

# Structure Data for HMM: one row per observation, one column per feature
X = df[features].values

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
logging.info("Features scaled using StandardScaler.")

# HMM Learning

# Train the HMM model
#
# GaussianHMM is fitted with EM, which only reaches a local optimum that
# depends on the random initialisation. A single fit can therefore end up
# in a poor solution, so the model is fitted from several seeds and the
# candidate with the highest log-likelihood on the training data is kept.
# The seed used originally (42) is tried first so that its solution always
# remains one of the candidates.
n_components = 4
restart_seeds = [42, 0, 1, 2, 3, 4, 5, 6, 7, 8]
logging.info(f"Training GaussianHMM with {n_components} components...")
model = None
best_log_likelihood = -np.inf
for seed in restart_seeds:
    candidate = GaussianHMM(n_components=n_components, covariance_type='full', n_iter=10000, random_state=seed, verbose=False)
    try:
        candidate.fit(X_scaled)
        candidate_log_likelihood = candidate.score(X_scaled)
    except ValueError as e:
        # A single failed restart should not abort the run; skip this seed
        # and try the next one.
        logging.warning(f"Fit with random_state={seed} failed: {e}")
        continue
    logging.debug(f"random_state={seed}: log-likelihood {candidate_log_likelihood}")
    # Keep the first successful fit, replace an unusable (NaN) score, or
    # replace the current best when this candidate scores strictly higher.
    if model is None or np.isnan(best_log_likelihood) or candidate_log_likelihood > best_log_likelihood:
        model = candidate
        best_log_likelihood = candidate_log_likelihood

if model is None:
    logging.error("GaussianHMM could not be fitted from any random initialisation.")
    sys.exit(1)
logging.info(f"Best log-likelihood over {len(restart_seeds)} initialisations: {best_log_likelihood}")

# Check if the selected model has converged
if model.monitor_.converged:
    logging.info(f"Model converged in {model.monitor_.iter} iterations.")
else:
    logging.warning("Model did not converge.")

# Predict the hidden states
hidden_states = model.predict(X_scaled)
unique_states = np.unique(hidden_states)
logging.info(f"Unique states predicted: {unique_states}")

# Make a collapsed decoding visible in the log instead of only in the plot
if len(unique_states) < n_components:
    logging.warning(f"Only {len(unique_states)} of {n_components} states appear in the decoded sequence.")

# Store the decoded state of every row next to its features and write the
# whole frame to disk.
df['State'] = hidden_states
df.to_csv("State_Test.csv")
logging.info("Results saved to State_Test.csv")

# Additional Debugging Information

# How many rows were assigned to each state
state_counts = df['State'].value_counts()
logging.info("Number of data points in each state:")
logging.info(f"\n{state_counts}")

# Fitted mean vector and covariance matrix of every state
logging.info("Means and covariances of each hidden state:")
for state_id, (state_mean, state_covar) in enumerate(zip(model.means_, model.covars_)):
    logging.info(f"State {state_id}:")
    logging.info(f"Mean: {state_mean}")
    logging.info(f"Covariance:\n{state_covar}")

# Visualize the hidden states over time: adjusted close price drawn as
# dots, one colour per decoded state.
import matplotlib.dates as mdates

fig, ax = plt.subplots(figsize=(15, 8))

# Colours are picked by state number, wrapping around when there are more
# states than colours.
state_colors = ['red', 'green', 'blue', 'black', 'purple', 'orange', 'yellow', 'cyan']

for state_id in unique_states:
    in_state = df['State'] == state_id
    dot_color = state_colors[state_id % len(state_colors)]
    ax.plot(
        df.index[in_state],
        df.loc[in_state, 'Adj Close'],
        '.',
        label=f'State {state_id}',
        color=dot_color,
    )

# Titles, axis labels and legend
ax.set_title(f'{symbol} Adjusted Close Price Regime Detection')
ax.set_xlabel('Date')
ax.set_ylabel('Adjusted Close Price')
ax.legend()

# One major tick per year, labelled with the four-digit year
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

plt.show()

# Write the figure to disk once the plot has been shown
fig.savefig("Regime_Detection.png")
logging.info("Plot saved to Regime_Detection.png")
# Suppress every subsequent logging call, whatever its level
logging.disable(sys.maxsize)

Upvotes: 0

Views: 33

Answers (0)

Related Questions