Reputation: 11
I am trying to detect different hidden market states (regimes) with a hidden Markov model (hmmlearn's `GaussianHMM`), but the model only ever returns a single state and I cannot figure out why.
I tried to capture 4 different states, and am only getting 1 state returned. This became apparent when my plots showed one color only.
How do I get the HMM to identify the different states?
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler
import logging
import sys
# Set up logging: every record (DEBUG and above) goes both to a log file
# that is overwritten on each run and to standard output.
log_handlers = [
    logging.FileHandler("hmm_debug.log", mode='w'),
    logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)
# Data Management
# Download parameters for the AlphaVantage request
api_key = 'HIDDEN'  # Your AlphaVantage API key
symbol = "SPY"
start_date, end_date = "2017-01-01", "2022-06-01"

# Data Extraction using AlphaVantage
logging.info("Fetching data from AlphaVantage...")
ts = TimeSeries(key=api_key, output_format='pandas')
try:
    # Request the full daily adjusted history as a pandas DataFrame
    data, meta_data = ts.get_daily_adjusted(symbol=symbol, outputsize='full')
except Exception as e:
    # Any failure while fetching is fatal for the script: log it and stop
    logging.error(f"Error fetching data: {e}")
    sys.exit(1)

# Abort when the request succeeded but returned no rows
if data.empty:
    logging.error("No data fetched from AlphaVantage. Please check your API key and network connection.")
    sys.exit(1)
logging.info(f"Data for {symbol} fetched successfully.")
# Map the numbered AlphaVantage column labels onto the plain names that the
# rest of the script uses.
column_names = {
    '1. open': 'Open',
    '2. high': 'High',
    '3. low': 'Low',
    '4. close': 'Close',
    '5. adjusted close': 'Adj Close',
    '6. volume': 'Volume',
    '7. dividend amount': 'Dividend Amount',
    '8. split coefficient': 'Split Coefficient',
}
data = data.rename(columns=column_names)

# Use a chronologically ordered datetime index
data.index = pd.to_datetime(data.index)
data = data.sort_index()

# Keep only the rows whose date lies in [start_date, end_date]
in_window = (data.index >= start_date) & (data.index <= end_date)
data = data.loc[in_window]

# Report what survived the date filter
logging.info(f"Data filtered from {start_date} to {end_date}. Number of data points: {len(data)}")
logging.debug(f"First few rows of the data:\n{data.head()}")
# Feature engineering on a fresh frame so `data` itself is left untouched:
#   Returns - day-over-day percentage change of the adjusted close
#   Range   - (High - Low) relative to Low
df = data.assign(
    Returns=data["Adj Close"].pct_change(),
    Range=(data["High"] - data["Low"]) / data["Low"],
)
# Volatility - standard deviation of Returns over a 5-row rolling window
df['Volatility'] = df['Returns'].rolling(window=5).std()

# pct_change and the rolling window leave NaNs in the leading rows;
# discard every row that contains a NaN in any column.
df = df.dropna()
logging.info(f"Data after adding features and dropping NaNs. Number of data points: {len(df)}")
logging.debug(f"First few rows of the dataframe:\n{df.head()}")

# Log how many NaN / infinite entries remain in each model input
logging.info("Checking for NaNs or infinite values in features...")
features = ['Returns', 'Range', 'Volatility']
for feature in features:
    column = df[feature]
    nans = column.isna().sum()
    infs = np.isinf(column).sum()
    logging.info(f"{feature} - NaNs: {nans}, Infs: {infs}")
# Structure Data for HMM
# Observation matrix: one row per remaining date, one column per feature
X = df.loc[:, features].to_numpy()
# Standardise the feature columns with StandardScaler (learn the scaling
# parameters first, then apply them to the same data)
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
logging.info("Features scaled using StandardScaler.")
# HMM Learning
# EM only climbs to a local optimum of the likelihood, so the result depends
# on the random initialisation. Rather than trusting a single seed, fit the
# model from several seeds and keep the one with the highest log-likelihood
# on the training data (the approach recommended in the hmmlearn tutorial).
n_components = 4
n_restarts = 10  # number of differently seeded fits to compare
logging.info(f"Training GaussianHMM with {n_components} components...")
model = None
best_score = -np.inf
for seed in range(42, 42 + n_restarts):  # the first seed is the original 42
    candidate = GaussianHMM(n_components=n_components, covariance_type='full',
                            n_iter=10000, random_state=seed, verbose=False)
    try:
        candidate.fit(X_scaled)
        candidate_score = candidate.score(X_scaled)
    except ValueError as e:
        # NOTE(review): presumably raised by hmmlearn when a fit degenerates
        # (e.g. an invalid covariance matrix) - confirm. Such a restart is
        # skipped instead of aborting the whole run.
        logging.warning(f"Fit with random_state={seed} failed: {e}")
        continue
    logging.debug(f"random_state={seed}: log-likelihood={candidate_score}")
    # A NaN score compares False here, so it can never be selected
    if candidate_score > best_score:
        model, best_score = candidate, candidate_score

# Without any usable fit there is nothing to predict from
if model is None:
    logging.error("GaussianHMM could not be fitted with any initialisation.")
    sys.exit(1)
logging.info(f"Best log-likelihood over {n_restarts} initialisations: {best_score}")

# Check if the selected model has converged
if model.monitor_.converged:
    logging.info(f"Model converged in {model.monitor_.iter} iterations.")
else:
    logging.warning("Model did not converge.")

# Predict the hidden states
hidden_states = model.predict(X_scaled)
unique_states = np.unique(hidden_states)
logging.info(f"Unique states predicted: {unique_states}")
# Make a collapsed solution visible in the log instead of only in the plot
if len(unique_states) < n_components:
    logging.warning(
        f"Only {len(unique_states)} of {n_components} states appear in the decoded sequence."
    )
# Store the decoded state of every row next to its prices and features,
# then write the combined table to disk.
df['State'] = hidden_states
df.to_csv("State_Test.csv")
logging.info("Results saved to State_Test.csv")

# Additional Debugging Information
# How many rows were assigned to each state
state_counts = df['State'].value_counts()
logging.info("Number of data points in each state:")
logging.info(f"\n{state_counts}")

# Fitted mean vector and covariance matrix of every state
logging.info("Means and covariances of each hidden state:")
for i, (state_mean, state_covar) in enumerate(zip(model.means_, model.covars_)):
    logging.info(f"State {i}:")
    logging.info(f"Mean: {state_mean}")
    logging.info(f"Covariance:\n{state_covar}")
# Visualize the hidden states over time: the adjusted close is drawn as
# dots, one colour per decoded state.
import matplotlib.dates as mdates

fig, ax = plt.subplots(figsize=(15, 8))
# Define a color map for states (wraps around if there are more states
# than colours)
state_colors = ['red', 'green', 'blue', 'black', 'purple', 'orange', 'yellow', 'cyan']
for state in unique_states:
    # Select the rows of this state once, instead of chained indexing
    idx = (df['State'] == state)
    state_rows = df.loc[idx]
    ax.plot(state_rows.index, state_rows['Adj Close'], '.',
            label=f'State {state}', color=state_colors[state % len(state_colors)])
ax.legend()
ax.set_title(f'{symbol} Adjusted Close Price Regime Detection')
ax.set_xlabel('Date')
ax.set_ylabel('Adjusted Close Price')
# Format the x-axis dates: one tick per year, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
# Save the plot BEFORE showing it: a blocking show() closes the figure once
# its window is dismissed, so writing the file first does not depend on the
# state the backend leaves the figure in.
fig.savefig("Regime_Detection.png")
logging.info("Plot saved to Regime_Detection.png")
plt.show()
# Silence all further logging for the remainder of the process
logging.disable(sys.maxsize)
Upvotes: 0
Views: 33