Reputation: 11
I am trying to develop code to backtest my strategy, but I am running into an error that ChatGPT is unable to resolve. Please try running the code yourself on Google Colab to see if you can get it working.
The error is: `ValueError: Data must be 1-dimensional, got ndarray of shape (1006, 1) instead`
I've tried everything ChatGPT has suggested, but it still isn't working.
Here is the code:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange
# Data Retrieval
def get_data(ticker, start, end):
    """Download price history for *ticker* and expose the date as a column.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start of the date range passed to ``yf.download``.
        end: End of the date range passed to ``yf.download``.

    Returns:
        The downloaded DataFrame after ``reset_index``, with the ``Date``
        column converted to plain ``date`` objects.
    """
    df = yf.download(ticker, start=start, end=end)
    df.reset_index(inplace=True)  # Reset the index to make it easier to work with
    df['Date'] = df['Date'].dt.date  # Convert to date type if needed
    print("Initial DataFrame head:\n", df.head())  # Print the first few rows
    print("Initial DataFrame shape:", df.shape)  # Print the shape of the DataFrame
    # NOTE(review): the column index is returned exactly as yfinance built it;
    # nothing here guarantees that df['Close'] is a 1-D Series -- confirm.
    return df
# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR columns to *df* in place.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``,
    ``StochRSI_K``, ``StochRSI_D`` and ``ATR14``. Rows containing NaN in any
    column are dropped afterwards.

    Args:
        df: Price DataFrame with ``Close``, ``High`` and ``Low`` columns.

    Returns:
        The same DataFrame object, mutated.
    """
    # Exponential Moving Averages (EMAs)
    df['EMA8'] = EMAIndicator(df['Close'], window=8).ema_indicator()
    df['EMA14'] = EMAIndicator(df['Close'], window=14).ema_indicator()
    df['EMA50'] = EMAIndicator(df['Close'], window=50).ema_indicator()
    # Print shapes to debug
    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)
    # Relative Strength Index (RSI)
    df['RSI14'] = RSIIndicator(df['Close'], window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)
    # Stochastic RSI
    # NOTE(review): only the close series is passed to StochasticOscillator
    # here -- confirm against the ta constructor, which may expect high, low
    # and close.
    stoch_rsi = StochasticOscillator(df['Close'], window=14, smooth_window=3)
    df['StochRSI_K'] = stoch_rsi.stoch()
    df['StochRSI_D'] = stoch_rsi.stoch_signal()
    # Print shapes
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)
    # Average True Range (ATR)
    atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=14)
    df['ATR14'] = atr.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)
    # Drop rows with NaN values
    df.dropna(inplace=True)
    return df
# Entry Conditions
def signal_generator(df):
    """Add boolean ``Long`` and ``Short`` entry-signal columns to *df*.

    ``Long`` is True where Close > EMA8 > EMA14 > EMA50 and
    ``StochRSI_K`` is above ``StochRSI_D``; ``Short`` is the mirror image
    with every comparison reversed.

    Args:
        df: DataFrame holding ``Close``, ``EMA8``, ``EMA14``, ``EMA50``,
            ``StochRSI_K`` and ``StochRSI_D`` columns.

    Returns:
        The same DataFrame object, mutated.
    """
    df['Long'] = (
        (df['Close'] > df['EMA8']) &
        (df['EMA8'] > df['EMA14']) &
        (df['EMA14'] > df['EMA50']) &
        (df['StochRSI_K'] > df['StochRSI_D'])
    )
    df['Short'] = (
        (df['Close'] < df['EMA8']) &
        (df['EMA8'] < df['EMA14']) &
        (df['EMA14'] < df['EMA50']) &
        (df['StochRSI_K'] < df['StochRSI_D'])
    )
    return df
# Position Management
def backtest_strategy(df):
    """Walk *df* row by row and collect the P&L of every closed trade.

    A position is opened at the close of a row whose ``Long`` or ``Short``
    flag is set while no position is open. The stop-loss sits 3 x ATR14
    against the trade and the take-profit 2 x ATR14 in its favour, both
    fixed at entry. A position is closed at the first later close that
    reaches either level. A position still open on the last row is not
    recorded.

    Args:
        df: DataFrame with ``Close``, ``ATR14``, ``Long`` and ``Short``.

    Returns:
        List of per-trade results in price units (exit minus entry for
        longs, entry minus exit for shorts), in order of exit.
    """
    position = None
    entry_price = 0
    results = []
    for index, row in df.iterrows():
        if position is None:  # No open position
            if row['Long']:
                position = 'long'
                entry_price = row['Close']
                stop_loss = entry_price - (3 * row['ATR14'])
                take_profit = entry_price + (2 * row['ATR14'])
            elif row['Short']:
                position = 'short'
                entry_price = row['Close']
                stop_loss = entry_price + (3 * row['ATR14'])
                take_profit = entry_price - (2 * row['ATR14'])
        elif position == 'long':
            if row['Close'] >= take_profit or row['Close'] <= stop_loss:
                results.append(row['Close'] - entry_price)  # Profit or loss
                position = None  # Close position
        elif position == 'short':
            if row['Close'] <= take_profit or row['Close'] >= stop_loss:
                results.append(entry_price - row['Close'])  # Profit or loss
                position = None  # Close position
    return results
# Performance Metrics
def calculate_performance(results, df):
    """Add daily-return and cumulative-return columns to *df*.

    Columns written: ``Daily_Returns``, ``Strategy_Returns``,
    ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``.

    Args:
        results: Sequence of per-trade results; each value is compounded as
            a fractional return.
        df: DataFrame with a ``Close`` column.

    Returns:
        The same DataFrame object, mutated.
    """
    df['Daily_Returns'] = df['Close'].pct_change()
    # NOTE(review): the Series built from `results` has a fresh 0..n-1 index
    # and is matched to df by index label on assignment; rows without a
    # matching label presumably end up NaN -- confirm this is intended.
    df['Strategy_Returns'] = pd.Series(results).shift(1).fillna(0)  # Align with dates
    df['Cumulative_Strategy'] = (1 + df['Strategy_Returns']).cumprod()
    df['Cumulative_Buy_Hold'] = (1 + df['Daily_Returns']).cumprod()
    return df
# Visualization
def plot_performance(df):
    """Plot the cumulative strategy curve against the buy-and-hold curve.

    Args:
        df: DataFrame with ``Cumulative_Strategy`` and
            ``Cumulative_Buy_Hold`` columns; each is plotted against the
            DataFrame's index.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(df['Cumulative_Strategy'], label='Strategy Returns', color='blue')
    plt.plot(df['Cumulative_Buy_Hold'], label='Buy and Hold Returns', color='orange')
    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()
# Main Execution
# Backtest configuration: the symbol and date range handed to get_data.
ticker = 'IWV'
start_date = '2020-01-01'
end_date = '2024-01-01'
data = get_data(ticker, start_date, end_date)
# Check the DataFrame contents before calculating indicators
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)
# Now attempt to calculate indicators
data = calculate_indicators(data)
# Derive the Long/Short entry flags from the indicator columns.
data = signal_generator(data)
# Simulate the trades; `results` holds one entry per closed trade.
results = backtest_strategy(data)
# Attach return columns to the frame, then chart them.
data = calculate_performance(results, data)
plot_performance(data)
Upvotes: 1
Views: 79
Reputation: 36
user28102875,
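The problem is that the DataFrame returned by `yf.download` has MultiIndex columns, so `df['Close']` is a one-column DataFrame of shape (1006, 1) rather than a 1-dimensional Series, which is exactly what the error message complains about. You can fix it by flattening the columns: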
# Flatten MultiIndex columns by dropping their second level, so that a
# lookup such as df['Close'] yields a 1-D Series.
if isinstance(df.columns, pd.MultiIndex) and df.columns.nlevels > 1:
    df.columns = df.columns.droplevel(1)
Also, `StochasticOscillator` requires the High, Low and Close series (in that order), not just Close:
# Pass the high, low and close series, in that order, ahead of the windows.
stoch_rsi = StochasticOscillator(df['High'], df['Low'], df['Close'], window=14, smooth_window=3)
Here is your full fixed code (you may need further changes to the strategy logic, but it runs):
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange
# Data Retrieval
def get_data(ticker, start, end):
    """Download price history for *ticker* as a flat-column DataFrame.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start of the date range passed to ``yf.download``.
        end: End of the date range passed to ``yf.download``.

    Returns:
        DataFrame with single-level columns and a ``Date`` column holding
        plain ``date`` objects.

    Raises:
        ValueError: If the download returned no rows.
    """
    df = yf.download(ticker, start=start, end=end)
    if df.empty:
        # Fail here with a clear message instead of deep inside an indicator.
        raise ValueError(
            f"No data returned for {ticker!r} between {start} and {end}"
        )
    # Flatten MultiIndex columns first, by dropping their second level, so
    # every later column lookup (including 'Date' below) yields a 1-D Series
    # and the debug output shows the final layout.
    if isinstance(df.columns, pd.MultiIndex) and df.columns.nlevels > 1:
        df.columns = df.columns.droplevel(1)
    df.reset_index(inplace=True)  # Reset the index to make it easier to work with
    df['Date'] = df['Date'].dt.date  # Convert to date type if needed
    print("Initial DataFrame head:\n", df.head())  # Print the first few rows
    print("Initial DataFrame shape:", df.shape)  # Print the shape of the DataFrame
    return df
# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic-oscillator and ATR columns to *df* in place.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``,
    ``StochRSI_K``, ``StochRSI_D`` and ``ATR14``. Rows containing NaN in any
    column are dropped afterwards, so the returned frame keeps its original
    index labels but has gaps in them.

    Args:
        df: Price DataFrame with 1-D ``High``, ``Low`` and ``Close`` columns.

    Returns:
        The same DataFrame object, mutated.
    """
    high, low, close = df['High'], df['Low'], df['Close']
    ema_windows = (8, 14, 50)

    # Exponential Moving Averages (EMAs)
    for window in ema_windows:
        df[f'EMA{window}'] = EMAIndicator(close, window=window).ema_indicator()
    # Print shapes to debug
    for window in ema_windows:
        print(f"EMA{window} shape:", df[f'EMA{window}'].shape)

    # Relative Strength Index (RSI)
    df['RSI14'] = RSIIndicator(close, window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator. The columns keep their 'StochRSI_*' names
    # because signal_generator reads them, but this is built from
    # high/low/close with StochasticOscillator, not from the RSI.
    stochastic = StochasticOscillator(high, low, close, window=14, smooth_window=3)
    df['StochRSI_K'] = stochastic.stoch()
    df['StochRSI_D'] = stochastic.stoch_signal()
    # Print shapes
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)

    # Average True Range (ATR)
    atr = AverageTrueRange(high, low, close, window=14)
    df['ATR14'] = atr.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Drop rows with NaN values
    df.dropna(inplace=True)
    return df
# Entry Conditions
def signal_generator(df):
    """Add boolean ``Long`` and ``Short`` entry-signal columns to *df*.

    ``Long`` requires Close > EMA8 > EMA14 > EMA50 together with
    ``StochRSI_K`` above ``StochRSI_D``; ``Short`` requires the same chain
    with every comparison reversed. Rows where any compared value is NaN
    get False for both flags.

    Args:
        df: DataFrame holding ``Close``, ``EMA8``, ``EMA14``, ``EMA50``,
            ``StochRSI_K`` and ``StochRSI_D`` columns.

    Returns:
        The same DataFrame object, mutated.
    """
    close = df['Close']
    ema_fast, ema_mid, ema_slow = df['EMA8'], df['EMA14'], df['EMA50']
    stoch_k, stoch_d = df['StochRSI_K'], df['StochRSI_D']

    # Price and the three averages stacked in strict order.
    uptrend = (close > ema_fast) & (ema_fast > ema_mid) & (ema_mid > ema_slow)
    downtrend = (close < ema_fast) & (ema_fast < ema_mid) & (ema_mid < ema_slow)

    df['Long'] = uptrend & (stoch_k > stoch_d)
    df['Short'] = downtrend & (stoch_k < stoch_d)
    return df
# Position Management
def backtest_strategy(df, stop_atr_mult=3, take_atr_mult=2, as_returns=False):
    """Walk *df* row by row and collect the result of every closed trade.

    A position is opened at the close of a row whose ``Long`` or ``Short``
    flag is set while no position is open (``Long`` wins if both are set).
    Stop-loss and take-profit are fixed at entry, ``stop_atr_mult`` x ATR14
    against the trade and ``take_atr_mult`` x ATR14 in its favour. The
    position is closed at the first later close that reaches either level.
    A position still open on the last row is not recorded.

    Args:
        df: DataFrame with ``Close``, ``ATR14``, ``Long`` and ``Short``.
        stop_atr_mult: ATR multiple for the stop-loss distance.
        take_atr_mult: ATR multiple for the take-profit distance.
        as_returns: If True, report each trade as a fraction of its entry
            price instead of in price units.

    Returns:
        List of per-trade results in order of exit. With the defaults each
        value is in price units (exit minus entry for longs, entry minus
        exit for shorts).
    """
    position = None
    entry_price = stop_loss = take_profit = 0.0
    results = []

    # Iterate the four needed columns directly instead of building a Series
    # per row with iterrows().
    rows = zip(df['Close'], df['ATR14'], df['Long'], df['Short'])
    for close, atr, go_long, go_short in rows:
        if position is None:  # No open position
            if pd.isna(atr):
                # A NaN ATR would give NaN exit levels: every comparison
                # against them is False, so the trade could never close and
                # would block all later entries.
                continue
            if go_long:
                position = 'long'
                entry_price = close
                stop_loss = entry_price - (stop_atr_mult * atr)
                take_profit = entry_price + (take_atr_mult * atr)
            elif go_short:
                position = 'short'
                entry_price = close
                stop_loss = entry_price + (stop_atr_mult * atr)
                take_profit = entry_price - (take_atr_mult * atr)
            continue  # never exit on the entry bar

        if position == 'long':
            exit_hit = close >= take_profit or close <= stop_loss
            pnl = close - entry_price
        else:
            exit_hit = close <= take_profit or close >= stop_loss
            pnl = entry_price - close

        if exit_hit:
            results.append(pnl / entry_price if as_returns else pnl)
            position = None  # Close position
    return results
# Performance Metrics
def calculate_performance(results, df):
    """Add daily-return and cumulative-return columns to *df*.

    Columns written: ``Daily_Returns``, ``Strategy_Returns``,
    ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``.

    ``results`` carries no dates, so its values are placed by position:
    the first row gets 0, the i-th result lands on row ``i + 1``, and all
    remaining rows get 0. Placement is positional, not by index label, so
    it still works when *df*'s index does not start at 0.

    NOTE(review): each result is compounded as a fractional return
    (``1 + r``); if the caller supplies price-unit P&L, the strategy curve
    is not comparable with the buy-and-hold curve -- confirm the units.

    Args:
        results: Sequence of per-trade results.
        df: DataFrame with a ``Close`` column.

    Returns:
        The same DataFrame object, mutated.
    """
    df['Daily_Returns'] = df['Close'].pct_change()

    row_count = len(df)
    strategy_returns = np.zeros(row_count, dtype=float)
    # Row 0 stays 0 (the one-row shift); extra results beyond the frame's
    # length are ignored.
    usable = min(len(results), max(row_count - 1, 0))
    strategy_returns[1:1 + usable] = results[:usable]
    df['Strategy_Returns'] = strategy_returns

    df['Cumulative_Strategy'] = (1 + df['Strategy_Returns']).cumprod()
    # The first daily return is NaN by construction; treat it as 0 so the
    # buy-and-hold curve starts at 1.0 instead of NaN.
    df['Cumulative_Buy_Hold'] = (1 + df['Daily_Returns'].fillna(0)).cumprod()
    return df
# Visualization
def plot_performance(df):
    """Plot the cumulative strategy curve against the buy-and-hold curve.

    The x-axis uses the ``Date`` column when *df* has one, so the axis
    matches its 'Date' label; otherwise the DataFrame's index is used.

    Args:
        df: DataFrame with ``Cumulative_Strategy`` and
            ``Cumulative_Buy_Hold`` columns, and optionally ``Date``.
    """
    x_values = df['Date'] if 'Date' in df.columns else df.index

    plt.figure(figsize=(12, 6))
    plt.plot(x_values, df['Cumulative_Strategy'],
             label='Strategy Returns', color='blue')
    plt.plot(x_values, df['Cumulative_Buy_Hold'],
             label='Buy and Hold Returns', color='orange')
    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()
# Main Execution
# Run the pipeline only when executed as a script or notebook cell, so that
# importing this file does not trigger a download and a plot. The names
# below remain module-level globals.
if __name__ == "__main__":
    # Backtest configuration: the symbol and date range handed to get_data.
    ticker = 'IWV'
    start_date = '2020-01-01'
    end_date = '2024-01-01'
    data = get_data(ticker, start_date, end_date)
    # Check the DataFrame contents before calculating indicators
    print("Data after retrieval:\n", data.head())
    print("Data shape after retrieval:", data.shape)
    # Now attempt to calculate indicators
    data = calculate_indicators(data)
    # Derive the Long/Short entry flags from the indicator columns.
    data = signal_generator(data)
    # Simulate the trades; `results` holds one entry per closed trade.
    results = backtest_strategy(data)
    # Attach return columns to the frame, then chart them.
    data = calculate_performance(results, data)
    plot_performance(data)
Upvotes: 0