user28102875
user28102875

Reputation: 11

Python Backtest Dataframe Alignment Issue

I am trying to develop code to backtest my strategy, but I am running into an error that ChatGPT has been unable to resolve. Please try running the code yourself on Google Colab to see if you can get it working.

The error is: `ValueError: Data must be 1-dimensional, got ndarray of shape (1006, 1) instead`

I've tried everything ChatGPT has suggested, but it still isn't working.

Here is the code:

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange

# Data Retrieval
def get_data(ticker, start, end):
    """Download price data for ``ticker`` between ``start`` and ``end`` via yfinance.

    The index is reset into a regular column, the ``Date`` column is converted
    to plain ``datetime.date`` values, and the head and shape of the frame are
    printed for debugging before the frame is returned.

    NOTE(review): the reported "Data must be 1-dimensional ... shape (1006, 1)"
    error suggests that ``yf.download`` returns MultiIndex columns in this
    environment, which would make ``df['Close']`` two-dimensional -- confirm by
    inspecting ``df.columns``.
    """
    df = yf.download(ticker, start=start, end=end)
    df.reset_index(inplace=True)  # Reset the index to make it easier to work with
    df['Date'] = df['Date'].dt.date  # Convert to date type if needed
    print("Initial DataFrame head:\n", df.head())  # Print the first few rows
    print("Initial DataFrame shape:", df.shape)  # Print the shape of the DataFrame
    return df

# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR indicator columns to ``df`` and drop NaN rows.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``, ``StochRSI_K``,
    ``StochRSI_D`` and ``ATR14``. The shape of each new column is printed for
    debugging. Rows containing any NaN are removed in place, so the caller's
    frame is modified; the same frame is returned.

    NOTE(review): the ``StochRSI_*`` columns are produced by
    ``StochasticOscillator``, not by a stochastic-RSI class, despite their names.
    """
    # Exponential Moving Averages (EMAs)
    df['EMA8'] = EMAIndicator(df['Close'], window=8).ema_indicator()
    df['EMA14'] = EMAIndicator(df['Close'], window=14).ema_indicator()
    df['EMA50'] = EMAIndicator(df['Close'], window=50).ema_indicator()
    
    # Print shapes to debug
    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)
    
    # Relative Strength Index (RSI)
    df['RSI14'] = RSIIndicator(df['Close'], window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)
    
    # Stochastic RSI
    # NOTE(review): only Close is passed here, whereas AverageTrueRange below
    # receives High, Low and Close -- presumably StochasticOscillator needs all
    # three as well; verify against the ta documentation.
    stoch_rsi = StochasticOscillator(df['Close'], window=14, smooth_window=3)
    df['StochRSI_K'] = stoch_rsi.stoch()
    df['StochRSI_D'] = stoch_rsi.stoch_signal()
    
    # Print shapes
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)
    
    # Average True Range (ATR)
    atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=14)
    df['ATR14'] = atr.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Drop rows with NaN values
    df.dropna(inplace=True)
    
    return df

# Entry Conditions
def signal_generator(df):
    """Flag rows where the long or short entry conditions hold.

    A long signal needs Close above EMA8, EMA8 above EMA14, EMA14 above EMA50
    and ``StochRSI_K`` above ``StochRSI_D``; a short signal needs every one of
    those comparisons reversed. The boolean results are stored in the ``Long``
    and ``Short`` columns of ``df``, which is modified in place and returned.
    """
    price = df['Close']
    fast, mid, slow = df['EMA8'], df['EMA14'], df['EMA50']
    k_line, d_line = df['StochRSI_K'], df['StochRSI_D']

    # Strict inequalities: rows where any pair is equal produce no signal
    df['Long'] = price.gt(fast) & fast.gt(mid) & mid.gt(slow) & k_line.gt(d_line)
    df['Short'] = price.lt(fast) & fast.lt(mid) & mid.lt(slow) & k_line.lt(d_line)

    return df

# Position Management
def backtest_strategy(df):
    """Simulate one-position-at-a-time trading over the rows of ``df``.

    Reads the ``Close``, ``ATR14``, ``Long`` and ``Short`` columns. When flat,
    a ``Long`` row opens a long (checked first) and a ``Short`` row opens a
    short, both at that row's close. The stop is placed 3 * ATR14 against the
    position and the target 2 * ATR14 in its favour. An open position is
    closed at the first later close that reaches either level.

    Returns a list with one entry per closed trade: the price difference
    between exit and entry, signed so that a gain is positive. A position
    still open after the last row is not included.
    """
    trades = []
    open_side = None
    entry = 0

    for _, bar in df.iterrows():
        close = bar['Close']

        if open_side == 'long':
            if close >= target or close <= stop:
                trades.append(close - entry)
                open_side = None

        elif open_side == 'short':
            if close <= target or close >= stop:
                trades.append(entry - close)
                open_side = None

        # Flat from here on: a new entry is only considered on rows where no
        # position was open at the start of the row.
        elif bar['Long']:
            open_side = 'long'
            entry = close
            stop = entry - (3 * bar['ATR14'])
            target = entry + (2 * bar['ATR14'])

        elif bar['Short']:
            open_side = 'short'
            entry = close
            stop = entry + (3 * bar['ATR14'])
            target = entry - (2 * bar['ATR14'])

    return trades

# Performance Metrics
def calculate_performance(results, df):
    """Attach daily-return and cumulative-return columns to ``df``.

    ``Daily_Returns`` is the percentage change of ``Close``.
    ``Strategy_Returns`` is built from ``results`` shifted down by one entry
    with the leading gap filled with 0. Each cumulative column is the running
    product of ``1 + returns``. ``df`` is modified in place and returned.

    NOTE(review): ``pd.Series(results)`` carries its own 0..len(results)-1
    index, so the assignment matches rows by index label rather than by date;
    rows of ``df`` without a matching label end up NaN -- confirm this is the
    intended alignment.
    """
    closes = df['Close']
    df['Daily_Returns'] = closes.pct_change()

    per_trade = pd.Series(results)
    df['Strategy_Returns'] = per_trade.shift(1).fillna(0)  # Align with dates

    # Compound each return column into its cumulative counterpart
    column_pairs = (
        ('Strategy_Returns', 'Cumulative_Strategy'),
        ('Daily_Returns', 'Cumulative_Buy_Hold'),
    )
    for returns_col, cumulative_col in column_pairs:
        growth = 1 + df[returns_col]
        df[cumulative_col] = growth.cumprod()

    return df

# Visualization
def plot_performance(df):
    """Plot the strategy's cumulative return against buy-and-hold.

    Draws the ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold`` columns of
    ``df`` on one 12x6 figure with a title, axis labels, legend and grid, then
    shows the figure. Nothing is returned.
    """
    # (column, legend label, line colour) for each curve, in drawing order
    curves = (
        ('Cumulative_Strategy', 'Strategy Returns', 'blue'),
        ('Cumulative_Buy_Hold', 'Buy and Hold Returns', 'orange'),
    )

    plt.figure(figsize=(12, 6))
    for column, legend_label, line_color in curves:
        plt.plot(df[column], label=legend_label, color=line_color)

    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()

# Main Execution
ticker = 'IWV'
start_date, end_date = '2020-01-01', '2024-01-01'

# Pull the raw price history and show what came back before any processing
data = get_data(ticker, start=start_date, end=end_date)
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Pipeline: indicators -> entry signals -> simulated trades -> performance -> chart
data = calculate_indicators(data)
data = signal_generator(data)
results = backtest_strategy(data)
data = calculate_performance(results, data)
plot_performance(data)

Upvotes: 1

Views: 79

Answers (1)

Evgenii Lazarev
Evgenii Lazarev

Reputation: 36

user28102875,

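The problem is that the DataFrame returned by `yf.download` has MultiIndex columns, so `df['Close']` is a two-dimensional DataFrame rather than a one-dimensional Series (hence the `shape (1006, 1)` in the error). Dropping the extra column level fixes it: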

    # Flatten MultiIndex columns by dropping their second level, so that each
    # column is addressed by a single plain label such as 'Close'.
    # (presumably the second level holds the ticker symbol -- confirm via df.columns)
    if isinstance(df.columns, pd.MultiIndex) and df.columns.nlevels > 1:
        df.columns = df.columns.droplevel(1)

Also, `StochasticOscillator` requires the High, Low, and Close series, not Close alone:

# Pass the High, Low and Close columns (in that order), followed by the window settings
stoch_rsi = StochasticOscillator(df['High'], df['Low'], df['Close'], window=14, smooth_window=3)

Here is your full fixed code (the strategy logic may still need further changes, but it runs):

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange


# Data Retrieval
def get_data(ticker, start, end):
    """Fetch price history for ``ticker`` from yfinance with single-level columns.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start of the requested date range.
        end: End of the requested date range.

    Returns:
        The downloaded DataFrame with the index reset into a column, ``Date``
        converted to plain ``datetime.date`` values and, if the columns were a
        MultiIndex, the second column level dropped.
    """
    frame = yf.download(ticker, start=start, end=end).reset_index()
    frame['Date'] = frame['Date'].dt.date

    # Debug output: shows the frame as downloaded, before the columns are flattened
    print("Initial DataFrame head:\n", frame.head())
    print("Initial DataFrame shape:", frame.shape)

    # Collapse MultiIndex columns to their first level so that lookups such as
    # frame['Close'] use plain labels
    column_index = frame.columns
    if isinstance(column_index, pd.MultiIndex) and column_index.nlevels > 1:
        frame.columns = column_index.droplevel(1)
    return frame


# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR indicator columns to ``df`` and drop NaN rows.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``, ``StochRSI_K``,
    ``StochRSI_D`` and ``ATR14``. Each new column's shape is printed for
    debugging. Rows containing any NaN are removed in place, so the caller's
    frame is modified; the same frame is returned.

    Note that the ``StochRSI_*`` columns come from ``StochasticOscillator``
    applied to High/Low/Close, not from a stochastic-RSI class.
    """
    high = df['High']
    low = df['Low']
    close = df['Close']

    # Exponential moving averages over 8, 14 and 50 periods
    ema_fast = EMAIndicator(close, window=8)
    df['EMA8'] = ema_fast.ema_indicator()
    ema_mid = EMAIndicator(close, window=14)
    df['EMA14'] = ema_mid.ema_indicator()
    ema_slow = EMAIndicator(close, window=50)
    df['EMA50'] = ema_slow.ema_indicator()

    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)

    # 14-period relative strength index
    rsi = RSIIndicator(close, window=14)
    df['RSI14'] = rsi.rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator: main line and its smoothed signal line
    stochastic = StochasticOscillator(high, low, close, window=14, smooth_window=3)
    df['StochRSI_K'] = stochastic.stoch()
    df['StochRSI_D'] = stochastic.stoch_signal()

    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)

    # 14-period average true range
    true_range = AverageTrueRange(high, low, close, window=14)
    df['ATR14'] = true_range.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Remove every row that still has a NaN in any column; done in place so
    # the caller's frame is trimmed as well
    df.dropna(inplace=True)

    return df


# Entry Conditions
def signal_generator(df):
    """Add boolean ``Long`` and ``Short`` entry-signal columns to ``df``.

    ``Long`` is True where Close > EMA8 > EMA14 > EMA50 and ``StochRSI_K`` is
    above ``StochRSI_D``. ``Short`` is True where all of those inequalities
    are reversed. ``df`` is modified in place and returned.
    """
    close = df['Close']
    ema8 = df['EMA8']
    ema14 = df['EMA14']
    ema50 = df['EMA50']
    stoch_k = df['StochRSI_K']
    stoch_d = df['StochRSI_D']

    # Price and the three EMAs stacked in strictly descending order
    uptrend = (close > ema8) & (ema8 > ema14) & (ema14 > ema50)
    df['Long'] = uptrend & (stoch_k > stoch_d)

    # Mirror image: strictly ascending order
    downtrend = (close < ema8) & (ema8 < ema14) & (ema14 < ema50)
    df['Short'] = downtrend & (stoch_k < stoch_d)

    return df


# Position Management
def backtest_strategy(df):
    """Walk through ``df`` row by row, holding at most one position at a time.

    Reads the ``Close``, ``ATR14``, ``Long`` and ``Short`` columns. While
    flat, a ``Long`` row (checked first) or a ``Short`` row opens a position
    at that row's close, with the stop 3 * ATR14 against the position and the
    target 2 * ATR14 in its favour. On later rows the position is closed at
    the first close that reaches the stop or the target.

    Returns:
        A list with one price difference per closed trade, positive for a
        gain. A position that is still open after the final row is not
        reported.
    """
    results = []
    side = None
    entry_price = 0

    for _, bar in df.iterrows():
        price = bar['Close']

        if side is None:
            # Flat: look for an entry, then move on. The entry row itself is
            # never checked against the stop or target.
            if bar['Long']:
                side, entry_price = 'long', price
                stop_loss = entry_price - (3 * bar['ATR14'])
                take_profit = entry_price + (2 * bar['ATR14'])
            elif bar['Short']:
                side, entry_price = 'short', price
                stop_loss = entry_price + (3 * bar['ATR14'])
                take_profit = entry_price - (2 * bar['ATR14'])
            continue

        # In a position: decide whether this close reaches either exit level
        if side == 'long':
            exit_now = price >= take_profit or price <= stop_loss
            pnl = price - entry_price
        else:
            exit_now = price <= take_profit or price >= stop_loss
            pnl = entry_price - price

        if exit_now:
            results.append(pnl)
            side = None

    return results


# Performance Metrics
def calculate_performance(results, df):
    """Attach daily-return and cumulative-return columns to ``df``.

    Args:
        results: Sequence of per-trade returns. They are compounded as
            fractional returns (0.05 means +5%).
            NOTE(review): callers that pass absolute price differences should
            convert them to fractions of the entry price first.
        df: Frame with a ``Close`` column. It is modified in place.

    Returns:
        ``df`` with four added columns: ``Daily_Returns`` (percentage change
        of ``Close``), ``Strategy_Returns``, ``Cumulative_Strategy`` and
        ``Cumulative_Buy_Hold`` (running products of ``1 + returns``).

    ``Strategy_Returns`` is filled by position, not by index label: the first
    row is 0, the following rows take ``results`` in order, and any remaining
    rows are 0. Entries of ``results`` that do not fit are ignored. Assigning
    ``pd.Series(results)`` directly would instead match on its own 0..k-1
    labels, which leaves NaN wherever ``df``'s index does not contain those
    labels (for example after leading rows have been dropped).
    """
    df['Daily_Returns'] = df['Close'].pct_change()

    row_count = len(df)
    strategy_returns = np.zeros(row_count, dtype=float)
    # Row 0 stays at 0 (the one-row shift); at most row_count - 1 trades fit after it
    placed = np.asarray(results, dtype=float)[:max(row_count - 1, 0)]
    strategy_returns[1:1 + len(placed)] = placed
    # A plain array is assigned positionally, so df's index labels do not matter
    df['Strategy_Returns'] = strategy_returns

    df['Cumulative_Strategy'] = (1 + df['Strategy_Returns']).cumprod()
    df['Cumulative_Buy_Hold'] = (1 + df['Daily_Returns']).cumprod()

    return df


# Visualization
def plot_performance(df):
    """Plot the strategy's cumulative return against buy-and-hold.

    Draws the ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold`` columns of
    ``df`` on one 12x6 figure with a title, axis labels, legend and grid, then
    shows the figure. Nothing is returned.

    The x-axis is labelled "Date", so the curves are plotted against the
    ``Date`` column when ``df`` has one; otherwise the frame's index is used,
    as before.
    """
    # Plotting a bare column would put the integer row index on the x-axis
    x_values = df['Date'] if 'Date' in df.columns else df.index

    plt.figure(figsize=(12, 6))
    plt.plot(x_values, df['Cumulative_Strategy'], label='Strategy Returns', color='blue')
    plt.plot(x_values, df['Cumulative_Buy_Hold'], label='Buy and Hold Returns', color='orange')
    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()


# Main Execution
ticker, start_date, end_date = 'IWV', '2020-01-01', '2024-01-01'

data = get_data(ticker, start_date, end_date)

# Inspect the downloaded frame before any indicator is computed
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Indicators feed the signal columns; the backtest then turns the signals into
# a list of closed-trade results, which are charted against buy-and-hold
data = signal_generator(calculate_indicators(data))
results = backtest_strategy(data)
data = calculate_performance(results, data)
plot_performance(data)

enter image description here

Upvotes: 0

Related Questions