Reputation: 9
I have this Python code, but it's already running for 24h and doesn't seem to print the result for now. I don't know how long it will take. Can someone help me to optimize this code? The code is to find the best performance for trading RSI divergence in a certain period. It first defines some parameters for the RSI. The code then goes through every possible combination to find the best combination of parameters to have the best performances. I'm not really an expert.
I don't really know how i can change the code as i'm no expert. Happy to learn.
Thank you guys.
import pandas as pd
import numpy as np
import ta
def load_data(file_path, start_date, end_date):
"""
Loads data for the specified symbol and date range from a CSV file
"""
df = pd.read_csv(file_path)
if 'Date' not in df.columns:
df['Date'] = pd.to_datetime(df.index)
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
df = df[(df.index >= start_date) & (df.index <= end_date)]
return df
def calc_rsi(df, n):
"""
Calculates the relative strength index (RSI) for the given dataframe and window size
"""
delta = df["Close"].diff()
gain = delta.where(delta > 0, 0)
loss = abs(delta.where(delta < 0, 0))
avg_gain = gain.rolling(window=n).mean()
avg_loss = loss.rolling(window=n).mean()
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))
return rsi
def calc_pivot_point(df, pivot_point_type, pivot_point_n):
"""
Calculates the pivot point for the given dataframe and pivot point type
"""
if pivot_point_type == "Close":
pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
elif pivot_point_type == "High/Low":
pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
else:
raise ValueError("Invalid pivot point type")
return pivot_point
def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
"""
Calculates the divergence for the given dataframe and parameters
"""
if divergence_type == "Regular":
pivot_point_delta = pivot_point.diff()
pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
rsi_delta = rsi.diff()
rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
rsi_delta_sign[rsi_delta_sign > 0] = 1
divergence = pivot_point_delta_sign * rsi_delta_sign
divergence[divergence < 0] = -1
divergence = divergence.rolling(window=max_pivot_point).sum()
divergence = divergence.rolling(window=max_bars_to_check).sum()
divergence = divergence.where(divergence > 0, 0)
divergence[divergence < 0] = -1
else:
raise ValueError("Invalid divergence type")
return divergence
def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
"""
Backtests the strategy for the given dataframe and parameters
"""
rsi = calc_rsi(df, rsi_period)
pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
positions["Position"] = 0.0
positions["Stop Loss"] = 0.0
capital = starting_capital
for i, row in enumerate(df.iterrows()):
date = row[0]
close = row[1]["Close"]
rsi_val = rsi.loc[date]
pivot_val = pivot_point.loc[date]
divergence_val = divergence.loc[date]
if divergence_val > 0 and positions.loc[date]["Position"] == 0:
positions.at[date, "Position"] = capital / close
positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
capital = positions.loc[date]["Position"] * close
positions.at[date, "Position"] = 0.0
positions.at[date, "Stop Loss"] = 0.0
elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
capital = positions.loc[date]["Position"] * close
positions.at[date, "Position"] = 0.0
positions.at[date, "Stop Loss"] = 0.0
return capital
def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
"""
Finds the best iteration for the given parameters
"""
best_result = 0.0
best_params = None
for rsi_period in range(start_rsi_period, end_rsi_period + 1):
for pivot_point_type in pivot_point_types:
for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
for divergence_type in divergence_types:
for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
if result > best_result:
best_result = result
best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
return best_result, best_params
# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)
def load_data(start_date, end_date):
# Your code to load the data for the specified date range
# ...
return df
# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000
# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)
# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)
Upvotes: 0
Views: 406
Reputation: 434
You can try the following methods to improve the performance:
The backtest()
function is used many times inside the find_best_iteration()
function under many for
loops, thus the positions
variable inside backtest()
is being updated frequently which can be show when the positions
variable is a Dataframe. You can consider using numpy array for the positions
variable that is optimized for updates.
You can try using the multiprocessing
module in Python to parallelize the calculation of the divergence
variable.
Hope this help!
Upvotes: 0
Reputation: 26
I have two recommendations after I scroll up your code:
Reduce the usage of for loop. As you increase a layer of for loop (initial is O(n), the time complexity of your code will increase by a power. In your find_best_iteration() there is about 7 layers of for loop, this is extremely cost your time.
Save and process your data in numpy.array() instead of pd.dataframe(). Dataframe is a class that contains too many unused attributes, and its performance is also slower than numpy.array.
Upvotes: 1