Reputation: 128
I have this Python implementation of the SuperTrend indicator, which operates on a pandas DataFrame. The code works fine, but the superTrend function runs slower and slower as the DataFrame grows in length. I was wondering if there is anything I could change in the code to optimize it so that it stays fast even with a large DataFrame.
def trueRange(df):
    """Return the per-row true range of an OHLC frame as a Series.

    Reads the ``high``, ``low`` and ``close`` columns of ``df``.

    Side effect: the intermediate columns ``prevClose``, ``high-low``,
    ``high-pClose`` and ``low-pClose`` are written onto ``df``.

    The first row has no previous close, so its two close-based spreads
    are NaN; the row-wise ``max`` skips NaN by default, which leaves
    ``high - low`` as the true range for that row.
    """
    df['prevClose'] = df['close'].shift(1)

    # The three candidate spreads; the true range is the largest of them.
    df['high-low'] = df['high'].sub(df['low'])
    df['high-pClose'] = df['high'].sub(df['prevClose']).abs()
    df['low-pClose'] = df['low'].sub(df['prevClose']).abs()

    spread_columns = ['high-low', 'high-pClose', 'low-pClose']
    return df[spread_columns].max(axis=1)
def averageTrueRange(df, peroid=12):
    """Return the simple rolling mean of the true range over ``peroid`` rows.

    Side effect: stores the true range on ``df`` as the ``trueRange`` column
    (in addition to whatever columns ``trueRange`` itself writes).

    Rows that do not yet have a full window of ``peroid`` values are NaN,
    which is the default behaviour of ``rolling(...).mean()``.
    """
    df['trueRange'] = trueRange(df)
    rolling_window = df['trueRange'].rolling(window=peroid)
    return rolling_window.mean()
def superTrend(df, peroid=5, multipler=1.5):
    """Compute SuperTrend bands and trend direction, in place, on ``df``.

    Adds or overwrites these columns on ``df``:

    * ``averageTrueRange`` -- result of ``averageTrueRange(df, peroid)``
    * ``Upperband`` / ``Lowerband`` -- ``(high + low) / 2`` plus/minus
      ``multipler * averageTrueRange``, then adjusted by the recurrence below
    * ``inUptrend`` -- ``True`` / ``False``, or ``None`` while the trend is
      still undetermined (always ``None`` for the first row)

    Parameters
    ----------
    df : DataFrame with ``high``, ``low`` and ``close`` columns.
    peroid : rolling window length handed to ``averageTrueRange``.
    multipler : band width as a multiple of the average true range.

    Returns nothing; the frame is modified in place.
    """
    df['averageTrueRange'] = averageTrueRange(df, peroid=peroid)
    h2 = (df['high'] + df['low']) / 2
    band_offset = multipler * df['averageTrueRange']

    # Run the recurrence on plain Python lists instead of indexing the
    # DataFrame element by element.  Chained indexing such as
    # ``df['col'][i] = value`` is slow, raises SettingWithCopyWarning, does
    # nothing under pandas copy-on-write, and looks rows up by *label*, so
    # it only worked with a default 0..n-1 index.
    close = df['close'].tolist()
    upper = (h2 + band_offset).tolist()
    lower = (h2 - band_offset).tolist()
    in_uptrend = [None] * len(close)

    for current in range(1, len(close)):
        prev = current - 1

        # A close beyond the previous bar's (already adjusted) band flips
        # the trend; otherwise the previous state carries over.  Comparisons
        # against NaN bands are False, so warm-up rows stay None.
        if close[current] > upper[prev]:
            trend = True
        elif close[current] < lower[prev]:
            trend = False
        else:
            trend = in_uptrend[prev]
        in_uptrend[current] = trend

        # In an uptrend the lower band may never drop; otherwise (downtrend
        # or undetermined) the upper band may never rise.
        if trend and lower[current] < lower[prev]:
            lower[current] = lower[prev]
        if not trend and upper[current] > upper[prev]:
            upper[current] = upper[prev]

    # Single positional write-back per column.
    df['Upperband'] = upper
    df['Lowerband'] = lower
    df['inUptrend'] = in_uptrend
Vectorized version:
def superTrend(df, peroid=5, multipler=1.5):
    """Compute SuperTrend bands and trend direction, in place, on ``df``.

    The band arithmetic is vectorised with NumPy.  The trend/band recurrence
    is evaluated in one sequential pass: each row's trend depends on the
    previous row's *adjusted* bands, and each band adjustment depends on the
    current trend, so boolean masks built once from the raw bands cannot
    reproduce it.

    Adds or overwrites these columns on ``df``:

    * ``averageTrueRange`` -- result of ``averageTrueRange(df, peroid)``
    * ``Upperband`` / ``Lowerband`` -- ``(high + low) / 2`` plus/minus
      ``multipler * averageTrueRange``, then adjusted by the recurrence
    * ``inUptrend`` -- ``True`` / ``False``, or ``None`` while the trend is
      still undetermined (always ``None`` for the first row)

    Parameters
    ----------
    df : DataFrame with ``high``, ``low`` and ``close`` columns.
    peroid : rolling window length handed to ``averageTrueRange``.
    multipler : band width as a multiple of the average true range.

    Returns nothing; the frame is modified in place.
    """
    import numpy as np

    df['averageTrueRange'] = averageTrueRange(df, peroid=peroid)

    # Vectorised part: raw bands as standalone float arrays (fresh arrays,
    # so the in-place adjustments below never touch the frame's own data).
    mid_price = ((df['high'] + df['low']) / 2).to_numpy(dtype=float)
    offset = multipler * df['averageTrueRange'].to_numpy(dtype=float)
    close = df['close'].to_numpy(dtype=float)
    upper = mid_price + offset
    lower = mid_price - offset

    n_rows = len(close)
    in_uptrend = np.full(n_rows, None, dtype=object)

    # Sequential part: ``state`` is the trend carried from the previous row.
    state = None
    for i in range(1, n_rows):
        if close[i] > upper[i - 1]:
            state = True
        elif close[i] < lower[i - 1]:
            state = False
        # else: neither band was broken, keep the carried-over state
        in_uptrend[i] = state

        # Uptrend: the lower band never drops.  Otherwise (downtrend or
        # still undetermined): the upper band never rises.  NaN comparisons
        # are False, so warm-up rows are left untouched.
        if state and lower[i] < lower[i - 1]:
            lower[i] = lower[i - 1]
        if not state and upper[i] > upper[i - 1]:
            upper[i] = upper[i - 1]

    df['Upperband'] = upper
    df['Lowerband'] = lower
    df['inUptrend'] = in_uptrend
Upvotes: 8
Views: 655
Reputation: 322
Here is a Numba/NumPy version of your code. You have to convert df['close'], df['high'] and df['low'] to NumPy arrays to get the speed benefit. I didn't check whether the output values are correct, but you get the idea.
import numpy as np
from numba import jit
# UNCOMMENT THIS LINE IF YOU DON'T HAVE THE OPEN PRICES
# c_open = np.concatenate((np.array([np.nan]), c_close[:-1]))
@jit(nopython=True)
def true_range(c_open, c_high, c_low):
    """Return the element-wise true range of the given price arrays.

    For each bar the result is the largest of ``high - low``,
    ``|high - open|`` and ``|low - open|``.

    ``np.fmax`` is used instead of ``np.maximum`` so that a NaN reference
    price (for example the first bar when ``c_open`` is built from shifted
    closes) does not poison the result: NaN terms are ignored and the bar
    falls back to ``high - low``.  The result is NaN only when every term
    is NaN.

    Parameters
    ----------
    c_open, c_high, c_low : NumPy arrays of equal length.
    """
    bar_range = c_high - c_low
    gap_from_high = np.abs(c_high - c_open)
    gap_from_low = np.abs(c_low - c_open)
    return np.fmax(np.fmax(bar_range, gap_from_high), gap_from_low)
@jit(nopython=True)
def average_true_range(c_open, c_high, c_low, period=12):
    """Return the simple moving average of the true range.

    The output has the same length as the inputs.  Each position holds the
    mean of the ``period`` true-range values ending at that position;
    positions before the first full window stay NaN.
    """
    tr_values = true_range(c_open, c_high, c_low)
    n_bars = len(tr_values)
    atr = np.full(n_bars, np.nan)
    # ``end`` is the exclusive upper bound of each window.
    for end in range(period, n_bars + 1):
        atr[end - 1] = np.mean(tr_values[end - period:end])
    return atr
@jit(nopython=True)
def super_trend(c_close, c_open, c_high, c_low, period=5, multipler=1.5):
    """Compute SuperTrend bands and trend direction from price arrays.

    Parameters
    ----------
    c_close, c_open, c_high, c_low : NumPy arrays of equal length.
    period : window length passed to ``average_true_range``.
    multipler : band width as a multiple of the average true range.

    Returns
    -------
    (upper_band, lower_band, in_up_trend) : three float arrays.
        ``in_up_trend`` holds 1.0 for an uptrend, 0.0 for a downtrend and
        NaN while the trend is still undetermined (always NaN at index 0).
    """
    size = len(c_close)
    avg_true_r = average_true_range(c_open, c_high, c_low, period=period)
    h2 = (c_high + c_low) / 2
    upper_band = h2 + (multipler * avg_true_r)
    lower_band = h2 - (multipler * avg_true_r)
    in_up_trend = np.full(size, np.nan)

    for current in range(1, size):
        prev = current - 1

        # A close beyond the previous bar's (already adjusted) band flips
        # the trend; otherwise the previous state is carried forward.
        if c_close[current] > upper_band[prev]:
            in_up_trend[current] = 1.0
        elif c_close[current] < lower_band[prev]:
            in_up_trend[current] = 0.0
        else:
            in_up_trend[current] = in_up_trend[prev]

        # NaN is truthy, so testing the float directly would treat an
        # undetermined trend as an uptrend.  Compare explicitly so that
        # NaN takes the "not uptrend" branch, like a falsy ``None`` would.
        is_up = in_up_trend[current] == 1.0

        # Uptrend: the lower band never drops.  Otherwise: the upper band
        # never rises.
        if is_up and lower_band[current] < lower_band[prev]:
            lower_band[current] = lower_band[prev]
        if not is_up and upper_band[current] > upper_band[prev]:
            upper_band[current] = upper_band[prev]

    return upper_band, lower_band, in_up_trend
Edit: If you don't use Heikin-Ashi candles, you don't need to shift the close prices to get the previous close, since the previous close is equivalent to the current open price ;)
Feel free to check out my library of fast indicators on GitHub.
Upvotes: 3
Reputation: 502
Instead of `import pandas as pd`, try using Modin. Modin automatically makes pandas much faster. Just do `import modin.pandas as pd`. You don't need to change any code other than the import.
If you need to use the `df.apply()` method, there is a package called Swifter. After you `pip install swifter`, all you need to do is `import swifter`, and then instead of doing `df.apply()`, do `df.swifter.apply()`. What's convenient is that Swifter also works with Modin.
Upvotes: 8