Reputation: 11
Can someone help with this Datetime64 sum issue? I am reading the date from a CSV.
Code:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import norm
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
# Display floats with thousands separators and 4 decimal places.
pd.options.display.float_format = '{:,.4f}'.format
# Inputs and Parameters
filename = 'SPY_quotedata.csv'  # CBOE options quote export for SPY
# Black-Scholes European-Options Gamma
def calcGammaEx(S, K, vol, T, r, q, optType, OI):
    """Dollar gamma exposure of a European option under Black-Scholes.

    Returns OI * 100 (contract size) * S^2 * 0.01 (per 1% move) * unit gamma.
    Degenerate inputs (zero time to expiry or zero volatility) yield 0.
    """
    if T == 0 or vol == 0:
        return 0

    sqrtT = np.sqrt(T)
    dp = (np.log(S / K) + (r - q + 0.5 * vol ** 2) * T) / (vol * sqrtT)
    dm = dp - vol * sqrtT

    if optType == 'call':
        gamma = np.exp(-q * T) * norm.pdf(dp) / (S * vol * sqrtT)
    else:
        # Gamma is identical for calls and puts; this equivalent form
        # (via dm) is kept deliberately as a cross-check.
        gamma = K * np.exp(-r * T) * norm.pdf(dm) / (S * S * vol * sqrtT)
    return OI * 100 * S * S * 0.01 * gamma
def isThirdFriday(d):
    """Return True if *d* is the third Friday of its month.

    The third occurrence of any weekday is the one whose day-of-month
    lies in 15..21, so a Friday in that window is the third Friday.
    """
    return 15 <= d.day <= 21 and d.weekday() == 4
# This assumes the CBOE file format hasn't been edited, i.e. table begins at line 4.
# Use a context manager so the handle is closed even on error, and reuse the
# `filename` parameter instead of repeating the path literal.
with open(filename) as optionsFile:
    optionsFileData = optionsFile.readlines()

# Get SPY Spot: line 2 carries the spot price after a "Last:" marker
# (presumably "...Last: 543.21,..." — verify against the actual export).
spotLine = optionsFileData[1]
spotPrice = float(spotLine.split('Last:')[1].split(',')[0])
fromStrike = 0.8 * spotPrice  # lower edge of the strike window (-20%)
toStrike = 1.2 * spotPrice    # upper edge of the strike window (+20%)
# Get Today's Date: line 3 carries it after a "Date: " marker, e.g.
# "Date: July 10, 2024 ..." (month name, "DD,", "YYYY").
# Let strptime parse all three tokens in one pass instead of hand-splitting
# month/day/year and patching a partially-parsed datetime with .replace().
dateLine = optionsFileData[2]
dateText = ' '.join(dateLine.split('Date: ')[1].split(' ')[:3])
todayDate = datetime.strptime(dateText, '%B %d, %Y')
# Get SPY Options Data (table starts after the 4-line preamble).
df = pd.read_csv(filename, sep=",", header=None, skiprows=4)
df.columns = ['ExpirationDate','Calls','CallLastSale','CallNet','CallBid','CallAsk','CallVol',
              'CallIV','CallDelta','CallGamma','CallOpenInt','StrikePrice','Puts','PutLastSale',
              'PutNet','PutBid','PutAsk','PutVol','PutIV','PutDelta','PutGamma','PutOpenInt']
# Stamp expirations with the 16:00 market close.
df['ExpirationDate'] = pd.to_datetime(df['ExpirationDate'], format='%a %b %d %Y')
df['ExpirationDate'] = df['ExpirationDate'] + timedelta(hours=16)
# Coerce every numeric column used downstream to float in a single pass.
numericCols = ['StrikePrice', 'CallIV', 'PutIV', 'CallGamma', 'PutGamma',
               'CallOpenInt', 'PutOpenInt']
df[numericCols] = df[numericCols].astype(float)
# ---=== CALCULATE SPOT GAMMA ===---
# Gamma Exposure = Unit Gamma * Open Interest * Contract Size * Spot Price
# To further convert into 'per 1% move' quantity, multiply by 1% of spotPrice
df['CallGEX'] = df['CallGamma'] * df['CallOpenInt'] * 100 * spotPrice * spotPrice * 0.01
df['PutGEX'] = df['PutGamma'] * df['PutOpenInt'] * 100 * spotPrice * spotPrice * 0.01 * -1
df['TotalGamma'] = (df.CallGEX + df.PutGEX) / 10**9
# BUG FIX: pandas >= 2.0 raises "TypeError: datetime64 type does not support
# sum operations" when a datetime column (ExpirationDate here) is included in
# a groupby().sum(); older pandas silently dropped it. Restrict the
# aggregation to numeric columns.
dfAgg = df.groupby(['StrikePrice']).sum(numeric_only=True)
strikes = dfAgg.index.values
C:\Users\v_ichase\Desktop\New folder (3)>py "C:\Users\v_ichase\Desktop\New folder (3)\Gex.py"
Traceback (most recent call last):
File "C:\Users\v_ichase\Desktop\New folder (3)\Gex.py", line 78, in <module>
dfAgg = df.groupby(['StrikePrice']).sum()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\groupby.py", line 3053, in sum
result = self._agg_general(
^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\groupby.py", line 1835, in _agg_general
result = self._cython_agg_general(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\groupby.py", line 1926, in _cython_agg_general
new_mgr = data.grouped_reduce(array_func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\internals\managers.py", line 1431, in grouped_reduce
applied = blk.apply(func)
^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\internals\blocks.py", line 366, in apply
result = func(self.values, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\groupby.py", line 1902, in array_func
result = self.grouper._cython_operation(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\ops.py", line 815, in _cython_operation
return cy_op.cython_operation(
^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\groupby\ops.py", line 525, in cython_operation
return values._groupby_op(
^^^^^^^^^^^^^^^^^^^
File "C:\Users\v_ichase\AppData\Roaming\Python\Python311\site-packages\pandas\core\arrays\datetimelike.py", line 1637, in _groupby_op
raise TypeError(f"datetime64 type does not support {how} operations")
TypeError: datetime64 type does not support sum operations
C:\Users\v_ichase\Desktop\New folder (3)>py "C:\Users\v_ichase\Desktop\New folder (3)\Gex.py"
Uninstalled and reinstalled Python versions, currently running 3.11.6
Upvotes: 1
Views: 523
Reputation: 469
It looks like at some point pandas changed its behaviour relating to summing datetime types in a groupby.
Previously, datetime columns were silently dropped from the groupby...
>>> pd.__version__
u'0.24.2'
>>> df = pd.DataFrame({'id':('a','a','b'), 'dt':[pd.to_datetime(datetime.date.today())]*3, 'val':range(3)})
>>> df
dt id val
0 2024-07-10 a 0
1 2024-07-10 a 1
2 2024-07-10 b 2
>>> df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
dt 3 non-null datetime64[ns]
id 3 non-null object
val 3 non-null int64
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 144.0+ bytes
None
>>> df.groupby('id').sum()
val
id
a 1
b 2
More recent behaviour is to raise this exception if you attempt to sum datetime types in a groupby ...
>>> pd.__version__
'2.2.2'
>>> df = pd.DataFrame({'id':('a','a','b'), 'dt':[pd.to_datetime(datetime.date.today())]*3, 'val':range(3)})
>>> df
id dt val
0 a 2024-07-10 0
1 a 2024-07-10 1
2 b 2024-07-10 2
>>> df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 3 non-null object
1 dt 3 non-null datetime64[ns]
2 val 3 non-null int64
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 200.0+ bytes
None
>>> df.groupby('id').sum()
Traceback (most recent call last):
<snip>
TypeError: datetime64 type does not support sum operations
Assuming you are looking to stop the exception from occurring and aren't looking to perform some kind of aggregate operation on the datetime column, then I would suggest excluding the datetime column from the groupby, as follows ...
>>> df.drop(columns='dt').groupby('id').sum()
val
id
a 1
b 2
Upvotes: 0