Slartibartfast
Slartibartfast

Reputation: 1190

Annotating a date on a chart

I am trying to add a straight vertical line to my chart, with the date printed vertically along the line. I have added a picture below of what I am trying to accomplish, and I have also included the code I am using to annotate the chart.

My code:

import pandas as pd
from pandas import datetime
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Two download windows that share the same end date: a long one starting in
# 2015 and a short one starting in 2019.
start = datetime.date(2015,1,1)
end = datetime.date.today()
start1 = datetime.date(2019,1,1)

data = web.DataReader("^GSPC", 'yahoo',start, end)
data1 = web.DataReader("^GSPC", 'yahoo', start1, end)

data.index = pd.to_datetime(data.index, format ='%Y-%m-%d')
data1.index = pd.to_datetime(data1.index, format ='%Y-%m-%d')

# Reindex both frames onto every calendar day from `start` to `end`.
# Labels absent from a frame become NaN rows; for data1 that presumably
# includes every day before `start1` -- confirm against the downloaded data.
full_dates = pd.date_range(start, end)
data = data.reindex(full_dates)
data1 = data1.reindex(full_dates)

# NOTE(review): no 'month', 'week' or 'day' column is created anywhere in this
# snippet, so these calls would raise KeyError as posted -- presumably the
# lines that derive them from the index were omitted from the question.
data.set_index('month',append=True,inplace=True)
data1.set_index('month',append=True,inplace=True)
data.set_index('week',append=True,inplace=True)
data1.set_index('week',append=True,inplace=True)
data.set_index('day',append=True,inplace=True)
data1.set_index('day',append=True,inplace=True)

data['pct_day']= data['Adj Close'].pct_change()
data1['pct_day']= data1['Adj Close'].pct_change()

# Average by (month, day). Note that `df` rebinds the name imported above as
# the DataFrame alias.
df = data.groupby(['month', 'day']).mean()
df2 = data1.groupby(['month', 'day']).mean()

df['cumsum_pct_day']=df['pct_day'].cumsum(axis = 0)
df2['cumsum_pct_day']=df2['pct_day'].cumsum(axis = 0)

ax = df.plot(y='cumsum_pct_day', grid = True, label='df')
df2.plot(y='cumsum_pct_day', grid= True, ax=ax, label='df2')

# NOTE(review): this is the part the question is about. The plotted frames are
# indexed by (month, day), not by dates, so x=end presumably does not fall on
# the plotted x range. In addition, data1.Close[0] is taken from the reindexed
# frame whose first row is likely NaN (see above), and it comes from the Close
# column rather than the cumulative-percentage scale of the y axis -- confirm
# which of these prevents the line from showing.
ylims = ax.get_ylim()
ax.vlines(end, ylims[0], data1.Close[0], linestyles='--')
ax.text(end, data1.Close[0], end, ha='right', va='top', rotation=90)
ax.set_ylim(ylims)


plt.show()

enter image description here

For some reason I am getting this warning:

enter image description here

But the line is not plotted. Could you advise why?

Upvotes: 6

Views: 440

Answers (1)

Mykola Zotko
Mykola Zotko

Reputation: 17794

Instead of vlines, which plots multiple lines, you can use the axvline method, which adds a single vertical line:

from datetime import timedelta

def gen_df(size):
    """Return a demo frame of `size` daily rows starting on 2018-01-01.

    'col1' holds the consecutive dates and 'col2' the running total of
    random exponential draws.
    """
    dates = pd.date_range(start='1/1/2018', periods=size)
    running_total = np.cumsum(np.random.exponential(size=size))
    frame = pd.DataFrame({'col1': dates, 'col2': running_total})
    return frame

# Two demo series of different lengths that start on the same date.
df1 = gen_df(60)
df2 = gen_df(50)

# Draw both series on the same Axes.
ax = df1.plot(x='col1', y='col2', label='df1')
df2.plot(x='col1', y='col2', ax=ax, label='df2')

# Mark the last date of the shorter series with a single vertical line.
last_date = df2['col1'].max()
ax.axvline(x=last_date, color='red')

# The label is passed positionally: Matplotlib renamed this parameter from
# `s` to `text` in 3.3, so the keyword form `s=...` fails on current releases
# while the positional form works on old and new versions alike.
# The text is placed two days left of the line so it does not overlap it.
ax.annotate(str(last_date.date()), xy=(last_date - timedelta(days=2), 35), rotation=90)

enter image description here

Let's reproduce your data set:

def gen_df(size):
    """Return a one-column demo frame indexed by `size` consecutive days.

    The index starts on 2018-01-01 and the single 'Adj Close' column holds
    random standard-exponential draws.
    """
    days = pd.date_range(start='1/1/2018', periods=size)
    draws = np.random.standard_exponential(size)
    return pd.Series(draws, index=days, name='Adj Close').to_frame()

# Build a longer (150-row) and a shorter (130-row) demo frame, in that order.
df1, df2 = gen_df(150), gen_df(130)

# Show the first five rows of the longer frame.
print(df1.head(n=5))

Output:

            Adj Close
2018-01-01   0.061166
2018-01-02   0.669330
2018-01-03   0.123332
2018-01-04   0.029007
2018-01-05   1.024210

# Derive calendar parts from the DatetimeIndex of each frame, move month/week/
# day into the index next to the date, and add the day-over-day change.
for df in [df1, df2]:
    df['year'] = df.index.year
    df['month'] = df.index.month
    # DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
    # isocalendar().week gives the same ISO week number. Cast to a plain
    # integer dtype so the column matches the other date parts.
    df['week'] = df.index.isocalendar().week.astype('int64')
    df['day'] = df.index.day
    # One call appends the three levels in the same order as three separate
    # set_index calls would.
    df.set_index(['month', 'week', 'day'], append=True, inplace=True)
    df['pct_day'] = df['Adj Close'].pct_change()

print(df1.head())

Output:

                           Adj Close  year    pct_day
           month week day                            
2018-01-01 1     1    1     0.061166  2018        NaN
2018-01-02 1     1    2     0.669330  2018   9.942917
2018-01-03 1     1    3     0.123332  2018  -0.815739
2018-01-04 1     1    4     0.029007  2018  -0.764804
2018-01-05 1     1    5     1.024210  2018  34.308892

# Collapse each frame to one row per (month, day) by averaging, then append
# the running total of the daily percentage change as 'cumsum_pct_day'.
df1 = (
    df1.groupby(['month', 'day'])
    .mean()
    .assign(cumsum_pct_day=lambda grouped: grouped['pct_day'].cumsum(axis=0))
)
df2 = (
    df2.groupby(['month', 'day'])
    .mean()
    .assign(cumsum_pct_day=lambda grouped: grouped['pct_day'].cumsum(axis=0))
)

print(df1.head(n=5))

Output:

           Adj Close  year    pct_day  cumsum_pct_day
month day                                            
1     1     0.061166  2018        NaN             NaN
      2     0.669330  2018   9.942917        9.942917
      3     0.123332  2018  -0.815739        9.127178
      4     0.029007  2018  -0.764804        8.362375
      5     1.024210  2018  34.308892       42.671267

Add a single line with axvline:

ax = df1.plot(y='cumsum_pct_day', label='df1')
df2.plot(y='cumsum_pct_day', ax=ax, label='df2')

# The frames are indexed by (month, day), so the line is positioned by row
# count rather than by date: x is the length of the shorter frame.
df = df1 if len(df1) < len(df2) else df2  # the shorter of the two frames
ax.axvline(x=len(df), color='red')

y_min, y_max = ax.get_ylim()
# Vertical centre of the visible range. (y_max - y_min) / 2 is only half the
# span and is off-centre whenever y_min is not zero.
middle = (y_min + y_max) / 2
# The label is passed positionally: Matplotlib renamed this parameter from
# `s` to `text` in 3.3, so the keyword form `s=...` fails on current releases.
# The label is the last (month, day) key of the shorter frame, placed five
# rows left of the line.
ax.annotate(str(df.index.max()), xy=(len(df) - 5, middle), rotation=90)

axvline

Add multiple lines with vlines:

ax = df1.plot(y='cumsum_pct_day', label='df1')
df2.plot(y='cumsum_pct_day', ax=ax, label='df2')

y_min, y_max = ax.get_ylim()
# Many closely spaced vertical lines (step 0.1) between the end of df2 and the
# end of df1 shade the stretch where only df1 has data.
ax.vlines(x=np.arange(len(df2), len(df1), step=.1), ymin=y_min, ymax=y_max, color='red')

# Vertical centre of the visible range. (y_max - y_min) / 2 is only half the
# span and is off-centre whenever y_min is not zero.
middle = (y_min + y_max) / 2
# `df` is the shorter frame chosen in the single-line example. The label is
# passed positionally because Matplotlib renamed this parameter from `s` to
# `text` in 3.3, so the keyword form `s=...` fails on current releases.
ax.annotate(str(df.index.max()), xy=(len(df) - 5, middle), rotation=90)

enter image description here

Upvotes: 7

Related Questions