Reputation: 461
From the dataframe bellow, I would like to calculate the number of days until each event per year
lst = [['2011-02-01',''],
['2011-02-02',''],
['2011-02-03',''],
['2011-02-04','event1'],
['2011-02-05',''],
['2011-02-06','event_2'],
['2012-02-01',''],
['2012-02-02',''],
['2012-02-03',''],
['2012-02-04',''],
['2012-02-05','event_1'],
['2012-02-06','event_2']]
df = pd.DataFrame(lst,columns=['date','event'])
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
Desired output would be something like bellow:
Upvotes: 0
Views: 91
Reputation: 13397
Try:
import numpy as np
df=df.assign(days_until_event_1=None, days_until_event_2=None)
epoch_factor=24*60*60*10**9
for y in df["year"].unique():
dfel=df.loc[df["year"].eq(y)]
dfel=dfel.iloc[::-1]
dfel["days_until_event_1"]=np.where(dfel["event"].eq("event_1"), dfel["date"].astype(str), None)
dfel["days_until_event_1"]=pd.to_datetime(dfel["days_until_event_1"].ffill().bfill())-dfel["date"]
dfel["days_until_event_2"]=np.where(dfel["event"].eq("event_2"), dfel["date"].astype(str), None)
dfel["days_until_event_2"]=pd.to_datetime(dfel["days_until_event_2"].ffill().bfill())-dfel["date"]
df.loc[df["year"].eq(y), ["days_until_event_1", "days_until_event_2"]]=dfel[["days_until_event_1", "days_until_event_2"]].div(epoch_factor)
Outputs:
>>> df.iloc[:, -2:]
days_until_event_1 days_until_event_2
0 3 5
1 2 4
2 1 3
3 0 2
4 -1 1
5 -2 0
6 4 5
7 3 4
8 2 3
9 1 2
10 0 1
11 -1 0
Upvotes: 1
Reputation: 22031
lst = [['2011-02-01',''],
['2011-02-02',''],
['2011-02-03',''],
['2011-02-04','event_1'],
['2011-02-05',''],
['2011-02-06','event_2'],
['2012-02-01',''],
['2012-02-02',''],
['2012-02-03',''],
['2012-02-04',''],
['2012-02-05','event_1'],
['2012-02-06','event_2']]
df = pd.DataFrame(lst,columns=['date','event'])
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
#### utility functions ####
def count_down(event, code):
results = []
i = -1
partial = []
for e in np.asarray(event):
if e == code:
partial.append(i+1)
results.extend(partial[::-1])
partial = []
i = -1
else:
i += 1
partial.append(i)
results.extend(partial[::-1])
return results
def reverse_count_down(event, code):
results = []
i = 0
partial = []
for e in np.asarray(event):
if e == code:
partial.append(i+1)
results.extend(partial)
partial = []
i = 0
else:
i += 1
partial.append(i)
results.extend(partial)
return results
df['days_until_event1'] = count_down(df.event, 'event_1')
df['days_until_event2'] = count_down(df.event, 'event_2')
# insert nevative values
c_down = np.asarray(reverse_count_down(df.event, 'event_1'))
substitute = np.where(df.days_until_event1>=df.days_until_event2)[0]
df.loc[substitute, 'days_until_event1'] = -c_down[substitute]
df
Upvotes: 0