Reputation: 388
Here is my problem,
I have this DataFrame:
df=pd.DataFrame({'Date':['2017-05-19','2017-05-22','2017-05-23','2017-05-24','2017-05-25','2017-05-26','2017-05-29'],
'A':[153,152,153,155,153,154,155],
'B':[139,137,141,141.5,141.8,142.1,142.06],})
df['Date']=pd.to_datetime(df['Date'])
df = df.set_index('Date')
My goal was to create another DataFrame named df_end
where I will have a column named "total" corresponding to the total number of column of df
(which is 2) and two column A and B that will take the value "1" if the 1 day rolling mean is > the 2 days rolling mean
Here is the code that I wrote :
def test(dataframe):
df_init=(dataframe*0).assign(total=0)
df_end=(dataframe*0).assign(total=0)
df_init.iloc[0,df_init.columns.get_loc('total')]=2
df_end.iloc[0,df_end.columns.get_loc('total')]=2
rolling_2D=dataframe.rolling(2).mean().fillna(0)
rolling_1D=dataframe.rolling(1).mean().fillna(0)
calendar=pd.Series(df_init.index)[1:]
for date in calendar:
for cols in dataframe:
prev_date=df_init.index[df_init.index<date][-1]
df_init.loc[date,:]=df_end.loc[prev_date,:] #df_init takes the value of df_end on the previous date
M=2 #number of columns different from the column 'total'
count = (df_init.loc[date,dataframe.columns]!=0).sum() #this will be used to compute the weight
#Weight
if count != M:
weight = 1/(M-count)
else:
weight = 1
#Repartition between different columns
repartition = df_init.loc[date,cols]+df_init.loc[date,'total']*weight
#Conditions
if rolling_1D.loc[date,cols] > rolling_2D.loc[date,cols] and df_init.loc[date,cols]==0:
df_end.loc[date,cols]=repartition
df_end.loc[date,'total']=df_init.loc[date,'total'] - df_end.loc[date,cols]
elif rolling_1D.loc[date,cols] > rolling_2D.loc[date,cols] and df_init.loc[date,cols]!=0:
df_end.loc[date,cols]=df_init.loc[date,cols]
df_end.loc[date,'total']=df_init.loc[date,'total']
else:
df_end.loc[date,cols]=0
df_end.loc[date,'total']=df_init.loc[date,cols]+df_init.loc[date,'total']
return df_end
When I return df_end this is what I have:
A B total
Date
2017-05-19 0.0 0.0 2.0
2017-05-22 0.0 0.0 2.0
2017-05-23 1.0 1.0 1.0
2017-05-24 1.0 1.0 1.0
2017-05-25 0.0 1.0 1.0
2017-05-26 1.0 1.0 1.0
2017-05-29 1.0 0.0 2.0
As you can see, my program take into account the column B only and I don't know where is my mistake.
The result that I am looking for is:
A B total
Date
2017-05-19 0.0 0.0 2.0
2017-05-22 0.0 0.0 2.0
2017-05-23 1.0 1.0 0.0
2017-05-24 1.0 1.0 0.0
2017-05-25 0.0 1.0 1.0
2017-05-26 1.0 1.0 0.0
2017-05-29 1.0 0.0 1.0
I know that I am almost done with this code and I'm stuck with this problem.
Do you have any idea where my problem could be?
Thank you so much for the help
Upvotes: 0
Views: 38
Reputation: 12503
Here's a fairly simple way to do that:
new_df = (df.rolling(1).mean() > df.rolling(2).mean()).astype(int)
new_df["total"] = 2 - new_df.sum(axis=1)
print(new_df)
The output is:
A B total
Date
2017-05-19 0 0 2
2017-05-22 0 0 2
2017-05-23 1 1 0
2017-05-24 1 1 0
2017-05-25 0 1 1
2017-05-26 1 1 0
2017-05-29 1 0 1
Upvotes: 1