Reputation: 45
A `for-loop` would help, but I'm unsure of how the return
# statement will be made in one small paragraph like I have now.
def _period_statement(frame, start, end):
    """Build the summary sentence for one dataframe and one date range.

    The 'Announcement Date' column of ``frame`` is converted to datetime
    in place, rows whose date lies in ``[start, end]`` (both ends
    inclusive) are selected, and the share of rows whose 'performance'
    is 'better' / 'worse' is reported as a percentage rounded to two
    decimals.
    """
    frame['Announcement Date'] = pd.to_datetime(frame['Announcement Date'])
    dates = frame['Announcement Date']
    selected = frame.loc[(dates >= start) & (dates <= end)]
    total = len(selected)

    def percentage(label):
        # An empty date range would otherwise divide by zero; report 0.0.
        if total == 0:
            return 0.0
        matching = selected[selected['performance'] == label]
        return round((len(matching) / total) * 100, 2)

    return (
        "During the time period between {} and {}, {} % of the students "
        "performed better. {} % of the students performed worse"
    ).format(start, end, percentage('better'), percentage('worse'))


def stat_generator(df, date1, date2, df2, date3, date4, df4, date5, date6):
    """Return one string summarising three dataframes over three date ranges.

    Each dataframe is paired with the two dates that follow it in the
    argument list: (df, date1, date2), (df2, date3, date4) and
    (df4, date5, date6). Every dataframe needs an 'Announcement Date'
    column and a 'performance' column.

    Returns:
        The three per-dataframe sentences concatenated directly, in
        argument order, with no separator between them.

    Note:
        The 'Announcement Date' column of each dataframe passed in is
        converted to datetime in place.
    """
    # The third frame is the ``df4`` parameter; the body used to read an
    # undefined ``df3`` here, which raised NameError.
    groups = [(df, date1, date2), (df2, date3, date4), (df4, date5, date6)]
    return "".join(
        _period_statement(frame, start, end) for frame, start, end in groups
    )
Upvotes: 0
Views: 143
Reputation: 62403
- Change the `df` parameter in `stat_generator` to `df1`, so `df` can be used in the `for-loop`.
- Add a `statements` list, to be returned.
- `date1` and `date2` are changed to `d1` and `d2` in the loop.
- Update `statement1` to use a more easily readable `f-string`.
- Update `mask` to `mask = df['Announcement Date'].between(d1, d2, inclusive=True)`
def stat_generator(df1, date1, date2, df2, date3, date4, df4, date5, date6):
    """Return one summary sentence per (dataframe, date range) group.

    The groups are (df1, date1, date2), (df2, date3, date4) and
    (df4, date5, date6). Every dataframe needs an 'Announcement Date'
    column and a 'performance' column.

    For each group, the rows whose announcement date lies in the
    inclusive range are selected, and the share of rows whose
    'performance' is 'better' / 'worse' is reported as a percentage
    rounded to two decimals. A range that selects no rows reports 0.0
    for both percentages instead of dividing by zero.

    Returns:
        A list of three strings, in argument order.

    Note:
        The 'Announcement Date' column of each dataframe passed in is
        converted to datetime in place.
    """
    # The third frame is the ``df4`` parameter; the body used to read an
    # undefined ``df3`` here, which raised NameError.
    groups = [(df1, date1, date2), (df2, date3, date4), (df4, date5, date6)]

    statements = []
    for df, d1, d2 in groups:
        df['Announcement Date'] = pd.to_datetime(df['Announcement Date'])
        mask = (df['Announcement Date'] >= d1) & (df['Announcement Date'] <= d2)
        df_new = df.loc[mask]
        total = len(df_new)

        if total:
            better = df_new[df_new['performance'] == 'better']
            worse = df_new[df_new['performance'] == 'worse']
            better_perc = round((len(better) / total) * 100, 2)
            worse_perc = round((len(worse) / total) * 100, 2)
        else:
            # No rows in the range: avoid ZeroDivisionError.
            better_perc = worse_perc = 0.0

        statements.append(
            f"During the time period between {d1} and {d2}, {better_perc}% of the students performed better. {worse_perc}% of the students performed worse"
        )

    return statements
- Iterate through the returned `list` or `print` them.
- There is no need to filter `better` and `worse` separately.
- Use `.value_counts()` with `normalize=True`
# to get the percentage.
def stat_generator(df: pd.DataFrame, d1: str, d2: str) -> str:
    """Summarise the 'performance' column of ``df`` for one date range.

    Args:
        df: Frame with an 'Announcement Date' column and a 'performance'
            column. The date column is converted to datetime in place.
        d1: Start of the date range (inclusive).
        d2: End of the date range (inclusive).

    Returns:
        A sentence giving the percentage (rounded to two decimals) of
        rows in the range whose performance is 'better' and 'worse'.
        A category with no rows in the range is reported as 0.0.
    """
    df['Announcement Date'] = pd.to_datetime(df['Announcement Date'])
    # ``between`` includes both endpoints by default. The boolean
    # ``inclusive=True`` form is deprecated since pandas 1.3 and rejected by
    # pandas 2.x, so the argument is omitted for compatibility.
    mask = df['Announcement Date'].between(d1, d2)
    df_new = df.loc[mask]
    # Relative frequency of each performance label, as a percentage.
    per = (df_new['performance'].value_counts(normalize=True) * 100).round(2)
    # ``.get`` avoids a KeyError when a label does not occur in the range.
    better = per.get('better', 0.0)
    worse = per.get('worse', 0.0)
    return f"During the time period between {d1} and {d2}, {better}% of the students performed better. {worse}% of the students performed worse"
# Pair each dataframe with its date range, then build one statement per pair.
groups = [
    (df1, date1, date2),
    (df2, date3, date4),
    (df3, date5, date6),
]
statements = [stat_generator(*group) for group in groups]
Upvotes: 1
Reputation: 384
I would just pass 3 parameters to your function (`df`, `date1` and `date2`) and then call your function 3 times.
# Sketch of a single-group version: it takes one dataframe and one date range.
# The "..." string is a placeholder standing in for the elided body, and
# `statement` is not assigned anywhere in this snippet, so it is not runnable
# as written. Presumably the body builds `statement` as in the question.
def stat_generator(df,date1,date2):
"..."
return statement
Then pass in your data as a list of lists or something similar. For example:
# One [dataframe, start date, end date] entry per call to stat_generator.
data = [
    [df, date1, date2],
    [df2, date3, date4],
    [df4, date5, date6],
]
# The return value of each call is discarded here, as in the original example.
for frame, start, end in data:
    stat_generator(frame, start, end)
Upvotes: 2