Reputation:
The way I went about to transform my DataFrame to add columns that are derived from existing data seemed pretty painful. At first, I considered using apply, but I realized that these additional columns hinged on certain aspects of my original DataFrame (e.g. the MMR information was unique to the "ReplayID" and whether the team won or not as found in "Is Winner"). I found a way forward, but I'm wondering if there was a more efficient way to go about it. The DataFrames are below and my code below those:
Before Transformation
After Transformation
import pandas as pd
import numpy as np
df = pd.read_csv('small.csv', index_col=False)
del df['Unnamed: 0']
replayGroup = df['MMR Before'].groupby([df['ReplayID'], df['Is Winner']])
summaryReplayGroup = replayGroup.agg([np.max, np.mean, np.min, np.std])
# i[0] -> Index
# i[1] -> ReplayID
# i[2] -> Is Auto Select
# i[3] -> HeroID
# i[4] -> Hero Level
# i[5] -> Is Winner
# i[6] -> MMR Before
maxMMR = []
meanMMR = []
minMMR = []
stdMMR = []
for i in df.itertuples():
key = (i[1], i[5])
maxMMR.append(summaryReplayGroup.loc[key]['amax'])
meanMMR.append(summaryReplayGroup.loc[key]['mean'])
minMMR.append(summaryReplayGroup.loc[key]['amin'])
stdMMR.append(summaryReplayGroup.loc[key]['std'])
df['Max Team MMR'] = maxMMR
df['Mean Team MMR'] = meanMMR
df['Min Team MMR'] = minMMR
df['Std Team MMR'] = stdMMR
# Negative diff indicates disadvantage
maxDiffMMR = []
meanDiffMMR = []
minDiffMMR = []
stdDiffMMR = []
# i[0] -> Index
# i[1] -> ReplayID
# i[2] -> Is Auto Select
# i[3] -> HeroID
# i[4] -> Hero Level
# i[5] -> Is Winner
# i[6] -> MMR Before
# i[7] -> Max Team MMR
# i[8] -> Mean Team MMR
# i[9] -> Min Team MMR
# i[10] -> Std Team MMR
for i in df.itertuples():
if i[5]:
opposite = 0
else:
opposite = 1
replayId = i[1]
oppTeamMaxMMR = df.loc[df['ReplayID'] == replayId].loc[df['Is Winner'] == opposite]['Max Team MMR'].mean()
teamMaxMMR = i[7]
diffMaxMMR = teamMaxMMR - oppTeamMaxMMR
oppTeamMeanMMR = df.loc[df['ReplayID'] == replayId].loc[df['Is Winner'] == opposite]['Mean Team MMR'].mean()
teamMeanMMR = i[8]
diffMeanMMR = teamMeanMMR - oppTeamMeanMMR
oppTeamMinMMR = df.loc[df['ReplayID'] == replayId].loc[df['Is Winner'] == opposite]['Min Team MMR'].mean()
teamMinMMR = i[8]
diffMinMMR = teamMinMMR - oppTeamMinMMR
oppTeamStdMMR = df.loc[df['ReplayID'] == replayId].loc[df['Is Winner'] == opposite]['Std Team MMR'].mean()
teamStdMMR = i[9]
diffStdMMR = teamStdMMR - oppTeamStdMMR
maxDiffMMR.append(diffMaxMMR)
meanDiffMMR.append(diffMeanMMR)
minDiffMMR.append(diffMinMMR)
stdDiffMMR.append(diffStdMMR)
df['Diff Max MMR'] = maxDiffMMR
df['Diff Mean MMR'] = meanDiffMMR
df['Diff Min MMR'] = minDiffMMR
df['Diff Std MMR'] = stdDiffMMR
Thanks for taking the time to look!
Upvotes: 1
Views: 67
Reputation: 294546
df3
is your final dataframe
funcs = {
'Max Team MMR': 'max',
'Mean Team MMR': 'mean',
'Min Team MMR': 'min',
'Std Team MMR': 'std'
}
idx_cols = ['ReplayID', 'Is Winner']
val_col = 'MMR Before'
df1 = df.set_index(idx_cols)
summaryReplayGroup = df1[val_col].groupby(level=idx_cols).agg(funcs)
df2 = df1.join(summaryReplayGroup)
diffs = summaryReplayGroup.unstack(idx_cols[0]).diff().dropna().squeeze().unstack(0)
diffs.columns = diffs.columns.str.replace(r'(.+) Team', r'Diff \1')
diffs = pd.concat([diffs, -diffs], keys=[1, 0])
df3 = df2.reset_index().join(diffs, on=idx_cols[::-1])
df3
results
setup
for others to try
df = pd.DataFrame([
[57010496, 0, 36, 9, 0, 2589],
[57010496, 0, 20, 9, 1, 2354],
[57010496, 0, 14, 6, 1, 2314],
[57010496, 0, 12, 10, 0, 2288],
[57010496, 0, 39, 10, 0, 2486],
[57010496, 0, 19, 10, 1, 2292],
[57010496, 0, 27, 9, 1, 2385],
[57010496, 0, 11, 7, 0, 2183],
[57010496, 0, 24, 9, 1, 2471],
[57010496, 0, 35, 3, 0, 2166],
[57010518, 0, 22, 4, 0, 2582],
[57010518, 0, 29, 6, 1, 2470],
[57010518, 0, 36, 8, 1, 2590],
[57010518, 0, 31, 9, 1, 2313],
[57010518, 0, 19, 8, 1, 2159],
[57010518, 0, 13, 7, 0, 1996],
[57010518, 0, 7, 6, 1, 2441],
[57010518, 0, 21, 7, 0, 2220],
[57010518, 0, 42, 9, 0, 2465],
[57010518, 0, 18, 11, 0, 2392],
], columns=['ReplayID', 'Is Auto Select', 'HeroID',
'Hero Level', 'Is Winner', 'MMR Before'])
Upvotes: 1