Reputation: 7448
I have a df
,
key amount code year_month
CHQ 100 1000 201701
EFT 200 1000 201701
CHQ 300 2000 201702
CHQ 400 2000 201702
EFT 500 3000 201703
EFT 600 4000 201703
I like to groupby
code
and year_month
, and then aggregate count on key
in a dictionary, so the result looks like
key
code year_month
1000 201701 {'CHQ':1, 'EFT':1}
2000 201702 {'CHQ':2, 'EFT':0}
3000 201703 {'CHQ':0, 'EFT':2}
and then transform the above df
into a matrix with code
values as row labels, and year_month
values as column labels, and also do aggregation on row 0 and column 0, that e.g. (1,1)
sums count
for all code
and year_month
, (1,2)
sums the count
for 201701
.
0 1 2 3 4
0 -1 0 201701 201702 201703
1 0 {'CHQ':3,'EFT':3} {'CHQ':1,'EFT':1} {'CHQ':2,'EFT':0} {'CHQ':0,'EFT':2}
2 1000 {'CHQ':1,'EFT':1} {'CHQ':1,'EFT':1} {'CHQ':0,'EFT':0} {'CHQ':0,'EFT':0}
3 2000 {'CHQ':2,'EFT':0} {'CHQ':0,'EFT':0} {'CHQ':2,'EFT':0} {'CHQ':0,'EFT':0}
4 3000 {'CHQ':0,'EFT':2} {'CHQ':0,'EFT':0} {'CHQ':0,'EFT':0} {'CHQ':0,'EFT':2}
Upvotes: 1
Views: 770
Reputation: 25397
This would be my take if I got that right:
df_tmp = (df.groupby(['code', 'year_month'])['key']
.agg(lambda x: (Counter(x),))
.to_frame()
.reset_index()
)
# Pivot
df_pivot = pd.pivot_table(df_tmp, index='code', columns='year_month', values='key', aggfunc=np.sum)
# Filling the gaps
df_final = df_pivot.apply(lambda x: [v[0] if v is not None else Counter(dict.fromkeys(df.key, 0)) for v in x ])
Note that this tupeling "(Counter(x),)
" in the agg
function is a little workaround since pandas doesn't seem to like working with objects. However, this is going to be unwrapped in the final apply
call anyway.
Since you have Counter
instances you can to things like summing over columns/rows and get the correct result:
df_final.sum(1)
from io import StringIO
import pandas as pd
import numpy as np
from collections import Counter
data = """
key amount code year_month
CHQ 100 1000 201701
EFT 200 1000 201701
EFT 200 1000 201702
EFT 200 1000 201702
EFT 200 1000 201702
EFT 200 1000 201702
EFT 200 1000 201702
CHQ 300 2000 201702
CHQ 400 2000 201702
EFT 500 3000 201703
EFT 600 4000 201703
"""
df = pd.read_csv(StringIO(data), sep='\s+')
df_tmp = (df.groupby(['code', 'year_month'])['key']
.agg(lambda x: (Counter(x),))
.to_frame()
.reset_index()
)
# Pivot
df_pivot = pd.pivot_table(df_tmp, index='code', columns='year_month', values='key', aggfunc=np.sum)
# Filling in the gaps
df_final = df_pivot.apply(lambda x: [v[0] if v is not None else Counter(dict.fromkeys(df.key, 0)) for v in x ])
Upvotes: 1
Reputation: 294258
defaultdict
from collections import defaultdict
a = dict.fromkeys(df.key.unique(), 0)
b = defaultdict(lambda:defaultdict(lambda:a))
for t in df.itertuples():
b[t.year_month][t.code][t.key] += 1
b['All'][t.code][t.key] += 1
b[t.year_month]['All'][t.key] += 1
idx = df.code.unique().tolist() + ['All']
idx = pd.CategoricalIndex(idx, idx, ordered=True)
col = df.year_month.unique().tolist() + ['All']
col = pd.CategoricalIndex(col, col, ordered=True)
m = pd.DataFrame(b, idx, col)
m = m.mask(m.isna(), a)
m
201701 201702 201703 All
1000 {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9}
2000 {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9}
3000 {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9}
4000 {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9}
All {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9} {'CHQ': 9, 'EFT': 9}
append
and concat
d = df.groupby(['code', 'year_month', 'key']).size().unstack([1, 2], fill_value=0)
d = d.reindex(columns=pd.MultiIndex.from_product(d.columns.levels), fill_value=0)
def add_level(d, k):
return pd.concat([d], keys=[k], axis=1)
m = d.join(d.sum(1, level=1).pipe(add_level, k='All')).pipe(
lambda d: d.append(d.sum().rename('All'))
)
m.groupby(axis=1, level=0).apply(lambda x: pd.Series(x.xs(x.name, 1).to_dict('i')))
201701 201702 201703 All
1000 {'CHQ': 1, 'EFT': 1} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 1, 'EFT': 1}
2000 {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0}
3000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
4000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
All {'CHQ': 1, 'EFT': 1} {'CHQ': 2, 'EFT': 0} {'CHQ': 0, 'EFT': 2} {'CHQ': 3, 'EFT': 3}
Alternate to_dict
pd.DataFrame({
j: {
i: m.loc[i, j].to_dict()
for i in m.index
} for j in m.columns.levels[0]
})
import pandas as pd
from collections import Counter
d = df.groupby(['code', 'year_month', 'key']).size().unstack(fill_value=0).assign(
key=lambda d: d.to_dict('r')
)[['key']].reset_index().rename_axis(None, 1).assign(key=lambda d: list(map(Counter, d.key)))
d
code year_month key
0 1000 201701 {'CHQ': 1, 'EFT': 1}
1 2000 201702 {'CHQ': 2, 'EFT': 0}
2 3000 201703 {'CHQ': 0, 'EFT': 1}
3 4000 201703 {'CHQ': 0, 'EFT': 1}
Then
fv = Counter(dict.fromkeys(df.key.unique(), 0))
d2 = d.pivot_table(
'key', 'code', 'year_month', 'sum',
margins=True
).pipe(lambda d: d.mask(d.isna(), fv))
d2
year_month 201701 201702 201703 All
code
1000 {'CHQ': 1, 'EFT': 1} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 1, 'EFT': 1}
2000 {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0}
3000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
4000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
All {'CHQ': 1, 'EFT': 1} {'CHQ': 2, 'EFT': 0} {'EFT': 2} {'CHQ': 3, 'EFT': 3}
We can hack some stuff to get things to work. I don't feel good about this but maybe it's enough to get you what you need.
Use Counter.update
to alter objects inplace
d2.applymap(lambda x: x.update(fv));
d2
year_month 201701 201702 201703 All
code
1000 {'CHQ': 1, 'EFT': 1} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 1, 'EFT': 1}
2000 {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 2, 'EFT': 0}
3000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
4000 {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 0} {'CHQ': 0, 'EFT': 1} {'CHQ': 0, 'EFT': 1}
All {'CHQ': 1, 'EFT': 1} {'CHQ': 2, 'EFT': 0} {'EFT': 2, 'CHQ': 0} {'CHQ': 3, 'EFT': 3}
Upvotes: 1