Reputation: 27
I'm trying to convert a multi-index df to a nested dictionary by aggregation sum of all index.
DataFrame is in the format
jan feb
first second third
bar bar1 bar3 0 5
bar4 4 4
bar2 bar5 1 9
foo foo1 foo3 7 2
foo4 7 7
foo2 foo5 0 3
I tried to aggregate with each index separately and convert to dict using .to_dict('records')
. But couldn't able to achieve nested dictionary with aggregation of each index
expected result is
[
{'first': 'bar',
'jan': 5,
'feb': 18,
'sub_rows': [{'second': 'bar1',
'jan': 4,
'feb': 9,
'sub_rows': [{'third': 'bar3', 'jan': 0, 'feb': 5}, {'third': 'bar4', 'jan': 4, 'feb': 4}]
},
{'second': 'bar2',
'jan': 1,
'feb': 9,
'sub_rows': [{'third': 'bar5', 'jan': 1, 'feb': 9}]
}]
},
{'first': 'foo',
'jan': 14,
'feb': 12,
'sub_rows': [{'second': 'foo1',
'jan': 14,
'feb': 9,
'sub_rows': [{'third': 'foo3', 'jan': 7, 'feb': 2}, {'third': 'foo4', 'jan': 7, 'feb': 7}]
},
{'second': 'foo2',
'jan': 0,
'feb': 3,
'sub_rows': [{'third': 'foo5', 'jan': 0, 'feb': 3}]
}]
}
]
Can you please guide me to achieve this format. Thanks
Upvotes: 1
Views: 124
Reputation: 765
sub_rows3=df.reset_index(level=2).apply(lambda ss:ss.to_dict(),axis=1).tolist()
df1=df.assign(sub_rows=sub_rows3)\
.groupby(level=[0,1]).agg({'jan':sum,'feb':sum,"sub_rows":lambda ss:ss.tolist()})\
.assign(second=lambda dd:dd.index.get_level_values(1))\
.assign(sub_rows=lambda ss:ss.apply(lambda ss:ss.to_dict(),axis=1))\
df1.groupby(level=0).agg({'jan':sum,'feb':sum,"sub_rows":lambda ss:ss.tolist()})\
.reset_index().to_dict('r')
out:
[{'first': 'bar',
'jan': 5,
'feb': 18,
'sub_rows': [{'jan': 4,
'feb': 9,
'sub_rows': [{'third': 'bar3', 'jan': 0, 'feb': 5},
{'third': 'bar4', 'jan': 4, 'feb': 4}],
'second': 'bar1'},
{'jan': 1,
'feb': 9,
'sub_rows': [{'third': 'bar5', 'jan': 1, 'feb': 9}],
'second': 'bar2'}]},
{'first': 'foo',
'jan': 14,
'feb': 12,
'sub_rows': [{'jan': 14,
'feb': 9,
'sub_rows': [{'third': 'foo3', 'jan': 7, 'feb': 2},
{'third': 'foo4', 'jan': 7, 'feb': 7}],
'second': 'foo1'},
{'jan': 0,
'feb': 3,
'sub_rows': [{'third': 'foo5', 'jan': 0, 'feb': 3}],
'second': 'foo2'}]}]
Upvotes: 0
Reputation: 120391
Another recursive answer using a list as parameter:
def to_nested_dict(df, data=[]):
lvl_name = df.index.get_level_values(0).name
for name, subdf in df.groupby(level=0):
d = {lvl_name: name} | subdf.sum().to_dict()
data.append(d)
if df.index.nlevels > 1:
l = d.setdefault('sub_rows', [])
to_nested_dict(subdf.loc[name], l)
return data
data = to_nested_dict(df)
Output:
>>> data
[{'first': 'bar',
'jan': 5,
'feb': 18,
'sub_rows': [{'second': 'bar1',
'jan': 4,
'feb': 9,
'sub_rows': [{'third': 'bar3', 'jan': 0, 'feb': 5},
{'third': 'bar4', 'jan': 4, 'feb': 4}]},
{'second': 'bar2',
'jan': 1,
'feb': 9,
'sub_rows': [{'third': 'bar5', 'jan': 1, 'feb': 9}]}]},
{'first': 'foo',
'jan': 14,
'feb': 12,
'sub_rows': [{'second': 'foo1',
'jan': 14,
'feb': 9,
'sub_rows': [{'third': 'foo3', 'jan': 7, 'feb': 2},
{'third': 'foo4', 'jan': 7, 'feb': 7}]},
{'second': 'foo2',
'jan': 0,
'feb': 3,
'sub_rows': [{'third': 'foo5', 'jan': 0, 'feb': 3}]}]}]
Upvotes: 1
Reputation: 96
import pandas as pd
def multiindex_df_to_list(df):
out_list = []
nlevels = df.index.nlevels
level_name = df.index.levels[0].name if nlevels > 1 else df.index.name
for i, level_value in enumerate(df.groupby(level=0)):
out_list.append({})
out_list[i][level_name] = level_value[0]
for col, sum_val in level_value[1].sum().iteritems():
out_list[i][col] = sum_val
if nlevels > 1:
out_list[i]['sub_rows'] = multiindex_df_to_list(level_value[1].droplevel(level=0))
return out_list
def main():
idx = pd.MultiIndex.from_arrays(
[
['bar', 'bar', 'bar', 'foo', 'foo', 'foo'],
['bar1', 'bar1', 'bar2', 'foo1', 'foo1', 'foo2'],
['bar3', 'bar4', 'bar5', 'foo3', 'foo4', 'foo5']
],
names=('first', 'second', 'third'))
col = ['jan', 'feb']
data = [[0, 5], [4, 4], [1, 9], [7, 2], [7, 7], [0, 3]]
df = pd.DataFrame(data, idx, columns=col)
out_list = multiindex_df_to_list(df)
return out_list
if __name__ == "__main__":
main()
Upvotes: 1