Reputation: 11
I have input_data
like this:
# Flat input: one dict per record, each holding a 'vehicle', a 'store'
# and a single 'qty' value.
input_data = [
    {'vehicle': '001', 'store': 'foo1', 'qty': 100},
    {'vehicle': '001', 'store': 'foo1', 'qty': 200},
    {'vehicle': '001', 'store': 'baz1', 'qty': 300},
    {'vehicle': '001', 'store': 'baz1', 'qty': 400},
    {'vehicle': '002', 'store': 'foo2', 'qty': 500},
    {'vehicle': '002', 'store': 'baz2', 'qty': 600},
    {'vehicle': '002', 'store': 'baz2', 'qty': 700},
]
What is the best way in Python to turn it into
output data that looks as follows:
# Desired shape: one entry per vehicle; each entry lists its stores, and
# each store carries the list of quantities recorded for it.
output_data = [
    {
        'vehicle': '001',
        'store': [
            {'store': 'foo1', 'qty': [100, 200]},
            {'store': 'baz1', 'qty': [300, 400]},
        ],
    },
    {
        'vehicle': '002',
        'store': [
            {'store': 'foo2', 'qty': [500]},
            {'store': 'baz2', 'qty': [600, 700]},
        ],
    },
]
Upvotes: 1
Views: 96
Reputation: 11
The code below produces the expected output:
result_list = []


def check_duplicate(vehicle_name, entries=None, key="vehicle"):
    """Find the first dict in ``entries`` whose ``key`` equals ``vehicle_name``.

    Args:
        vehicle_name: Value to look for (a vehicle id by default; a store
            name when ``key="store"``).
        entries: List of dicts to search. Defaults to the module-level
            ``result_list``.
        key: Dict key whose value is compared against ``vehicle_name``.

    Returns:
        ``(True, index)`` for the first match, otherwise ``(False, None)``.
    """
    if entries is None:
        entries = result_list
    for index, entry in enumerate(entries):
        if entry.get(key) == vehicle_name:
            return True, index
    return False, None


for data in input_data:
    # Ignore anything that is not a record carrying a vehicle id.
    if not (isinstance(data, dict) and "vehicle" in data):
        continue

    vehicle_found, vehicle_index = check_duplicate(data.get("vehicle"))
    if not vehicle_found:
        # First record for this vehicle: start it with a single store.
        result_list.append({
            "vehicle": data.get("vehicle"),
            "store": [{"store": data.get("store"), "qty": [data.get("qty")]}],
        })
        continue

    # Look the store up only among this vehicle's stores, so that an
    # index found under a different vehicle can never be used here.
    stores = result_list[vehicle_index]["store"]
    store_found, store_index = check_duplicate(
        data.get("store"), stores, key="store"
    )
    if store_found:
        stores[store_index]["qty"].append(data.get("qty"))
    else:
        stores.append({"store": data.get("store"), "qty": [data.get("qty")]})

print(result_list)
Upvotes: 0
Reputation: 520
One way of doing it is with the built-in itertools.groupby function:
# Input data
input_data = [
    {'vehicle': '001', 'store': 'foo1', 'qty': 100},
    {'vehicle': '001', 'store': 'foo1', 'qty': 200},
    {'vehicle': '001', 'store': 'baz1', 'qty': 300},
    {'vehicle': '001', 'store': 'baz1', 'qty': 400},
    {'vehicle': '002', 'store': 'foo2', 'qty': 500},
    {'vehicle': '002', 'store': 'baz2', 'qty': 600},
    {'vehicle': '002', 'store': 'baz2', 'qty': 700},
]

# Main code
from itertools import groupby  # For grouping a dataset
from operator import itemgetter


def group_by_vehicle(records):
    """Group flat records by vehicle, then collect quantities per store.

    Args:
        records: Iterable of dicts with 'vehicle', 'store' and 'qty' keys.

    Returns:
        A list with one dict per vehicle, shaped as
        ``{'vehicle': <id>, 'store': [{'store': <name>, 'qty': [...]}, ...]}``.
        Vehicles come out in sorted order; within a vehicle, stores keep
        the order in which they first appear in ``records``.
    """
    by_vehicle = itemgetter('vehicle')
    # groupby only merges *adjacent* items with the same key, so order the
    # records by vehicle first. sorted() is stable, which keeps the records
    # of one vehicle in their original relative order.
    ordered = sorted(records, key=by_vehicle)

    grouped = []
    for vehicle, rows in groupby(ordered, key=by_vehicle):
        quantities = {}  # store name -> list of qty, in first-seen order
        for row in rows:
            quantities.setdefault(row['store'], []).append(row['qty'])
        grouped.append({
            'vehicle': vehicle,
            'store': [
                {'store': store, 'qty': qty}
                for store, qty in quantities.items()
            ],
        })
    return grouped


final_data = group_by_vehicle(input_data)
print(final_data)

# Output
# [{'vehicle': '001', 'store': [{'store': 'foo1', 'qty': [100, 200]}, {'store': 'baz1', 'qty': [300, 400]}]}, {'vehicle': '002', 'store': [{'store': 'foo2', 'qty': [500]}, {'store': 'baz2', 'qty': [600, 700]}]}]
I hope this helps and counts!
Upvotes: 0
Reputation: 27495
You can use itertools.groupby
and operator.itemgetter
from itertools import groupby
from operator import itemgetter

names = v, s, q = 'vehicle', 'store', 'qty'
v_key, s_key, q_key = map(itemgetter, names)

# groupby only merges adjacent records, so input_data is expected to have
# the records of each vehicle (and of each store within it) next to each
# other, as in the sample input.
# Each entry keeps the vehicle id under 'vehicle' and puts the per-store
# groups next to it under 'store'.
output_data = [
    {
        v: vk,
        s: [
            {s: sk, q: list(map(q_key, sv))}
            for sk, sv in groupby(vv, s_key)
        ],
    }
    for vk, vv in groupby(input_data, v_key)
]

# Results:
# [{'vehicle': '001',
#   'store': [{'store': 'foo1', 'qty': [100, 200]},
#             {'store': 'baz1', 'qty': [300, 400]}]},
#  {'vehicle': '002',
#   'store': [{'store': 'foo2', 'qty': [500]},
#             {'store': 'baz2', 'qty': [600, 700]}]}]
Upvotes: 0
Reputation: 15872
You can try this. I am sure there's a more elegant way, but I can't figure it out at the moment:
import pandas as pd
input_data = [
    {'vehicle': '001', 'store': 'foo1', 'qty': 100},
    {'vehicle': '001', 'store': 'foo1', 'qty': 200},
    {'vehicle': '001', 'store': 'baz1', 'qty': 300},
    {'vehicle': '001', 'store': 'baz1', 'qty': 400},
    {'vehicle': '002', 'store': 'foo2', 'qty': 500},
    {'vehicle': '002', 'store': 'baz2', 'qty': 600},
    {'vehicle': '002', 'store': 'baz2', 'qty': 700},
]

# One row per (vehicle, store) pair; the single 'qty' column holds the list
# of quantities for that pair. groupby sorts its keys by default, so both
# vehicles and stores come out in sorted order.
df = (
    pd.DataFrame(input_data)
    .groupby(['vehicle', 'store'])['qty']
    .apply(list)
    .to_frame()
)

# Nest the (vehicle, store) index into {vehicle: {store: [qty, ...]}}.
dct = {}
for (vehicle, store), quantities in df['qty'].items():
    dct.setdefault(vehicle, {})[store] = quantities

# Reshape the nested mapping into the requested list-of-dicts layout.
final_list = [
    {
        'vehicle': vehicle,
        'store': [
            {'store': store, 'qty': quantities}
            for store, quantities in stores.items()
        ],
    }
    for vehicle, stores in dct.items()
]
print(final_list)
Output:
[{'vehicle': '001',
'store': [{'qty': [300, 400], 'store': 'baz1'},
{'qty': [100, 200], 'store': 'foo1'}]},
{'vehicle': '002',
'store': [{'qty': [600, 700], 'store': 'baz2'},
{'qty': [500], 'store': 'foo2'}]}]
Upvotes: 1