Reputation: 71
I import data from excel using xlrd. After coverting it to JSON and loading, the data format looks like this:
[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
How can I merge the lines that have same receipt_id, to make a new list like this:
[{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
I've tried changing this list into dict, but I still got some problems. Any int how to get it done?
Upvotes: 0
Views: 134
Reputation: 1281
Used Pandas dataframe.
aa=[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
df = pd.DataFrame(aa)
def add_service(x):
return " ".join(x)
def add_charge(x):
#results = map(int, x) # In py2
results = list(map(int, x)) #In py3:
return sum(results)
def add_comp(x):
a = list(set(x))
return " ".join(a)
grouped = df.groupby(['receipt_id'])
ser = grouped['service'].agg(add_service)
cha = grouped['charge'].agg(add_charge)
com = grouped['company'].agg(add_comp)
df_new = pd.DataFrame({"service": ser, "charge": cha, "company": com})
list(df_new.T.to_dict().values()) # To get json format
Upvotes: 1
Reputation: 1691
I think this should work. I used defaultdict to group the duplicates and process according to the requirement.
from collections import defaultdict
def group_duplicates(data, index):
groups = defaultdict(list)
for obj in data:
groups[obj[index]].append(obj)
return groups.values()
def process_duplicates(duplicate_data):
new_list = []
for objects in duplicate_data:
fist_element = (objects[0])
merged_element = fist_element
service = ''
charge = 0
for obj in objects:
if len(objects) > 1:
service += obj['service'] + ' ' + obj['charge'] + ' '
else:
service = obj['service']
charge += int( obj['charge'])
merged_element['service'] = service.strip()
merged_element['charge'] = str(charge)
new_list.append(merged_element)
return new_list
def main():
receipts = [{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
grouped_receipts = group_duplicates(receipts, 'receipt_id')
merged_receipts = process_duplicates(grouped_receipts)
print(merged_receipts)
if __name__ == '__main__':
main()
output
[{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'}, {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
Upvotes: 0
Reputation: 46779
First combine all the receipt_id
values using a defaultdict(list)
and then build the output list by combining the necessary items:
from collections import defaultdict
combined = defaultdict(list)
data = [
{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
for d in data:
combined[d['receipt_id']].append(d)
output = []
for id, items in combined.items():
services = [d['service'] for d in items]
charges = [int(d['charge']) for d in items]
companies = {d['company'] for d in items}
service = ' '.join('{} {}'.format(s, c) for s, c in zip(services, charges))
output.append({'receipt_id':id, 'service':service, 'charge':str(sum(charges)), 'company':' '.join(companies)})
print(output)
This would give you:
[
{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C 1000', 'charge': '1000', 'company': 'Company B'}
]
Note, if the company
entry also differs, this is also shown.
Upvotes: 0
Reputation: 12679
You can try something like this.
data=[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
new_dict={}
for i in data:
if i['receipt_id'] not in new_dict:
new_dict[i['receipt_id']]=[i]
else:
new_dict[i['receipt_id']][0]['charge']=int(new_dict[i['receipt_id']][0]['charge'])+int(i['charge'])
new_dict[i['receipt_id']][0]['service']="{} {} {} {}".format(str(new_dict[i['receipt_id']][0]['service']),str(new_dict[i['receipt_id']][0]['charge']),str(i['service']),str(i['charge']))
print([j[0] for i,j in new_dict.items()])
output:
[{'company': 'Company A', 'service': 'A 5000 B 3000', 'charge': 5000, 'receipt_id': '1'}, {'company': 'Company B', 'service': 'C', 'charge': '1000', 'receipt_id': '2'}]
Upvotes: 0
Reputation: 7186
If you don't want to use pandas
, you can write your own group_by
function:
from collections import defaultdict
def group_by(dicts, key):
out = defaultdict(list)
for d in dicts:
out[d[key]].append(d)
return out
def combine_receipts(receipts):
for receipt_id, receipts in group_by(receipts, 'receipt_id').items():
if len(receipts) > 1:
service = ' '.join(f"{r['service']} {r['charge']}" for r in receipts)
charge = str(sum(float(r['charge']) for r in receipts))
yield {'receipt_id': receipt_id,
'service': service,
'charge': charge,
'company': receipts[0]['company']}
elif len(receipts) == 1:
yield receipts[0]
else:
raise RuntimeError("empty group")
if __name__ == "__main__":
receipts = [{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
print(list(combine_receipts(receipts)))
# [{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000.0', 'company': 'Company A'},
# {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]
Upvotes: 0