Leavlzi
Leavlzi

Reputation: 71

Python 3 merge list by same value

I import data from excel using xlrd. After coverting it to JSON and loading, the data format looks like this:

[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

How can I merge the lines that have same receipt_id, to make a new list like this:

[{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

I've tried changing this list into dict, but I still got some problems. Any int how to get it done?

Upvotes: 0

Views: 134

Answers (5)

Rao Sahab
Rao Sahab

Reputation: 1281

Used Pandas dataframe.

aa=[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

df = pd.DataFrame(aa)

def add_service(x):
    return " ".join(x)

def add_charge(x):
    #results = map(int, x) # In py2
    results = list(map(int, x)) #In py3:
    return sum(results)

def add_comp(x):
    a = list(set(x))
    return " ".join(a)
grouped = df.groupby(['receipt_id'])

ser = grouped['service'].agg(add_service)
cha = grouped['charge'].agg(add_charge)
com = grouped['company'].agg(add_comp)

df_new = pd.DataFrame({"service": ser, "charge": cha, "company": com})
list(df_new.T.to_dict().values()) # To get json format

Upvotes: 1

Sumit Jha
Sumit Jha

Reputation: 1691

I think this should work. I used defaultdict to group the duplicates and process according to the requirement.

from collections import defaultdict

def group_duplicates(data, index):
    groups = defaultdict(list)
    for obj in data:        
        groups[obj[index]].append(obj)
    return groups.values()


def process_duplicates(duplicate_data):
    new_list = []
    for objects in duplicate_data:
        fist_element = (objects[0])
        merged_element = fist_element
        service = ''
        charge = 0
        for obj in objects:
            if len(objects) > 1:
                service += obj['service'] + ' ' + obj['charge'] + ' '
            else:
                service = obj['service']

            charge +=  int( obj['charge'])

        merged_element['service'] = service.strip()
        merged_element['charge'] = str(charge)
        new_list.append(merged_element)

    return new_list

def main():
    receipts = [{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
                {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
                {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

    grouped_receipts = group_duplicates(receipts, 'receipt_id')
    merged_receipts = process_duplicates(grouped_receipts)
    print(merged_receipts)



if __name__ == '__main__':
    main()

output

[{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'}, {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

Upvotes: 0

Martin Evans
Martin Evans

Reputation: 46779

First combine all the receipt_id values using a defaultdict(list) and then build the output list by combining the necessary items:

from collections import defaultdict    

combined = defaultdict(list)

data = [
    {'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
    {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
    {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]    

for d in data:
    combined[d['receipt_id']].append(d)

output = []

for id, items in combined.items():
    services = [d['service'] for d in items]
    charges = [int(d['charge']) for d in items]
    companies = {d['company'] for d in items}
    service = ' '.join('{} {}'.format(s, c) for s, c in zip(services, charges))
    output.append({'receipt_id':id, 'service':service, 'charge':str(sum(charges)), 'company':' '.join(companies)})

print(output)

This would give you:

[
  {'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'}, 
  {'receipt_id': '2', 'service': 'C 1000', 'charge': '1000', 'company': 'Company B'}
]

Note, if the company entry also differs, this is also shown.

Upvotes: 0

Aaditya Ura
Aaditya Ura

Reputation: 12679

You can try something like this.

data=[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

new_dict={}
for i in data:
    if i['receipt_id'] not in new_dict:
        new_dict[i['receipt_id']]=[i]
    else:
        new_dict[i['receipt_id']][0]['charge']=int(new_dict[i['receipt_id']][0]['charge'])+int(i['charge'])
        new_dict[i['receipt_id']][0]['service']="{} {} {} {}".format(str(new_dict[i['receipt_id']][0]['service']),str(new_dict[i['receipt_id']][0]['charge']),str(i['service']),str(i['charge']))

print([j[0] for i,j in new_dict.items()])

output:

[{'company': 'Company A', 'service': 'A 5000 B 3000', 'charge': 5000, 'receipt_id': '1'}, {'company': 'Company B', 'service': 'C', 'charge': '1000', 'receipt_id': '2'}]

Upvotes: 0

Graipher
Graipher

Reputation: 7186

If you don't want to use pandas, you can write your own group_by function:

from collections import defaultdict

def group_by(dicts, key):
    out = defaultdict(list)
    for d in dicts:
        out[d[key]].append(d)
    return out

def combine_receipts(receipts):
    for receipt_id, receipts in group_by(receipts, 'receipt_id').items():
        if len(receipts) > 1:
            service = ' '.join(f"{r['service']} {r['charge']}" for r in receipts)
            charge = str(sum(float(r['charge']) for r in receipts))
            yield {'receipt_id': receipt_id,
                   'service': service,
                   'charge': charge,
                   'company': receipts[0]['company']}
        elif len(receipts) == 1:
            yield receipts[0]
        else:
            raise RuntimeError("empty group")

if __name__ == "__main__":
    receipts = [{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
                {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
                {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

    print(list(combine_receipts(receipts)))
# [{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000.0', 'company': 'Company A'},
#  {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

Upvotes: 0

Related Questions