Reputation: 81
I have a list of nested dictionaries that looks like this:
my_list =
[{'id': '166073',
'ref': [{'MeSH': 'C548074'},
{'UMLS': 'C1969084'},
{'OMIM': '611523'},
{'ICD-10': 'Q04.3'}]},
{'id': '213',
'ref': [{'MeSH': 'D003554'},
{'UMLS': 'C0010690'},
{'MedDRA': '10011777'},
{'ICD-10': 'E72.0'},
{'OMIM': '219750'},
{'OMIM': '219800'},
{'OMIM': '219900'}]},
{'id': '333',
'ref': [{'UMLS': 'C2936785'},
{'ICD-10': 'E75.2'},
{'MeSH': 'C537075'},
{'MeSH': 'D055577'},
{'UMLS': 'C0268255'},
{'OMIM': '228000'}]}
.
.
.
]
I want to merge the dictionaries that share the same key within each nested 'ref' list, collecting their values into a list under that key, like this:
my_list =
[{'id': '166073',
'ref': [{'MeSH': 'C548074'},
{'UMLS': 'C1969084'},
{'OMIM': '611523'},
{'ICD-10': 'Q04.3'}]},
{'id': '213',
'ref': [{'MeSH': 'D003554'},
{'UMLS': 'C0010690'},
{'MedDRA': '10011777'},
{'ICD-10': 'E72.0'},
{'OMIM': ['219750', '219800', '219900']}]},
{'id': '333',
'ref': [{'UMLS': 'C2936785'},
{'ICD-10': 'E75.2'},
{'MeSH': ['C537075', 'D055577']},
{'UMLS': 'C0268255'},
{'OMIM': '228000'}]}
.
.
.
]
I tried to merge them by reading the dictionaries with a double for-loop and storing the information in a new dictionary, but I found this method suboptimal. Is there a recommended way to do this kind of merging? Thanks!
Upvotes: 1
Views: 154
Reputation: 152
Use Python's list comprehension:
def merge(item):
    """Return a copy of *item* whose ``'ref'`` entries are grouped by key.

    ``item['ref']`` is expected to be a list of dicts. Every value found
    under the same key is collected, in encounter order, into one list.

    The returned dict carries all of *item*'s other keys unchanged and maps
    ``'ref'`` to a plain dict of ``key -> list of values``. *item* itself is
    not modified. A missing ``'ref'`` key yields an empty dict.
    """
    from collections import defaultdict

    # The list comprehension only flattens the refs into (key, value) pairs.
    # The appends run in an ordinary loop, because a comprehension used just
    # for its side effects builds a throwaway list of None values.
    pairs = [pair for ref in item.get('ref', []) for pair in ref.items()]

    merged = defaultdict(list)
    for key, value in pairs:
        merged[key].append(value)
    return {**item, 'ref': dict(merged)}
Upvotes: 1
Reputation: 5152
I've found it easiest to create a dictionary to collect the values, then unpack it to the required format:
# Build new_list: one {'id', 'ref'} dict per item of my_list, with the
# single-key dicts under 'ref' merged by key.
new_list = []
for item in my_list:
    # Collect every value seen for each key, keeping first-seen key order.
    # setdefault(...).append(...) grows the list in place instead of
    # re-copying it on every addition.
    collected = {}
    for r in item['ref']:
        for key, value in r.items():
            collected.setdefault(key, []).append(value)

    # Unpack to the required format: one dict per key, holding a list only
    # when the key occurred more than once and the bare value otherwise.
    new_ref = [{k: v if len(v) > 1 else v[0]} for k, v in collected.items()]
    new_list.append({'id': item['id'], 'ref': new_ref})
[{'id': '166073', 'ref': [{'OMIM': '611523'}, {'MeSH': 'C548074'}, {'ICD-10': 'Q04.3'}, {'UMLS': 'C1969084'}]},
{'id': '213', 'ref': [{'MeSH': 'D003554'}, {'UMLS': 'C0010690'}, {'MedDRA': '10011777'}, {'ICD-10': 'E72.0'}, {'OMIM': ['219750', '219800', '219900']}]},
{'id': '333', 'ref': [{'ICD-10': 'E75.2'}, {'OMIM': '228000'}, {'MeSH': ['C537075', 'D055577']}, {'UMLS': ['C2936785', 'C0268255']}]}]
Upvotes: 1
Reputation: 96937
#!/usr/bin/env python
# Merge the single-key dicts under o['ref'] into one dict that maps each key
# to the list of all values seen for it, then print the result.

o = {'id': '213',
     'ref': [{'MeSH': 'D003554'},
             {'UMLS': 'C0010690'},
             {'MedDRA': '10011777'},
             {'ICD-10': 'E72.0'},
             {'OMIM': '219750'},
             {'OMIM': '219800'},
             {'OMIM': '219900'}]}

# Group values by key in first-seen order. Seeding the keys from a set would
# make the key order (and so the printed output) vary between runs, because
# string hashing is randomized per process.
grouped = {}
for p in o['ref']:
    for k, v in p.items():
        grouped.setdefault(k, []).append(v)

# 'ref' stays a list; it now holds the single merged dict.
n = {'id': o['id'], 'ref': [grouped]}

print(n)
Upvotes: 1
Reputation: 1410
Why is it suboptimal? I think this kind of merging should be just fine. I assume that your merging would be like:
# Sample input: a list of records, each with an 'id' string and a 'ref' list
# of single-key dicts. The same key may appear more than once within one
# 'ref' list (e.g. three 'OMIM' entries for id '213').
my_list = [{'id': '166073',
'ref': [{'MeSH': 'C548074'},
{'UMLS': 'C1969084'},
{'OMIM': '611523'},
{'ICD-10': 'Q04.3'}]},
{'id': '213',
'ref': [{'MeSH': 'D003554'},
{'UMLS': 'C0010690'},
{'MedDRA': '10011777'},
{'ICD-10': 'E72.0'},
{'OMIM': '219750'},
{'OMIM': '219800'},
{'OMIM': '219900'}]},
{'id': '333',
'ref': [{'UMLS': 'C2936785'},
{'ICD-10': 'E75.2'},
{'MeSH': 'C537075'},
{'MeSH': 'D055577'},
{'UMLS': 'C0268255'},
{'OMIM': '228000'}]}]
def merge(item):
    """Return a copy of *item* with its ``'ref'`` dicts merged by key.

    ``item['ref']`` is expected to be a list of dicts. The values of equal
    keys are gathered, in encounter order, into a list. The result contains
    every other key of *item* unchanged and maps ``'ref'`` to a plain dict of
    ``key -> list of values``. *item* itself is not modified.

    A missing, ``None`` or empty ``'ref'`` produces ``'ref': {}``.
    """
    from collections import defaultdict

    merged = defaultdict(list)
    # `or []` also covers an item whose 'ref' key is present but None, which
    # item.get('ref', []) would pass straight to the loop.
    for ref in item.get('ref') or []:
        for key, val in ref.items():
            merged[key].append(val)
    # Convert back to a plain dict so callers do not get defaultdict's
    # insert-on-lookup behaviour.
    return {**item, 'ref': dict(merged)}
# Merge every record of my_list and print the resulting list.
print([merge(entry) for entry in my_list])
Upvotes: 3