Reputation: 2587
I have a list of dictionaries, each of which contains a site and a device; several dictionaries can share the same site and the same device. I would like to regroup those dictionaries by site and then by device.
I have added a sample output dictionary and a desired dictionary.
I thought I could use itertools to do the nested grouping. That has worked in the sense that I do have the groups, but I'm unsure how to merge them all together, or whether this is the most efficient method.
itertools attempt:
# First level: group the records by site.
# NOTE: itertools.groupby only merges *adjacent* records with equal keys, so
# bgp_data_query must already be ordered by location, then by device_name.
site_groups = itertools.groupby(bgp_data_query, lambda i: i['location'])
for site_name, site in site_groups:
    # Second level: split this site's records by device.
    device_groups = itertools.groupby(site, lambda i: i['device_name'])
    # Iterate the per-site device groups (not site_groups again, which would
    # drain the outer iterator); distinct key names keep the site name
    # available inside the inner loop.
    for device_name, device in device_groups:
        ...  # merging the grouped records is the open question here
raw data
[
{
"bgp_peer_as": "1",
"bgp_session": "3:35",
"bgp_routes": "0",
"service_status": "Down",
"location": "London",
"circuit_name": "MPLS",
"device_name": "LON-EDGE",
"timestamp" : "2019-5-8 12:30:00"
},
{
"bgp_peer_as": "3",
"bgp_session": "4:25",
"bgp_routes": "100",
"service_status": "UP",
"location": "London",
"circuit_name": "MPLS 02",
"device_name": "LON-EDGE",
"timestamp" : "2019-5-8 12:30:00"
},
{
"bgp_peer_as": "18",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"location": "London",
"circuit_name": "INTERNET",
"device_name": "LON-INT-GW",
"timestamp" : "2019-5-8 12:31:00"
},
{
"bgp_peer_as": "20",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"location": "Manchester",
"circuit_name": "INTERNET",
"device_name": "MAN-INT-GW",
"timestamp" : "2019-5-8 12:20:00"
},
{
"bgp_peer_as": "20",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"location": "Manchester",
"circuit_name": "INTERNET 02",
"device_name": "MAN-INT-GW",
"timestamp" : "2019-5-8 12:20:00"
},
{
"bgp_peer_as": "45",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"location": "Manchester",
"circuit_name": "MPLS 01",
"device_name": "MAN-EDGE",
"timestamp" : "2019-5-8 12:21:00"
},
]
desired dict
[
{
"London": {
"LON-EDGE": {
"bgp_peer_as": "1",
"bgp_session": "3:35",
"bgp_routes": "0",
"service_status": "DOWN",
"circuit_name": "MPLS",
},
{
"bgp_peer_as": "3",
"bgp_session": "4:25",
"bgp_routes": "100",
"service_status": "UP",
"circuit_name": "MPLS 02",
}
},
{
"LON-INT-GW" : {
"bgp_peer_as": "18",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"circuit_name": "INTERNET",
}
}
}
],
[
{
"Manchester": {
"MAN-EDGE": {
"bgp_peer_as": "45",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"circuit_name": "MPLS 01",
}
},
{
"MAN-INT-GW": {
"bgp_peer_as": "20",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"circuit_name": "INTERNET",
},
{
"bgp_peer_as": "20",
"bgp_session": "1:25",
"bgp_routes": "1",
"service_status": "UP",
"circuit_name": "INTERNET 02",
}
}
}
]
Upvotes: 3
Views: 111
Reputation: 140186
Use a nested collections.defaultdict (a defaultdict of defaultdicts)
with a list at the deepest level, then loop over the items, popping the grouping keys ("location" and "device_name") so they don't appear in the final data:
# Two-level grouping: location -> device_name -> list of circuit records.
# The nested defaultdict creates a site or device entry on first access.
result = collections.defaultdict(lambda: collections.defaultdict(list))
for d in raw_dict:
    # Pop from a shallow copy so the caller's records are left untouched and
    # the loop can be re-run on the same data without raising KeyError.
    entry = dict(d)
    location = entry.pop("location")
    device_name = entry.pop("device_name")
    # "timestamp" is not part of the desired output; drop it when present.
    entry.pop("timestamp", None)
    result[location][device_name].append(entry)
Result with your data (dumped as JSON
so that the defaultdict objects print as plain dictionaries):
import json

# Serialising to JSON renders the nested defaultdicts as plain objects.
pretty = json.dumps(result, indent=4)
print(pretty)
{
"Manchester": {
"MAN-INT-GW": [
{
"bgp_routes": "1",
"service_status": "UP",
"bgp_peer_as": "20",
"circuit_name": "INTERNET",
"bgp_session": "1:25"
},
{
"bgp_routes": "1",
"service_status": "UP",
"bgp_peer_as": "20",
"circuit_name": "INTERNET 02",
"bgp_session": "1:25"
}
],
"MAN-EDGE": [
{
"bgp_routes": "1",
"service_status": "UP",
"bgp_peer_as": "45",
"circuit_name": "MPLS 01",
"bgp_session": "1:25"
}
]
},
"London": {
"LON-EDGE": [
{
"bgp_routes": "0",
"service_status": "Down",
"bgp_peer_as": "1",
"circuit_name": "MPLS",
"bgp_session": "3:35"
},
{
"bgp_routes": "100",
"service_status": "UP",
"bgp_peer_as": "3",
"circuit_name": "MPLS 02",
"bgp_session": "4:25"
}
],
"LON-INT-GW": [
{
"bgp_routes": "1",
"service_status": "UP",
"bgp_peer_as": "18",
"circuit_name": "INTERNET",
"bgp_session": "1:25"
}
]
}
}
Note that itertools.groupby
-based solutions also work, but only when identical keys are contiguous (for example, after sorting by the grouping keys). Otherwise groupby creates several separate groups for the same key, which is not what you want.
Upvotes: 2
Reputation: 1858
You can use defaultdict
along with itertools.groupby
:
import itertools
from collections import defaultdict

# Fields that are used for grouping, or that are absent from the desired
# output, and therefore must not be copied into the per-device lists.
skipped = {"location", "device_name", "timestamp"}

res = defaultdict(dict)
# groupby only merges *adjacent* records, so order the data by
# (location, device_name) first; otherwise a key that reappears later would
# silently overwrite its earlier group. sorted() is stable, so the records of
# one device keep their original relative order.
ordered = sorted(bgp_data_query, key=lambda x: (x["location"], x["device_name"]))
for x, g in itertools.groupby(ordered, key=lambda x: x["location"]):
    for d, f in itertools.groupby(g, key=lambda x: x["device_name"]):
        # Each remaining field becomes its own single-key dict, all appended
        # to one flat list per device.
        res[x][d] = [{k: v} for z in f for k, v in z.items() if k not in skipped]
print(dict(res))
Output:
{'London': {'LON-EDGE': [{'bgp_peer_as': '1'},
{'bgp_routes': '0'},
{'circuit_name': 'MPLS'},
{'bgp_session': '3:35'},
{'service_status': 'Down'},
{'bgp_peer_as': '3'},
{'bgp_routes': '100'},
{'circuit_name': 'MPLS 02'},
{'bgp_session': '4:25'},
{'service_status': 'UP'}],
'LON-INT-GW': [{'bgp_peer_as': '18'},
{'bgp_routes': '1'},
{'circuit_name': 'INTERNET'},
{'bgp_session': '1:25'},
{'service_status': 'UP'}]},
'Manchester': {'MAN-EDGE': [{'bgp_peer_as': '45'},
{'bgp_routes': '1'},
{'circuit_name': 'MPLS 01'},
{'bgp_session': '1:25'},
{'service_status': 'UP'}],
'MAN-INT-GW': [{'bgp_peer_as': '20'},
{'bgp_routes': '1'},
{'circuit_name': 'INTERNET'},
{'bgp_session': '1:25'},
{'service_status': 'UP'},
{'bgp_peer_as': '20'},
{'bgp_routes': '1'},
{'circuit_name': 'INTERNET 02'},
{'bgp_session': '1:25'},
{'service_status': 'UP'}]}}
Upvotes: 1