Austin Joy
Austin Joy

Reputation: 59

How to group JSON data by a key and combine the values under it in Python?

I have JSON with the structure shown below. I'm trying to group and merge the data based on 'name'. For example, the two objects below have the same name, 'abc', so they should be merged into a single entry, with 'id' and 'nestedprop' each becoming a collection under that 'name' that holds the contents of both objects.

 [{
"id" : [{"random" : "12345"}],
"name" : "abc",
"nestedprop" : {
    "malfunc" : [ 
        {
            "Info" : {
                "xyz" : [ 
                    {
                        "vamp" : "104531_0_46095",
                        "ramp" : {
                            "samp" : [ 
                                {
                                    "int" : 532,
                                }
                            ],
                        },
                        "class_unique_id" : "05451",
                    }
                ],
                "nati" : 39237,
                "apper" : 00,
                "supp" : {
                    "sess" : ""
                },
                "session_id" : "42461920181213044516299872341"
            }
        },       
                      {
            "Info" : {
                "xyz" : [ 
                    {
                        "vamp" : "104531_0_46095",
                        "ramp" : {
                            "samp" : [ 
                                {
                                    "int" : 5325,
                                }
                            ],
                        },
                        "class_unique_id" : "05451",
                    }
                ],
                "nati" : 392537,
                "apper" : 00,
                "supp" : {
                    "sess" : ""
                },
                "session_id" : "42461920181213044516299872341"
            }
        },
    ]
  },
},
   { 
"id" : [{"asdad" : "63653"}],
 "name" : "abc",
 "nestedprop" : {
    "malfunc" : [ 
        {
            "Info" : {
                "xyz" : [ 
                    {
                        "vamp" : "104531_0_46095",
                        "ramp" : {
                            "samp" : [ 
                                {
                                    "int" : 532,
                                }
                            ],
                        },
                        "class_unique_id" : "05451",
                    }
                ],
                "nati" : 39237,
                "apper" : 00,
                "supp" : {
                    "sess" : ""
                },
                "session_id" : "42461920181213044516299872341"
            }
        },
      {
            "Info" : {
                "xyz" : [ 
                    {
                        "vamp" : "104531_0_46095",
                        "ramp" : {
                            "samp" : [ 
                                {
                                    "int" : 532,
                                }
                            ],
                        },
                        "class_unique_id" : "05451",
                    }
                ],
                "nati" : 39237,
                "apper" : 00,
                "supp" : {
                    "sess" : ""
                },
                "session_id" : "42461920181213044516299872341"
            }
        },
    ]
  }
 }]

Expected Result:

[{
 "id" : {"0":[{"random" : "12345"}],"1":[{"random" : "12345"}]},
 "name" : "abc",
 "nestedprop" : {"0":[{"content from 1st obj"}],"1":[{"content from 1st obj"}]}
 },
 {"id" : {"0":[{"randsdaom" : "123sdas45"}]},
 "name" : "def",
 "nestedprop" : {"0":[{"content from 1st obj"}]}}]

Note: I have already achieved this using MapReduce in MongoDB; however, I want to try it out in Python.

Upvotes: 0

Views: 3888

Answers (1)

Abdur Rehman
Abdur Rehman

Reputation: 1101

There could be many other approaches, but this one gives you the desired result:

# Group the records in `dictList` by their 'name' key.
#
# Each record is expected to be a dict with 'name', 'id' and 'nestedprop'
# keys (a missing key raises KeyError).  For every distinct name, one merged
# dict is produced:
#
#     {"name": <name>,
#      "id":         {"0": <id of 1st match>,         "1": <id of 2nd>, ...},
#      "nestedprop": {"0": <nestedprop of 1st match>, "1": ...}}
#
# The records are walked once, and the merged entries are kept in a dict
# keyed by name.  Because dicts preserve insertion order, `results` lists the
# names in the order they first appear in `dictList`, rather than in the
# arbitrary order a set would give.
grouped = {}

for dic in dictList:
    name = dic['name']
    if name not in grouped:
        # First record with this name: start an empty merged entry.
        grouped[name] = {"name": name,
                         "id": {},
                         "nestedprop": {}}
    merged = grouped[name]

    # The next index is the number of records already merged under this
    # name, stored as a string key ("0", "1", ...).
    idx = str(len(merged["id"]))
    merged["id"][idx] = dic['id']
    merged["nestedprop"][idx] = dic['nestedprop']

# unique list of names, in first-seen order
names = list(grouped)

results = list(grouped.values())

# results = json.dumps(results)  to convert back into double quotes ""
results

Output:

   [{'name': 'abc',
  'id': {'0': [{'random': '12345'}],
   '1': [{'asdad': '63653'}],
   '2': [{'asdad': '63653'}]},
  'nestedprop': {'0': {'malfunc': [{'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}},
     {'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 5325}]},
         'class_unique_id': '05451'}],
       'nati': 392537,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}}]},
   '1': {'malfunc': [{'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}},
     {'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}}]},
   '2': {'malfunc': [{'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}},
     {'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}}]}}},
 {'name': 'def',
  'id': {'0': [{'asdad': '63653'}], '1': [{'asdad': '63653'}]},
  'nestedprop': {'0': {'malfunc': [{'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}},
     {'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}}]},
   '1': {'malfunc': [{'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}},
     {'Info': {'xyz': [{'vamp': '104531_0_46095',
         'ramp': {'samp': [{'int': 532}]},
         'class_unique_id': '05451'}],
       'nati': 39237,
       'apper': 0,
       'supp': {'sess': ''},
       'session_id': '42461920181213044516299872341'}}]}}}]

Upvotes: 1

Related Questions