Python grouping JSON object using multiple keys

I have this JSON object which has structure as follows (the json object was extracted from pandas dataframe using to_json(orient="records"))

data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
        {'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
        {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
        {'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
        {'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
        {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]

Am trying to group by two fields month and date Expected result:

data =  [{
            "month": "Jan",
            "details": [{
                "date": "18",
                "effort": [{
                    "activity": "cycling",
                    "duration": 3
                }, {
                    "activity": "reading",
                    "duration": 3.0
                }]
            }, {
                "date": "19",
                "effort": [{
                    "activity": "scripting",
                    "duration": 19.5
                }]
            }]
        }, {
            "month": "Feb",
            "details": [{
                "date": "18",
                "effort": [{
                    "activity": "work",
                    "duration": 22.0
                }]
            }, {
                "date": "19",
                "effort": [{
                    "activity": "cooking",
                    "duration": 0.7
                }]
            }]
        }, {
            "month": "March",
            "details": [{
                "date": "16",
                "effort": [{
                    "activity": "hiking",
                    "duration": 8.0
                }]
            }]
        }]

I tried having the data as python dictionary which is extracted from pandas dataframe using to_dict(orient="records")

list_ = []

for item in dict_:
    list_.append({"month" : item["month"],
                                "details":
                                [{
                                    "date" : item["date"],
                                    "efforts" : 
                                        [{
                                            "activity" : item["activity"],
                                            "duration": item["duration"]
                                        }]
                                }]
                            })

json.dumps(list_)       

and the output i got is

[{
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3,
            "activity": "cycling"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3.0,
            "activity": "reading"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 19.5,
            "activity": "scripting"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 22.0,
            "activity": "work"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 0.7,
            "activity": "cooking"
        }]
    }]
}, {
    "month": "March",
    "details": [{
        "date": "16",
        "efforts": [{
            "duration": 8.0,
            "activity": "hiking"
        }]
    }]
}]

am not handling the concatenation of values to the existing fields.

Tried using python as well as java-script, do you guys have any advice or solution to the problem? Thanks

Upvotes: 3

Views: 7242

Answers (2)

user3300185
user3300185

Reputation: 21

A generic function to group JSON. You must pass field to group and key array name for groups

def groupBy(vetor, campos, pos):
    if(pos >= len(campos)):
      return vetor
    gmx = campos[pos]
    agrupado = gmx["field"]
    kx = gmx["gbkey"]
    tam = len(campos)
    agrupados = {}
    saida = {}
    retorno = []    
    for l in vetor:
        lmf = {}
        for k, s in l.items():
            val_agrupado = l[agrupado]             
            if not (val_agrupado in agrupados):
                agrupados[val_agrupado] = []
            if agrupado != k:
              lmf[k] = s
        agrupados[val_agrupado].append(lmf)              
    for l in agrupados:
        agrup = agrupados[l]
        if(len(campos) > 1):            
            agrup = groupBy(agrup, campos, pos + 1)
        saida = {}
        saida[agrupado] = l
        saida[kx] = agrup
        retorno.append(saida)
    return retorno
    
data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
        {'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
        {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
        {'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
        {'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
        {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]
print(groupBy(data, [{'field':'month', 'gbkey': 'details'}, {'field':'date', 'gbkey': 'effort'}], 0))

It will produces something like

[
   {
      "month":"Jan",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"cycling",
                  "duration":3
               },
               {
                  "activity":"reading",
                  "duration":3.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"scripting",
                  "duration":19.5
               }
            ]
         }
      ]
   },
   {
      "month":"Feb",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"work",
                  "duration":22.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"cooking",
                  "duration":0.7
               }
            ]
         }
      ]
   },
   {
      "month":"March",
      "details":[
         {
            "date":"16",
            "effort":[
               {
                  "activity":"hiking",
                  "duration":8.0
               }
            ]
         }
      ]
   }
]

Upvotes: 0

dheiberg
dheiberg

Reputation: 1914

This seems to work:

Code

data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
        {'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
        {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
        {'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
        {'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
        {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]

new_data = []
not_found = True
for item in data:
    for month in new_data:
        not_found = True
        if item['month'] == month['month']:
            not_found = False
            for date in month['details']:
                if item['date'] == date['date']:
                    date['effort'].append({'activity':item['activity'], 'duration':item['duration']})
                else:
                    month['details'].append({'date':item['date'], 'effort':[{'activity':item['activity'], 'duration':item['duration']}]})
            break
    if not_found:
        new_data.append({'month':item['month'], 'details':[{'date':item['date'], \
            'effort':[{'activity':item['activity'], 'duration':item['duration']}]}]})

print new_data

Output

[{'details': [{'date': '18', 'effort': [{'duration': 3, 'activity': 'cycling'}, {'duration': 3.0, 'activity': 'reading'}]}, {'date': '19', 'effort': [{'duration': 19.5, 'activity': 'scripting'}, {'duration': 19.5, 'activity': 'scripting'}]}], 'month': 'Jan'}, {'details': [{'date': '18', 'effort': [{'duration': 22.0, 'activity': 'work'}]}, {'date': '19', 'effort': [{'duration': 0.7, 'activity': 'cooking'}, {'duration': 0.7, 'activity': 'cooking'}]}], 'month': 'Feb'}, {'details': [{'date': '16', 'effort': [{'duration': 8.0, 'activity': 'hiking'}]}], 'month': 'March'}]

Basically just iterate through each entry, first check if the month exists, then if it does, check if the date exists already, and append to the new data accordingly. So if no month exists, you append everything, if no date exists, you append the date details and the new activity. If the date exists too, then you just append the activity

Upvotes: 5

Related Questions