Reputation: 876
I have this JSON object which has structure as follows (the json object was extracted from pandas dataframe using to_json(orient="records")
)
data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
{'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
{'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
{'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
{'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
{'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]
Am trying to group by two fields month
and date
Expected result:
data = [{
"month": "Jan",
"details": [{
"date": "18",
"effort": [{
"activity": "cycling",
"duration": 3
}, {
"activity": "reading",
"duration": 3.0
}]
}, {
"date": "19",
"effort": [{
"activity": "scripting",
"duration": 19.5
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "18",
"effort": [{
"activity": "work",
"duration": 22.0
}]
}, {
"date": "19",
"effort": [{
"activity": "cooking",
"duration": 0.7
}]
}]
}, {
"month": "March",
"details": [{
"date": "16",
"effort": [{
"activity": "hiking",
"duration": 8.0
}]
}]
}]
I tried having the data as python dictionary which is extracted from pandas dataframe using to_dict(orient="records")
list_ = []
for item in dict_:
list_.append({"month" : item["month"],
"details":
[{
"date" : item["date"],
"efforts" :
[{
"activity" : item["activity"],
"duration": item["duration"]
}]
}]
})
json.dumps(list_)
and the output i got is
[{
"month": "Jan",
"details": [{
"date": "18",
"efforts": [{
"duration": 3,
"activity": "cycling"
}]
}]
}, {
"month": "Jan",
"details": [{
"date": "18",
"efforts": [{
"duration": 3.0,
"activity": "reading"
}]
}]
}, {
"month": "Jan",
"details": [{
"date": "19",
"efforts": [{
"duration": 19.5,
"activity": "scripting"
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "18",
"efforts": [{
"duration": 22.0,
"activity": "work"
}]
}]
}, {
"month": "Feb",
"details": [{
"date": "19",
"efforts": [{
"duration": 0.7,
"activity": "cooking"
}]
}]
}, {
"month": "March",
"details": [{
"date": "16",
"efforts": [{
"duration": 8.0,
"activity": "hiking"
}]
}]
}]
am not handling the concatenation of values to the existing fields.
Tried using python as well as java-script, do you guys have any advice or solution to the problem? Thanks
Upvotes: 3
Views: 7242
Reputation: 21
A generic function to group JSON. You must pass field to group and key array name for groups
def groupBy(vetor, campos, pos):
if(pos >= len(campos)):
return vetor
gmx = campos[pos]
agrupado = gmx["field"]
kx = gmx["gbkey"]
tam = len(campos)
agrupados = {}
saida = {}
retorno = []
for l in vetor:
lmf = {}
for k, s in l.items():
val_agrupado = l[agrupado]
if not (val_agrupado in agrupados):
agrupados[val_agrupado] = []
if agrupado != k:
lmf[k] = s
agrupados[val_agrupado].append(lmf)
for l in agrupados:
agrup = agrupados[l]
if(len(campos) > 1):
agrup = groupBy(agrup, campos, pos + 1)
saida = {}
saida[agrupado] = l
saida[kx] = agrup
retorno.append(saida)
return retorno
data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
{'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
{'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
{'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
{'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
{'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]
print(groupBy(data, [{'field':'month', 'gbkey': 'details'}, {'field':'date', 'gbkey': 'effort'}], 0))
It will produces something like
[
{
"month":"Jan",
"details":[
{
"date":"18",
"effort":[
{
"activity":"cycling",
"duration":3
},
{
"activity":"reading",
"duration":3.0
}
]
},
{
"date":"19",
"effort":[
{
"activity":"scripting",
"duration":19.5
}
]
}
]
},
{
"month":"Feb",
"details":[
{
"date":"18",
"effort":[
{
"activity":"work",
"duration":22.0
}
]
},
{
"date":"19",
"effort":[
{
"activity":"cooking",
"duration":0.7
}
]
}
]
},
{
"month":"March",
"details":[
{
"date":"16",
"effort":[
{
"activity":"hiking",
"duration":8.0
}
]
}
]
}
]
Upvotes: 0
Reputation: 1914
This seems to work:
data = [{'month': 'Jan','date': '18','activity': 'cycling','duration': 3},
{'month': 'Jan', 'date': '18','activity': 'reading', 'duration': 3.0},
{'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
{'month': 'Feb','date': '18', 'activity': 'work', 'duration': 22.0 },
{'month': 'Feb', 'date': '19', 'activity': 'cooking','duration': 0.7},
{'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]
new_data = []
not_found = True
for item in data:
for month in new_data:
not_found = True
if item['month'] == month['month']:
not_found = False
for date in month['details']:
if item['date'] == date['date']:
date['effort'].append({'activity':item['activity'], 'duration':item['duration']})
else:
month['details'].append({'date':item['date'], 'effort':[{'activity':item['activity'], 'duration':item['duration']}]})
break
if not_found:
new_data.append({'month':item['month'], 'details':[{'date':item['date'], \
'effort':[{'activity':item['activity'], 'duration':item['duration']}]}]})
print new_data
[{'details': [{'date': '18', 'effort': [{'duration': 3, 'activity': 'cycling'}, {'duration': 3.0, 'activity': 'reading'}]}, {'date': '19', 'effort': [{'duration': 19.5, 'activity': 'scripting'}, {'duration': 19.5, 'activity': 'scripting'}]}], 'month': 'Jan'}, {'details': [{'date': '18', 'effort': [{'duration': 22.0, 'activity': 'work'}]}, {'date': '19', 'effort': [{'duration': 0.7, 'activity': 'cooking'}, {'duration': 0.7, 'activity': 'cooking'}]}], 'month': 'Feb'}, {'details': [{'date': '16', 'effort': [{'duration': 8.0, 'activity': 'hiking'}]}], 'month': 'March'}]
Basically just iterate through each entry, first check if the month exists, then if it does, check if the date exists already, and append to the new data accordingly. So if no month exists, you append everything, if no date exists, you append the date details and the new activity. If the date exists too, then you just append the activity
Upvotes: 5