Reputation: 907
I am trying to build a Python function that will format data into a JSON string to be used by D3.
I need it to be in the format:
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
This is the format used by http://bl.ocks.org/mbostock/4063550, and I want to feed it to this type of tree layout: http://johan.github.io/d3/ex/tree.html
What I have come up with so far is a data structure like:
{'nlp':{'course':['course','range','topics','language','processing','word']}}
and need it to come out like:
{
"name":"Natural Language Processing",
"children":[
{
"name":"course",
"children":[
{
"name":"course",
"size":700
},
{
"name":"range",
"size":700
},
{
"name":"topics",
"size":700
},
{
"name":"language",
"size":700
},
{
"name":"processing",
"size":700
},
{
"name":"word",
"size":700
}
]
}
]
}
and started down the road of
def format_d3_circle(data_input, leaf_size=700):
    """Serialize a nested dict/list structure as a D3 hierarchy JSON string.

    ``data_input`` is expected to look like
    ``{root_name: {branch_name: [leaf_name, ...], ...}}``. Dicts may be
    nested to any depth; every dict key becomes a node with ``"children"``
    and every item of a list becomes a leaf with a ``"size"``.

    Args:
        data_input: Mapping whose first key is used as the root node name
            and whose value holds the sub-levels. Only the first key is
            used, because the D3 format has a single root.
        leaf_size: Value written to the ``"size"`` field of every leaf.

    Returns:
        A JSON string of the form ``{"name": ..., "children": [...]}``,
        or ``"{}"`` when ``data_input`` is empty.
    """
    import json

    def build_children(node):
        # A dict describes further branches; anything else is treated as
        # an iterable of leaf names.
        if isinstance(node, dict):
            return [
                {"name": key, "children": build_children(value)}
                for key, value in node.items()
            ]
        return [{"name": item, "size": leaf_size} for item in node]

    if not data_input:
        return json.dumps({})

    # next(iter(...)) works on both Python 2 and 3, unlike keys()[0].
    root_name, sub_levels = next(iter(data_input.items()))
    d3_data = {"name": root_name, "children": build_children(sub_levels)}
    return json.dumps(d3_data)
but it seems I am not approaching the problem correctly, and I am finding it difficult to picture a good way to build the nested JSON nodes.
Any suggestions on how to abstract this problem, and build whatever nested JSON structures I need from dictionary/list/JSON input etc?
Upvotes: 2
Views: 1976
Reputation: 11577
Here's a solution that I've been working on that works with tabular input data for the general case of an arbitrary number of levels.
import json
from io import StringIO

import pandas as pd
def find_element(children_list, name):
    """Look up a child node by name.

    Scans ``children_list`` in order and returns the first dict whose
    ``"name"`` entry equals ``name``. Returns ``None`` when no child
    matches (including when the list is empty).
    """
    matching = (child for child in children_list if child["name"] == name)
    # The default of None covers the "not found" case.
    return next(matching, None)
def add_node(path, value, nest):
    """Insert one value into the nested ``nest`` dictionary, in place.

    Args:
        path: Sequence of names, one per level of the hierarchy, from the
            outermost level down to the leaf. It is not modified.
        value: Stored under ``"value"`` on the leaf node named by the last
            element of ``path``.
        nest: Node dict with a ``"children"`` list to insert into. Missing
            intermediate nodes are created as
            ``{"name": ..., "children": []}``.

    Raises:
        IndexError: If ``path`` is empty.

    If a node with the leaf's name already exists at the final level, it
    is left untouched and ``value`` is not stored.
    """
    # Copy so the caller's list is not consumed while walking the levels.
    names = list(path)
    if not names:
        raise IndexError("path must contain at least one level name")

    last_depth = len(names) - 1
    node = nest
    for depth, this_name in enumerate(names):
        element = find_element(node["children"], this_name)
        if element is None:
            if depth == last_depth:
                # End of the path: this is the leaf that carries the value.
                element = {"name": this_name, "value": value}
            else:
                # More levels follow, so create an empty branch to descend into.
                element = {"name": this_name, "children": []}
            node["children"].append(element)
        node = element
And here is an example of how you can use it. You have to tell it which columns to use as the levels of the hierarchy and which column stores the values.
# Example input: three hierarchy columns (l1, l2, l3) and one value column.
# The literal is wrapped in StringIO because passing a raw JSON string to
# pd.read_json is deprecated in current pandas.
df = pd.read_json(StringIO('{"l1":{"0":"a","1":"a","2":"a","3":"a","4":"b","5":"b","6":"b","7":"b"},"l2":{"0":"a1","1":"a1","2":"a2","3":"a2","4":"b1","5":"b1","6":"b2","7":"b3"},"l3":{"0":"a11","1":"a12","2":"a21","3":"a22","4":"b11","5":"b12","6":"b22","7":"b34"},"val":{"0":1,"1":2,"2":3,"3":4,"4":5,"5":6,"6":7,"7":8}}'))

# Root of the hierarchy that add_node fills in place.
d = {"name": "root", "children": []}

# Columns to use as hierarchy levels, outermost first.
levels = ["l1", "l2", "l3"]

for _, r in df.iterrows():
    # One path per row: the row's values in the level columns.
    path = list(r[levels])
    value = r["val"]
    add_node(path, value, d)

# print() with a single argument behaves the same on Python 2 and 3.
print(json.dumps(d, sort_keys=False, indent=2))
Upvotes: 2