Reputation: 182
I need help with a request similar to the one raised in "Python: recursively create dictionary from paths", but my paths can also contain lists. Some path segments carry an index, as shown below:
PATH VALUE
/a/b/c 'Hi'
/a/b/d 1
/a/c/d 1
/b/c/d 1
/a/e[0]/f 1
/a/e[1]/g 2
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
/b/j/b/c 2
/b/j/b/d 1
/a/e[2]/k[0]/s '2D_1'
/a/e[2]/k[1]/s '2D_2'
The expected output dictionary I am looking for is below:
{
"a": {
"b": {
"c": "Hi",
"d": 1
},
"c": {
"d": 1
},
"e": [
{
"f": 1
},
{
"g": 2
},
{
"k": [
{
"s": "2D_1"
},
{
"s": "2D_2"
}
]
}
]
},
"b": {
"c": {
"d": 1
},
"x": {
"y": [
null,
"thank",
"you"
]
},
"j": {
"b": {
"c": 2,
"d": 1
}
}
}
}
Note: a path can have more than 4 parts (any number). Is there a recursive way to handle this and create the dict from the paths and values?
I tried the sample code below, but I am stuck on the list processing.
import re
def _split_node(node):
    """Split one path segment into ``(key, index)``.

    ``'e[2]'`` gives ``('e', 2)``; a segment without a bracket gives
    ``(node, None)``.

    Raises:
        ValueError: if the segment contains ``[`` but is not of the form
            ``name[<non-negative integer>]``.
    """
    if '[' not in node:
        return node, None
    match = re.fullmatch(r"(.*?)\[\s*\+?(\d+)\s*\]", node)
    if match is None:
        raise ValueError("malformed list segment: {!r}".format(node))
    return match.group(1), int(match.group(2))


def create_dict(data, path, value):
    """Store ``value`` in the nested structure ``data`` at ``path``.

    ``path`` is a ``/``-separated string such as ``'a/b/c[0]/d'``; leading
    slashes are ignored.  A plain segment addresses a dict key.  A segment
    of the form ``name[i]`` addresses element ``i`` of a list stored under
    ``name``; the list is created and padded with ``None`` as needed, so
    paths may arrive in any index order.

    Args:
        data: dict to update in place.
        path: slash-separated path, optionally with ``[index]`` suffixes.
        value: object stored at the final path segment.

    Returns:
        The same ``data`` object that was passed in.

    Raises:
        ValueError: if a segment has a malformed ``[index]`` suffix.
    """
    nodes = path.lstrip('/').split('/')
    last = len(nodes) - 1
    container = data
    for position, node in enumerate(nodes):
        key, index = _split_node(node)
        is_leaf = position == last

        if index is None:
            if is_leaf:
                container[key] = value
            else:
                container = container.setdefault(key, {})
            continue

        items = container.setdefault(key, [])
        # Grow the list so that ``items[index]`` exists; untouched slots
        # stay None until some path fills them.
        missing = index + 1 - len(items)
        if missing > 0:
            items.extend([None] * missing)

        if is_leaf:
            items[index] = value
        else:
            # A padded slot becomes a dict the first time a path descends
            # through it.
            if items[index] is None:
                items[index] = {}
            container = items[index]
    return data
# Demo: feed two sample paths into the same dict and show the result after
# each insertion, separated by a dashed line.
data = {}
samples = (
    ('/a/b/c[0]/d/e/f', 123456),
    ('/a/b/c[1]/d/e/g', 'ABCDEFG'),
)
for position, (keys, value) in enumerate(samples):
    if position:
        print('---------------')
    # create_dict is handed the path without its leading slash.
    path = keys[1:]
    print(create_dict(data, path, value))
I also added 2 more paths. The order is not guaranteed: the k[1] path can come first and the k[0] path later.
/a/e[2]/k[1]/s '2D_2'
/a/e[2]/k[0]/s '2D_1'
Upvotes: 0
Views: 447
Reputation: 56895
As with the linked code, the expected result is invalid, so I made a guess or two as to your intent.
Firstly,
{'b':
{'c': 'Hi' },
{'d': 1 }
},
is a syntax error. You can't have two values for a key like this. 'b'
has to be either a list or a dict. Since you've taken pains to add lists to the specification, I assume this should be a dict.
Secondly, {'y' : ['thank' , 'you']}
seems like a surprising result to expect from
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
which uses indexes 1 and 2. If you want the original result, use .append
(confusing!), or fix the indexes in the input (not confusing).
Beyond that, parsing the new list requirement involves picking out the index and element with a regex and using the index as well as the key to dive into the next level of nesting.
import json
import re
# A segment of the form ``name[3]``: group 1 is the key, group 2 the index.
_LIST_SEGMENT = re.compile(r"(.+)\[(\d+)\]")


def add_path(d, path, val):
    """Insert ``val`` into the nested dict ``d`` at the slash-separated ``path``.

    ``path`` is expected to start with ``/``; the text before the first
    slash is discarded.  A plain segment addresses a dict key.  A segment
    of the form ``name[i]`` addresses element ``i`` of a list stored under
    ``name``; the list is created or padded with ``None`` so the index
    exists.  Any segment that does not fully match ``name[i]`` (including a
    bare ``[0]``) is used as an ordinary dict key.

    Args:
        d: dict to update in place.
        path: path string such as ``"/a/e[0]/f"``.
        val: value stored at the final segment.

    Returns:
        None; ``d`` is modified in place.
    """
    segments = path.split("/")[1:]
    last = len(segments) - 1
    for i, segment in enumerate(segments):
        is_leaf = i == last
        match = _LIST_SEGMENT.fullmatch(segment)

        if match is None:
            if is_leaf:
                d[segment] = val
            else:
                d = d.setdefault(segment, {})
            continue

        key, idx = match.group(1), int(match.group(2))
        items = d.setdefault(key, [])
        shortfall = idx + 1 - len(items)
        if shortfall > 0:
            # In-place growth, so the list stored under d[key] is the one padded.
            items += [None] * shortfall

        if is_leaf:
            items[idx] = val
        else:
            # Any falsy slot (None padding, but also 0 or "") is replaced by
            # a fresh dict before descending into it.
            if not items[idx]:
                items[idx] = {}
            d = items[idx]
def clean(x):
    """Convert a raw field to ``int`` when possible.

    Otherwise return the text with surrounding spaces and single quotes
    stripped.
    """
    try:
        return int(x)
    except ValueError:
        return x.strip(" '")


def parse_rows(text):
    """Yield ``(path, value)`` pairs from lines of the form ``PATH VALUE``.

    The path is everything up to the first run of whitespace; the rest of
    the line is the value, passed through ``clean``.  Leading indentation
    and the width of the gap between the columns do not matter.  Blank
    lines are skipped.

    Raises:
        ValueError: if a non-blank line has no value column.
    """
    for line in text.splitlines():
        fields = line.split(None, 1)
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError("expected 'PATH VALUE', got: {!r}".format(line))
        path, raw = fields
        yield path, clean(raw)


if __name__ == "__main__":
    data = """
/a/b/c 'Hi'
/a/b/d 1
/a/c/d 1
/b/c/d 1
/a/e[0]/f 1
/a/e[1]/g 2
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
/b/j/b/c 2
/b/j/b/d 1
"""
    d = {}
    for path, val in parse_rows(data):
        add_path(d, path, val)
    print(json.dumps(d, indent=4))
Output:
{
"a": {
"b": {
"c": "Hi",
"d": 1
},
"c": {
"d": 1
},
"e": [
{
"f": 1
},
{
"g": 2
}
]
},
"b": {
"c": {
"d": 1
},
"x": {
"y": [
null,
"thank",
"you"
]
},
"j": {
"b": {
"c": 2,
"d": 1
}
}
}
}
Cleaning this code up a bit is left as an exercise to the reader.
Upvotes: 1
Reputation: 46960
Haha. Pretty similar to @ggorlen's.
import re
import pprint
class Parser(object):
    """Build a nested dict/list tree from ``/``-separated paths.

    A path segment of the form ``name[i]`` addresses element ``i`` of a
    list stored under ``name``; any other segment addresses a dict key.
    """

    def __init__(self):
        # Group 1: key name (may be empty); group 2: list index.
        self.index_pattern = re.compile(r'([^[]*)\[(\d+)\]')

    def Add(self, tree, path, value):
        """Store ``value`` in ``tree`` at ``path``.

        Args:
            tree: dict to update in place.
            path: non-empty list of path segments, e.g. ``['a', 'e[0]', 'f']``.
            value: object stored at the last segment.
        """
        for seg in path[:-1]:
            match = self.index_pattern.fullmatch(seg)
            if match:
                # Intermediate list slots must be dicts so we can descend.
                lst, ix = self.AddList(match, tree, dict)
                tree = lst[ix]
            else:
                tree = tree.setdefault(seg, {})

        leaf = path[-1]
        match = self.index_pattern.fullmatch(leaf)
        if match:
            # Leaf list slots are padded with None and then overwritten.
            lst, ix = self.AddList(match, tree, lambda: None)
            lst[ix] = value
        else:
            tree[leaf] = value

    @staticmethod
    def AddList(match, tree, ctor):
        """Ensure ``tree[name]`` is a list long enough to index at ``ix``.

        Args:
            match: match object from ``index_pattern`` (name, index).
            tree: dict that holds (or will hold) the list.
            ctor: zero-argument callable producing the filler for new slots.

        Returns:
            ``(lst, ix)``: the list stored under the name and the parsed index.
        """
        name = match.group(1)
        ix = int(match.group(2))
        lst = tree.setdefault(name, [])
        lst.extend(ctor() for _ in range(ix - len(lst) + 1))
        # A slot padded with None by an earlier leaf write (e.g. ``y[1]``
        # creating ``y[0] = None``) must be upgraded when a later path needs
        # to descend through it; with the leaf ctor this is a no-op.
        if lst[ix] is None:
            lst[ix] = ctor()
        return lst, ix

    def Process(self, data):
        """Return a new tree built from a ``{path: value}`` mapping.

        Each path is split on ``/`` and the text before the first slash is
        dropped.
        """
        tree = {}
        for path, value in data.items():
            self.Add(tree, path.split('/')[1:], value)
        return tree
def Run():
    """Parse the sample path/value table and pretty-print the resulting tree."""
    samples = (
        ('/a/b/c', 'Hi'),
        ('/a/b/d', 1),
        ('/a/c/d', 1),
        ('/b/c/d', 1),
        ('/a/e[0]/f', 1),
        ('/a/e[1]/g', 2),
        ('/b/x/y[1]', 'thank'),
        ('/b/x/y[2]', 'you'),
        ('/b/j/b/c', 2),
        ('/b/j/b/d', 1),
    )
    parser = Parser()
    tree = parser.Process(dict(samples))
    pprint.pprint(tree)


Run()
Output:
{'a': {'b': {'c': 'Hi', 'd': 1}, 'c': {'d': 1}, 'e': [{'f': 1}, {'g': 2}]},
'b': {'c': {'d': 1},
'j': {'b': {'c': 2, 'd': 1}},
'x': {'y': [None, 'thank', 'you']}}}
Upvotes: 1