arshwarsi
arshwarsi

Reputation: 182

Create Nested Dict from paths and values

I need help with a request similar to the one raised in "Python: recursively create dictionary from paths", but my paths can also contain lists. Some path segments carry a list index, as shown below:

PATH       VALUE
/a/b/c      'Hi'
/a/b/d       1
/a/c/d       1
/b/c/d       1
/a/e[0]/f    1
/a/e[1]/g    2
/b/x/y[1]    'thank'
/b/x/y[2]    'you'
/b/j/b/c     2
/b/j/b/d     1
/a/e[2]/k[0]/s     '2D_1'
/a/e[2]/k[1]/s     '2D_2'

The expected output dictionary I am looking for is below:

{
    "a": {
        "b": {
            "c": "Hi",
            "d": 1
        },
        "c": {
            "d": 1
        },
        "e": [
            {
                "f": 1
            },
            {
                "g": 2
            },
            {
                "k": [
                    {
                        "s": "2D_1"
                    },
                    {
                        "s": "2D_2"
                    }
                ]
            }
        ]
    },
    "b": {
        "c": {
            "d": 1
        },
        "x": {
            "y": [
                null,
                "thank",
                "you"
            ]
        },
        "j": {
            "b": {
                "c": 2,
                "d": 1
            }
        }
    }
}

Note: A path can have more than 4 parts (any number). Is there a recursive way to handle this and create the dict from paths and values?

I tried the sample code below, but I am stuck on the list processing.

import re

def create_dict(data, path, value):
    """Store *value* in the nested structure *data* at the location given by *path*.

    *path* is a '/'-separated string with no leading slash (e.g.
    ``'a/b/c[0]/d'``).  A segment of the form ``name[N]`` addresses element
    ``N`` of a list stored under ``name``; any other segment is a plain
    dictionary key.  Missing dictionaries and lists are created on the way
    down, and lists are padded with ``None`` up to the requested index, so
    paths may arrive in any index order.

    Args:
        data: The dictionary to update in place.
        path: The '/'-separated path describing where to put *value*.
        value: The value stored at the final path segment.

    Returns:
        The same *data* object that was passed in, after the update.
    """
    # name[N] with optional whitespace / '+' around the non-negative index.
    indexed = re.compile(r"([^\[]+)\[\s*\+?(\d+)\s*\]")

    nodes = path.split('/')
    last = len(nodes) - 1
    current = data

    for pos, node in enumerate(nodes):
        is_last = pos == last
        match = indexed.fullmatch(node)

        if match is None:
            # Plain key: assign at the leaf, otherwise descend into
            # (or create) the child dictionary.
            if is_last:
                current[node] = value
            else:
                current = current.setdefault(node, {})
            continue

        key, index = match.group(1), int(match.group(2))
        items = current.setdefault(key, [])

        # Grow the list so that items[index] is addressable; untouched
        # positions stay None.
        if len(items) <= index:
            items.extend([None] * (index - len(items) + 1))

        if is_last:
            items[index] = value
        else:
            # A padded slot is still None the first time it is traversed.
            if items[index] is None:
                items[index] = {}
            current = items[index]

    return data

# Demo: feed two sample paths into the same dictionary and print the
# dictionary after each insertion, with a separator line in between.
data = {}
samples = [
    ('/a/b/c[0]/d/e/f', 123456),
    ('/a/b/c[1]/d/e/g', 'ABCDEFG'),
]

for position, (keys, value) in enumerate(samples):
    if position:
        print('---------------')

    # create_dict is called with the leading '/' removed.
    path = keys[1:]
    print(create_dict(data, path, value))

I also added 2 more paths. The paths can arrive in any order: the k[1] path may come first, followed later by the k[0] path.

/a/e[2]/k[1]/s     '2D_2'
/a/e[2]/k[0]/s     '2D_1'

Upvotes: 0

Views: 447

Answers (2)

ggorlen
ggorlen

Reputation: 56895

As with the linked code, the expected result is invalid, so I made a guess or two as to your intent.

Firstly,

{'b':
    {'c': 'Hi' },
    {'d': 1 }
},

is a syntax error. You can't have two values for a key like this. 'b' has to be either a list or a dict. Since you've taken pains to add lists to the specification, I assume this should be a dict.

Secondly, {'y' : ['thank' , 'you']} seems like a surprising result to expect from

/b/x/y[1]    'thank'
/b/x/y[2]    'you'

which uses indexes 1 and 2. If you want the original result, use .append (confusing!), or fix the indexes in the input (not confusing).

Beyond that, parsing the new list requirement involves picking out the index and element with a regex and using the index as well as the key to dive into the next level of nesting.

import json
import re

def add_path(d, path, val):
    """Insert *val* into the nested structure *d* at the '/'-separated *path*.

    The text before the first '/' is discarded, so paths are expected to
    start with a slash.  A segment ending in ``[N]`` addresses index ``N`` of
    a list stored under the segment's name; the list is created or padded
    with ``None`` as needed.  Other segments are plain dictionary keys.
    The structure is modified in place and nothing is returned.
    """
    segments = path.split("/")[1:]
    final = len(segments) - 1
    node = d

    for pos, segment in enumerate(segments):
        at_leaf = pos == final

        if not re.search(r".?\[\d+\]$", segment):
            # Plain dictionary key.
            if at_leaf:
                node[segment] = val
            else:
                if segment not in node:
                    node[segment] = {}
                node = node[segment]
            continue

        # Indexed segment: split "name[N]" into its name and index.
        key, raw_idx = re.fullmatch(r"(.+)\[(\d+)\]", segment).groups()
        idx = int(raw_idx)

        # Make sure the list exists and is long enough to hold index idx.
        if key not in node:
            node[key] = []
        shortfall = idx + 1 - len(node[key])
        if shortfall > 0:
            node[key] += [None] * shortfall

        slots = node[key]
        if at_leaf:
            slots[idx] = val
        elif not slots[idx]:
            # Empty/None slot on the way down becomes a fresh dict.
            slots[idx] = {}

        node = slots[idx]

if __name__ == "__main__":
    # Sample input: one "PATH    VALUE" pair per line, columns separated by
    # at least four whitespace characters.
    data = """
    /a/b/c      'Hi'
    /a/b/d       1
    /a/c/d       1
    /b/c/d       1
    /a/e[0]/f    1
    /a/e[1]/g    2
    /b/x/y[1]    'thank'
    /b/x/y[2]    'you'
    /b/j/b/c     2
    /b/j/b/d     1
    """
    d = {}

    def clean(x):
        """Return *x* as an int when possible, else strip spaces and quotes."""
        try:
            return int(x)
        except ValueError:
            return x.strip(" '")

    for line in data.split("\n"):
        if not line.strip():
            continue

        # The leading indentation yields an empty first field, which is dropped.
        fields = [clean(field) for field in re.split(r"\s{4,}", line)]
        path, val = fields[1:]
        add_path(d, path, val)

    print(json.dumps(d, indent=4))

Output:

{
    "a": {
        "b": {
            "c": "Hi",
            "d": 1
        },
        "c": {
            "d": 1
        },
        "e": [
            {
                "f": 1
            },
            {
                "g": 2
            }
        ]
    },
    "b": {
        "c": {
            "d": 1
        },
        "x": {
            "y": [
                null,
                "thank",
                "you"
            ]
        },
        "j": {
            "b": {
                "c": 2,
                "d": 1
            }
        }
    }
}

Cleaning this code up a bit is left as an exercise to the reader.

Upvotes: 1

Gene
Gene

Reputation: 46960

Haha. Pretty similar to @ggorlen's.

import re
import pprint

class Parser(object):
  """Builds a nested dict/list tree from '/'-separated paths and values.

  A path segment of the form ``name[N]`` addresses element ``N`` of a list
  stored under ``name``; every other segment is a plain dictionary key.
  """

  def __init__(self):
    # Matches "name[N]": group 1 is the key, group 2 the list index.
    self.index_pattern = re.compile(r'([^[]*)\[(\d+)\]')

  def Add(self, tree, path, value):
    """Store *value* in *tree* at the location given by *path*.

    Args:
      tree: The dictionary to update in place.
      path: A non-empty list of path segments (already split on '/').
      value: The value stored at the last segment.
    """
    for seg in path[:-1]:
      match = self.index_pattern.fullmatch(seg)
      if match:
        lst, ix = self.AddList(match, tree, dict)
        # A slot may still hold the None padding written when the list was
        # first grown by a leaf assignment; descend into a dict instead.
        if lst[ix] is None:
          lst[ix] = {}
        tree = lst[ix]
      else:
        tree = tree.setdefault(seg, {})

    match = self.index_pattern.fullmatch(path[-1])
    if match:
      lst, ix = self.AddList(match, tree, lambda: None)
      lst[ix] = value
    else:
      tree[path[-1]] = value

  @staticmethod
  def AddList(match, tree, ctor):
    """Ensure the list named by *match* exists in *tree* and covers its index.

    Args:
      match: A match of the index pattern (group 1 = key, group 2 = index).
      tree: The dictionary that holds (or will hold) the list.
      ctor: Zero-argument callable producing the filler for each new slot.

    Returns:
      A ``(list, index)`` tuple for the addressed list and position.
    """
    name = match.group(1)
    ix = int(match.group(2))
    lst = tree[name] if name in tree else []
    # Pad only as far as needed; range() is empty when the list is long enough.
    lst.extend(ctor() for _ in range(ix - len(lst) + 1))
    tree[name] = lst
    return lst, ix

  def Process(self, data):
    """Return a new tree built from a mapping of '/'-prefixed paths to values."""
    tree = {}
    for path, value in data.items():
      # Drop the empty string that precedes the leading '/'.
      self.Add(tree, path.split('/')[1:], value)
    return tree

def Run():
  """Build the tree for the sample paths and pretty-print it."""
  samples = [
    ('/a/b/c', 'Hi'),
    ('/a/b/d', 1),
    ('/a/c/d', 1),
    ('/b/c/d', 1),
    ('/a/e[0]/f', 1),
    ('/a/e[1]/g', 2),
    ('/b/x/y[1]', 'thank'),
    ('/b/x/y[2]', 'you'),
    ('/b/j/b/c', 2),
    ('/b/j/b/d', 1),
  ]
  # dict() keeps the pairs in the order listed above.
  tree = Parser().Process(dict(samples))
  pprint.pprint(tree)

# Execute the demo; it pretty-prints the tree built from the sample paths.
Run()

Output:

{'a': {'b': {'c': 'Hi', 'd': 1}, 'c': {'d': 1}, 'e': [{'f': 1}, {'g': 2}]},
 'b': {'c': {'d': 1},
       'j': {'b': {'c': 2, 'd': 1}},
       'x': {'y': [None, 'thank', 'you']}}}

Upvotes: 1

Related Questions