James
James

Reputation: 4050

Custom Format Parsing: Pair up Keys and Values in a List to Build a Dict

I'm trying to parse a file in a custom format (from max3ds, in case anyone has encountered the same problem).

I have a list of nodes that looks like this:

[['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]

I'd like to get this into a dict. Every element of the list that starts with an asterisk is a key to its neighbour, until the next key is found. There are also nested elements. Can this be done just using natural python?

This is my current attempt:


class AllCameras:

    def __init__(self, file_string, line_ending='\n'):
        self.file_string = file_string
        self.output_dict = dict()
        self.line_ending = line_ending

    def __find_node_name(self, node):

        node_lines = node.split(self.line_ending)

        name_lines = [
            line for line in node_lines if "*NODE_NAME" in line.strip()]
        leading_name = name_lines[0].strip()
        leading_name = leading_name.split(" \"")[-1].replace("\"", "")
        return leading_name

    def __get_nodes(self):
        return self.file_string.split("*CAMERAOBJECT ")

    def __get_node_values(self):
        parse_nodes = []
        for node in self.__get_nodes():
            open_brace_pos = node.find('{')
            if open_brace_pos >= 0:
                parse_nodes.append(pyparsing.nestedExpr(
                    '{', '}').parseString(node[open_brace_pos:]).asList())
        return parse_nodes

    def parse_node(self, node):

        for n in node:
            if type(n) is list:
                self.parse_node(n)

            if type(n) is str and n.startswith('*'):
                print n, "IS KEY"

    def parse(self):
        nodes = self.__get_node_values()

        for node in nodes:

            self.parse_node(node)

I can isolate all of the keys in the node, but I'm not sure of the best way to pair keys an values, particularly since it's a recursive problem.

If there are multiple values next to a key, they should be grouped in a list. For instance "*INHERIT_POS", "0", "0", "0" -> "*INHERIT_POS":[0, 0, 0]

Upvotes: 0

Views: 43

Answers (1)

Ajax1234
Ajax1234

Reputation: 71451

You can use recursion to traverse the nested lists to create the dictionary:

def get_dict(d):
   return {d[i]:d[i+1] if isinstance(d[i+1], str) else get_dict(d[i+1]) for i in range(0, len(d), 2)}
s = [['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]
print(get_dict(s[0]))

Output:

{'*NODE_NAME': '"30deg017"', '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}

Edit:

regarding the fact that there can be multiple "runs" of data that need to be grouped, you can try this:

s = [['*NODE_NAME', '"30deg017"', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]

import itertools
def create_dict(f):
   def wrapper(d):
      def make_d(s):
         return {s[i]:s[i+1] if all(not c.startswith('*') for c in s[i+1]) or isinstance(s[i+1], str) else make_d(s[i+1]) for i in range(0, len(s), 2)}
      return make_d(f(d))
   return wrapper

@create_dict
def group_data(d):
   data = [(a, list(b)) for a, b in itertools.groupby(d, key=lambda x:isinstance(x, list) or not x.startswith('*'))]
   return [b[0] if not a else b if all(isinstance(i, str) for i in b) and len(b) > 1 else b[0] for a, b in data]

print(group_data(s[0]))

Output:

{'*NODE_NAME': '"30deg017"', '*TM_ROW0': ['1.0000', '0.0000', '0.0000'], '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}

Upvotes: 2

Related Questions