Reputation: 4050
I'm trying to parse a file in a custom format (exported from 3ds Max, in case anyone has encountered the same problem).
I have a list of nodes that looks like this:
[['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]
I'd like to get this into a dict. Every element of the list that starts with an asterisk is a key, and the elements that follow it, up to the next key, are its values. Some values are themselves nested lists with the same structure. Can this be done using plain Python?
This is my current attempt:
class AllCameras:
    """Split an exported scene string into ``*CAMERAOBJECT`` nodes and walk them.

    The raw text is cut on the ``"*CAMERAOBJECT "`` marker, the brace-delimited
    body of each piece is parsed into nested lists with
    ``pyparsing.nestedExpr``, and every token that starts with ``*`` is
    reported as a key.

    The module that contains this class must have ``pyparsing`` imported;
    ``parse`` depends on it.
    """

    def __init__(self, file_string, line_ending='\n'):
        """Store the raw text and the line separator used to split it.

        Args:
            file_string: Full contents of the file to parse.
            line_ending: Separator used when splitting a node into lines.
        """
        self.file_string = file_string
        # Not filled in by any method of this class yet.
        self.output_dict = {}
        self.line_ending = line_ending

    def __find_node_name(self, node):
        """Return the unquoted name from the first ``*NODE_NAME`` line of *node*.

        Args:
            node: Raw text of a single node.

        Returns:
            The text after the last ``' "'`` on that line, with every double
            quote removed.

        Raises:
            IndexError: If no line of *node* contains ``*NODE_NAME``.
        """
        node_lines = node.split(self.line_ending)
        name_lines = [line for line in node_lines if "*NODE_NAME" in line]
        leading_name = name_lines[0].strip()
        return leading_name.split(" \"")[-1].replace("\"", "")

    def __get_nodes(self):
        """Return the file text split on the ``"*CAMERAOBJECT "`` marker."""
        return self.file_string.split("*CAMERAOBJECT ")

    def __get_node_values(self):
        """Parse the brace-delimited body of every node into nested lists.

        Pieces without an opening brace (for example any text that precedes
        the first marker) are skipped.

        Returns:
            One ``asList()`` result per node that contains a ``{``.
        """
        parse_nodes = []
        brace_parser = pyparsing.nestedExpr('{', '}')
        for node in self.__get_nodes():
            open_brace_pos = node.find('{')
            if open_brace_pos >= 0:
                parsed = brace_parser.parseString(node[open_brace_pos:])
                parse_nodes.append(parsed.asList())
        return parse_nodes

    def parse_node(self, node):
        """Recursively print every token in *node* that starts with ``*``.

        Args:
            node: A possibly nested list of string tokens.
        """
        for n in node:
            if isinstance(n, list):
                self.parse_node(n)
            elif isinstance(n, str) and n.startswith('*'):
                # A single formatted argument prints the same text under both
                # the Python 2 print statement and the Python 3 print function.
                print("%s IS KEY" % n)

    def parse(self):
        """Parse the stored file text and report the keys of every node."""
        for node in self.__get_node_values():
            self.parse_node(node)
I can isolate all of the keys in the node, but I'm not sure of the best way to pair keys and values, particularly since it's a recursive problem.
If there are multiple values next to a key, they should be grouped in a list. For instance "*INHERIT_POS", "0", "0", "0" -> "*INHERIT_POS":[0, 0, 0]
Upvotes: 0
Views: 43
Reputation: 71451
You can use recursion to traverse the nested lists to create the dictionary:
def get_dict(d):
    """Build a dict from a flat list read as alternating key/value items.

    Items at even positions become keys and the item that follows each one
    becomes its value. A value that is not a string is converted recursively
    with the same rule.

    Because the list is consumed strictly two items at a time, a key followed
    by several values is not grouped: the extra values are paired off with
    each other instead. A key that occurs more than once keeps only its last
    value.

    Args:
        d: Flat list of tokens, possibly containing nested lists.

    Returns:
        The resulting (possibly nested) dict.

    Raises:
        IndexError: If *d* has an odd number of items.
    """
    pairs = {}
    for pos in range(0, len(d), 2):
        key = d[pos]
        value = d[pos + 1]
        if not isinstance(value, str):
            value = get_dict(value)
        pairs[key] = value
    return pairs
# Sample input: the outer list holds a single node, which is a flat token
# list whose nested sub-lists use the same key/value layout.
s = [['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]
# Convert the first (and only) node and print the resulting dict.
print(get_dict(s[0]))
Output:
{'*NODE_NAME': '"30deg017"', '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}
Edit:
Since a key can be followed by a "run" of several values that need to be grouped into a list, you can try this:
# Sample input with a top-level '*TM_ROW0' key followed by a run of three
# values, in addition to the nested '*NODE_TM' and '*CAMERA_SETTINGS' lists.
s = [['*NODE_NAME', '"30deg017"', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]
import functools
import itertools
def create_dict(f):
    """Decorate *f* so that its flat ``[key, value, ...]`` result becomes a dict.

    *f* takes a token list and returns a flat list in which keys and values
    strictly alternate. The returned wrapper calls *f*, pairs the items up,
    and converts every value that is itself a nested node. Nested nodes are
    passed back through the wrapper, so *f* is applied at every level of
    nesting and not only to the outermost list.

    A value counts as a nested node when it is a list containing at least one
    string that starts with ``*``. Plain strings and lists of plain values
    are stored unchanged. A key that occurs more than once keeps only its
    last value.

    Args:
        f: Callable mapping a token list to a flat key/value list.

    Returns:
        A callable with the same name and docstring as *f* that returns the
        nested dict.

    Raises:
        IndexError: From the returned wrapper, if *f* returns an odd number
            of items.
    """
    def is_nested_node(value):
        # Checking the item type first keeps non-string items inside a value
        # list from raising AttributeError on ``startswith``.
        return isinstance(value, list) and any(
            isinstance(item, str) and item.startswith('*') for item in value
        )

    @functools.wraps(f)
    def wrapper(d):
        flat = f(d)
        result = {}
        for pos in range(0, len(flat), 2):
            key, value = flat[pos], flat[pos + 1]
            # Recurse through the wrapper rather than pairing the raw nested
            # list directly, so nested nodes get f's processing as well.
            result[key] = wrapper(value) if is_nested_node(value) else value
        return result

    return wrapper
@create_dict
def group_data(d):
    """Collapse each key's run of values so keys and values strictly alternate.

    The function body returns a flat list; the ``create_dict`` decorator then
    turns that list into the dict that callers of ``group_data`` receive.

    A token is a key when it is a string starting with ``*``. Everything
    else, including nested lists, is a value. For each key:

    * a run of two or more string values becomes one list of those strings;
    * any other run contributes only its first item (a single string or a
      nested list);
    * no values at all (the next token is another key, or the input ends)
      gives an empty list, so the key is kept and does not take over the
      values of the key after it.

    Args:
        d: Flat token list for one node, possibly containing nested lists.

    Returns:
        The alternating ``[key, value, key, value, ...]`` list, before
        decoration.
    """
    def is_value(item):
        return isinstance(item, list) or not item.startswith('*')

    flat = []
    ends_with_key = False
    for is_value_run, run in itertools.groupby(d, key=is_value):
        items = list(run)
        ends_with_key = not is_value_run
        if not is_value_run:
            # groupby merges adjacent keys into one run; every key except
            # the last one in the run has no values of its own.
            for key in items[:-1]:
                flat.extend((key, []))
            flat.append(items[-1])
        elif len(items) > 1 and all(isinstance(item, str) for item in items):
            flat.append(items)
        else:
            flat.append(items[0])
    if ends_with_key:
        # A trailing key would leave the list with an odd length.
        flat.append([])
    return flat
# Convert the first (and only) node of the sample and print the result.
print(group_data(s[0]))
Output:
{'*NODE_NAME': '"30deg017"', '*TM_ROW0': ['1.0000', '0.0000', '0.0000'], '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}
Upvotes: 2