Reputation: 221
I have an xml that looks like this:
<G2>some text</G2>
<G3>some text</G3>
<G2>some text</G2>
<G3>some text</G3>
<G2>some text</G2>
<G3>some text</G3>
I'm trying to transform this XML into a nested dictionary called "G":
{ 1: {G1: 1,
G2: some text,
G3: some text,
GP: { 1: {GP1: 1,
GP2: a,
GP3: a},
2: {GP1: 2,
GP2: b,
GP3: b},
3: {GP1: 3,
GP2: c,
GP3: c}}
2: {G1: 2,
G2: some text,
G3: some text,
GP: { 1: {GP1: 1,
GP2: aa,
GP3: aa},
2: {GP1: 2,
GP2: bb,
GP3: bb},
3: {GP1: 3,
GP2: cc,
GP3: cc}}
3: {G1: 3,
G2: some text,
G3: some text,
GP: { 1: {GP1: 1,
GP2: aaa,
GP3: aaa},
2: {GP1: 2,
GP2: bbb,
GP3: bbb},
3: {GP1: 3,
GP2: ccc,
GP3: ccc}}
My code works fine for all the elements that are directly under "G" (G1, G2, etc.), but for GP I either get only one record, or I get all of them but with the same thing duplicated a couple of times, or I get all 9 GP elements under one single GP in the dictionary. Here is my code:
f = 'path to file'
tree = ET.parse(f)
root = tree.getroot()
self.tree = tree
self.root = root

# Element.getiterator() was removed in Python 3.9; iter() is its replacement.
# Collect the G elements once instead of re-walking the tree per iteration.
g_elements = list(self.tree.iter('G'))
gs = len(g_elements)

# g maps a 1-based record number to the dict built from one G element.
g = {}
for i, g_elem in enumerate(g_elements, start=1):
    d = {}
    # GP records belonging to THIS G element only, numbered from 1.
    # Building them here (rather than from a tree-wide search) keeps each
    # G's GP entries nested under that G instead of pooled under g["GP"].
    gp = {}
    for elem in g_elem:
        # An element "has children" when len(elem) > 0; this does not depend
        # on how the file happens to be indented, unlike a text comparison.
        if elem.tag == 'GP':
            record = {}
            for parent in elem:
                if len(parent) > 0:
                    record[parent.tag] = {
                        child.tag: child.text for child in parent
                    }
                else:
                    record[parent.tag] = parent.text
            gp[len(gp) + 1] = record
        elif len(elem) > 0:
            dd = {}
            for parent in elem:
                if len(parent) > 0:
                    dd[parent.tag] = {
                        child.tag: child.text for child in parent
                    }
                else:
                    dd[parent.tag] = parent.text
            d[elem.tag] = dd
        else:
            d[elem.tag] = elem.text
    if gp:
        d['GP'] = gp
    g[i] = d
Upvotes: 1
Views: 998
Reputation: 41186
#!/usr/bin/env python3
import sys
import xml.etree.ElementTree as ET
from pprint import pprint as pp
FILE_NAME = "data.xml"
def convert_node(node, depth_level=0):
    """Recursively convert an ElementTree element into nested dicts/strings.

    A node without children is converted to its stripped text ("" when the
    node has no text). A node with children is converted to a dict keyed by
    child tag:

    * a tag that occurs once among the children maps directly to the
      converted child;
    * a tag that occurs several times maps to a sub-dict whose keys are the
      1-based occurrence numbers as strings ("1", "2", ...), counted
      separately for each tag.

    Args:
        node: The element to convert.
        depth_level: Current recursion depth; passed down to recursive calls
            and not otherwise used for the conversion.

    Returns:
        A str for a leaf node, otherwise a dict as described above.
    """
    child_nodes = list(node)
    if not child_nodes:
        return (node.text or "").strip()

    # Count each tag once up front instead of calling list.count per child.
    tag_counts = {}
    for child_node in child_nodes:
        tag_counts[child_node.tag] = tag_counts.get(child_node.tag, 0) + 1

    ret_dict = {}
    for child_node in child_nodes:
        tag = child_node.tag
        converted = convert_node(child_node, depth_level=depth_level + 1)
        if tag_counts[tag] > 1:
            # Repeated tag: append under the next free per-tag index.
            sub_obj_dict = ret_dict.setdefault(tag, {})
            sub_obj_dict[str(len(sub_obj_dict) + 1)] = converted
        else:
            # Unique tag: store the converted child directly. This must be
            # an else-branch, otherwise it would overwrite the sub-dict.
            ret_dict[tag] = converted
    return ret_dict
def main():
    """Parse FILE_NAME, convert its root element and pretty-print the result.

    The root is converted with convert_node. When the result is a dict, each
    of its values is pretty-printed; the keys themselves are not printed.
    """
    tree = ET.parse(FILE_NAME)
    root_node = tree.getroot()
    converted_xml = convert_node(root_node)
    print("\nResulting dict(s):\n")
    if not isinstance(converted_xml, dict):
        # A root without children converts to plain text; print it as-is
        # rather than iterating over its characters.
        pp(converted_xml)
        return
    # converted_xml should be a dictionary having only one key (in our case
    # "G") - we only care about its value, to match the required output.
    for key in converted_xml:
        pp(converted_xml[key])
if __name__ == "__main__":
    # Report the interpreter in use, then run the conversion.
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
(py_064_03.05.04_test0) e:\Work\Dev\StackOverflow\q045799991>"e:\Work\Dev\VEnvs\py_064_03.05.04_test0\Scripts\python.exe" Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32 Resulting dict(s): {'1': {'G1': '1', 'G2': 'some text', 'G3': 'some text', 'GP': {'1': {'GP1': '1', 'GP2': 'a', 'GP3': 'a'}, '2': {'GP1': '2', 'GP2': 'b', 'GP3': 'b'}, '3': {'GP1': '3', 'GP2': 'c', 'GP3': 'c'}}}, '2': {'G1': '2', 'G2': 'some text', 'G3': 'some text', 'GP': {'1': {'GP1': '1', 'GP2': 'aa', 'GP3': 'aa'}, '2': {'GP1': '2', 'GP2': 'bb', 'GP3': 'bb'}, '3': {'GP1': '3', 'GP2': 'cc', 'GP3': 'cc'}}}, '3': {'G1': '3', 'G2': 'some text', 'G3': 'some text', 'GP': {'1': {'GP1': '1', 'GP2': 'aaa', 'GP3': 'aaa'}, '2': {'GP1': '2', 'GP2': 'bbb', 'GP3': 'bbb'}, '3': {'GP1': '3', 'GP2': 'ccc', 'GP3': 'ccc'}}}}
Upvotes: 3