Reputation: 1553
I have parsed an XML file that looks like the one below. I may not have copied it perfectly, but the structure is right, so here it is:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE raml SYSTEM 'raml20.dtd'>
<raml version="2.0" xmlns="raml20.xsd">
<cmData type="actual">
<managedObject class="LN" distName="PTR" id="2425">
<p name="aak">220</p>
<p name="orp">05</p>
<p name="name">Portro</p>
<p name="optres">false</p>
<p name="optblu">false</p>
<p name="aoptdet">false</p>
<p name="advcell">false</p>
<list name="sibList">
<item>
<p name="sibcity">177</p>
<p name="sibrep">2</p>
</item>
<item>
<p name="sibcity">177</p>
<p name="sibrep">1</p>
</item>
</list>
</managedObject>
<managedObject class="LN" distName="KRNS" id="93886">
<p name="aak">150</p>
<p name="orp">05</p>
<p name="name">Portro</p>
<p name="optres">false</p>
<p name="optblu">tru</p>
<p name="aoptdet">false</p>
<p name="advcell">true</p>
<list name="sibList">
<item>
<p name="sibcity">177</p>
<p name="sibrep">1</p>
</item>
<item>
<p name="sibcity">180</p>
<p name="sibrep">2</p>
</item>
</list>
</managedObject>
....
<managedObject>
...
</managedObject>
...
</cmData>
</raml>
I need to go through every "managedObject" and compare each parameter (p name) of the first managedObject with the same parameter (aak, orp, etc.) of the other managedObjects, and output the parameters that differ together with their values; if no parameter values differ, nothing should be output. I wrote code for the comparison, but I don't know how to go through the list (it's named "sibList") and compare its parameters. I wrote the code below, where the key is the "p name" and the value is the text of that "p" element:
# NOTE(review): the indentation of this snippet was lost when it was pasted, so the
# loop nesting cannot be recovered with certainty. temp_ln and ref are defined
# outside this snippet -- presumably the managedObject elements and the reference
# parameters of the first managedObject; confirm against the full script.

# Collect [distName, name, text] rows for the children of each element in temp_ln.
temp = []
for i in temp_ln:
# NOTE(review): k is never used; zip() pairs each child with itself here.
for j, k in zip(i.getchildren(), i):
temp.append([i.get('distName'), j.get('name'), j.text])
# Build distName -> {child name: child text}.
tempdict = {}
for i in temp_ln:
td = {}
# NOTE(review): Element.getchildren() was removed in Python 3.9; iterating the
# element directly yields the same children.
for j in i.getchildren():
td.update({j.get('name'): j.text})
tempdict.update({i.get('distName'): td})
# Flatten the entries of every item under the child named 'sibList' into one dict;
# entries that share a name overwrite each other.
elements_list = {}
if j.get('name') == 'sibList':
for item in j.getchildren():
for w in item.getchildren():
elements_list.update({ w.get('name'): w.text})
# For every object, record (value, reference value) for each key whose value
# differs from ref, skipping the 'name' key.
main_dif = {}
# dict.iteritems() exists only in Python 2.
for key, value in tempdict.iteritems():
dif_k = {}
for k, v in value.iteritems():
try:
a = ref[k]
# The bare except turns any failure of the ref lookup into a = None.
except:
a = None
if v != a:
if k == 'name':
pass
else:
dif_k.update({k:(v, a)})
main_dif.update({key:dif_k})
Upvotes: 1
Views: 478
Reputation: 466
Here is a solution that parses the XML file, compares each managedObject with all the others, and prints out the resulting diff object.
import json
from xml.etree import ElementTree
# Parse the input document; the file name is hard-coded.
tree = ElementTree.parse('raml20.xml')
# Prefix -> namespace URI map, passed to findall() to resolve 'ns:'-prefixed paths.
ns = {'ns': 'raml20.xsd'}
# Fully qualified '{uri}tag' names used to match the p, list and item elements.
nsP, nsList, nsItem = ('{%s}%s' % (ns['ns'], i) for i in ('p', 'list', 'item'))
def pkv(o):
    """Return a dict mapping ``name`` to text for the ``p`` children of *o*.

    Only ``p`` elements that are direct children of *o* are collected.
    ``Element.iter()`` also descends into nested elements, so the ``p``
    entries of list items would end up in the parent's dict (overwriting
    each other whenever items share parameter names).

    Raises:
        KeyError: if a ``p`` element has no ``name`` attribute.
    """
    # findall() with a plain tag path matches direct children only.
    return {p.attrib['name']: p.text for p in o.findall(nsP)}
def parse(tree):
    """Convert a parsed document into a ``{distName: parameters}`` dict.

    For every ``managedObject`` under ``cmData`` the parameters returned by
    ``pkv`` are stored, and each ``list`` element found below the object is
    added under its ``name`` attribute as a list with one ``pkv`` dict per
    ``item``.

    Raises:
        KeyError: if a managed object has no ``distName`` attribute or a
            list has no ``name`` attribute.
    """
    result = {}
    managed_objects = tree.getroot().findall('./ns:cmData/ns:managedObject', ns)
    for managed in managed_objects:
        params = pkv(managed)
        for list_elem in managed.iter(nsList):
            items = []
            for item in list_elem.iter(nsItem):
                items.append(pkv(item))
            params[list_elem.attrib['name']] = items
        result[managed.attrib['distName']] = params
    return result
def diff_dicts(d1, d2, ignore_keys=None):
    """Return a dict with the differences between two dicts.

    Args:
        d1: first dict.
        d2: second dict.
        ignore_keys: optional iterable of keys to leave out of the result.
            Any iterable is accepted, not only a set.

    Returns:
        A dict mapping each differing key to ``(value in d1, value in d2)``.
        A key present in only one of the dicts is reported with ``None``
        for the side where it is missing.
    """
    # None instead of a mutable default; copying also accepts lists/tuples.
    ignored = set(ignore_keys) if ignore_keys is not None else set()
    k1 = set(d1)
    k2 = set(d2)
    # Keys present on both sides whose values differ.
    diff = {
        k: (d1[k], d2[k])
        for k in (k1 & k2) - ignored
        if d1[k] != d2[k]
    }
    # Keys present on exactly one side.
    diff.update({k: (d1.get(k), d2.get(k)) for k in (k1 ^ k2) - ignored})
    return diff
def diff_lists(l1, l2):
    """Return a dict with the differences between two lists of dicts.

    Items are compared position by position with ``diff_dicts``.

    Args:
        l1: first sequence of dicts.
        l2: second sequence of dicts.

    Returns:
        A dict mapping the index of each differing position to the
        ``diff_dicts`` result for that position. When the lists have
        different lengths, the extra items of the longer list are compared
        against an empty dict, so each of their keys is reported paired with
        ``None`` (an extra item with no keys produces no entry).
    """
    l1, l2 = list(l1), list(l2)
    diff = {}
    # zip() would stop at the shorter list and silently drop the extra items,
    # so walk the indices of the longer one instead.
    for i in range(max(len(l1), len(l2))):
        d1 = l1[i] if i < len(l1) else {}
        d2 = l2[i] if i < len(l2) else {}
        d = diff_dicts(d1, d2)
        if d:
            diff[i] = d
    return diff
def diff_objects(o1, o2):
    """Return a dict with the differences between two objects (dicts).

    Scalar entries are compared with ``diff_dicts``. Entries whose value is
    a list in both objects are compared item by item with ``diff_lists``.

    Args:
        o1: first object dict.
        o2: second object dict.

    Returns:
        A dict of differences, empty when the objects are equal. A key that
        holds a list in both objects maps to the ``diff_lists`` result and is
        included only when that result is non-empty. A key that holds a list
        in just one object maps to ``(value in o1, value in o2)``, with
        ``None`` for a missing key.
    """
    listkeys = set(
        k for o in (o1, o2) for k in o if isinstance(o[k], list))
    diff = diff_dicts(o1, o2, listkeys)
    for k in listkeys:
        v1, v2 = o1.get(k), o2.get(k)
        if isinstance(v1, list) and isinstance(v2, list):
            list_diff = diff_lists(v1, v2)
            # Skip empty results so that equal objects give an empty (falsy) diff.
            if list_diff:
                diff[k] = list_diff
        else:
            # A list on one side only (missing or scalar on the other) always differs.
            diff[k] = (v1, v2)
    return diff
def compare_objects(objs):
    """Compare every object with every other object.

    Args:
        objs: dict mapping an object name to its parameter dict.

    Returns:
        A list of ``(name1, name2, diff)`` tuples, one for each unordered
        pair of objects whose ``diff_objects`` result is non-empty. Pairs
        follow the iteration order of *objs*.
    """
    diffs = []
    keys = list(objs)
    # Pair each key with all later keys: comparing only neighbours would never
    # compare, for example, the first object with the third.
    for i, k1 in enumerate(keys):
        for k2 in keys[i + 1:]:
            diff = diff_objects(objs[k1], objs[k2])
            if diff:
                diffs.append((k1, k2, diff))
    return diffs
# Diff the parsed managed objects and print the result as indented JSON.
res = compare_objects(parse(tree))
print(json.dumps(res, indent=2))
I've tested with the following raml20.xml file:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE raml SYSTEM 'raml20.dtd'>
<raml version="2.0" xmlns="raml20.xsd">
<cmData type="actual">
<managedObject class="LN" distName="PTR" id="2425">
<p name="aak">220</p>
<p name="orp">05</p>
<p name="name">Portro</p>
<p name="optres">false</p>
<p name="optblu">false</p>
<p name="aoptdet">false</p>
<p name="advcell">false</p>
<list name="sibList">
<item>
<p name="sibcity">177</p>
<p name="sibrep">2</p>
</item>
<item>
<p name="sibcity">177</p>
<p name="sibrep">1</p>
</item>
</list>
</managedObject>
<managedObject class="LN" distName="KRNS" id="93886">
<p name="aak">150</p>
<p name="orp">05</p>
<p name="name">Portro</p>
<p name="optres">false</p>
<p name="optblu">tru</p>
<p name="aoptdet">false</p>
<p name="advcell">true</p>
<list name="sibList">
<item>
<p name="sibcity">177</p>
<p name="sibrep">1</p>
</item>
<item>
<p name="sibcity">180</p>
<p name="sibrep">2</p>
</item>
</list>
</managedObject>
</cmData>
</raml>
The resulting diff object is:
[
[
"PTR",
"KRNS",
{
"advcell": [
"false",
"true"
],
"optblu": [
"false",
"tru"
],
"sibcity": [
"177",
"180"
],
"aak": [
"220",
"150"
],
"sibrep": [
"1",
"2"
],
"sibList": {
"0": {
"sibrep": [
"2",
"1"
]
},
"1": {
"sibcity": [
"177",
"180"
],
"sibrep": [
"1",
"2"
]
}
}
}
]
]
Upvotes: 1