jovicbg
jovicbg

Reputation: 1553

Python function for xml list

I have parsed an XML file that looks like this. I may not have copied it perfectly, but it should be close enough:

     <?xml version="1.0" encoding="UTF-8"?>
        <!DOCTYPE raml SYSTEM 'raml20.dtd'>
        <raml version="2.0" xmlns="raml20.xsd">
        <cmData type="actual">
            <managedObject class="LN" distName="PTR" id="2425">
              <p name="aak">220</p>
              <p name="orp">05</p>
              <p name="name">Portro</p>
              <p name="optres">false</p>
              <p name="optblu">false</p>
              <p name="aoptdet">false</p>
              <p name="advcell">false</p>
              <list name="sibList">
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">2</p>
                </item>
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">1</p>
                </item>
              </list>
            </managedObject>
            <managedObject class="LN" distName="KRNS" id="93886">
              <p name="aak">150</p>
              <p name="orp">05</p>
              <p name="name">Portro</p>
              <p name="optres">false</p>
              <p name="optblu">tru</p>
              <p name="aoptdet">false</p>
              <p name="advcell">true</p>
              <list name="sibList">
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">1</p>
                </item>
                <item>
                  <p name="sibcity">180</p>
                  <p name="sibrep">2</p>
                </item>
               </list>
            </managedObject>
             ....
            <managedObject>
             ...
            </managedObject>

            ...
        </cmData>
        </raml>

I need to go through all "managedObject" elements and compare every parameter (`p name`) of the first managedObject with the same parameters (aak, orp, etc.) of the other managedObjects, and output the parameters that differ together with their values. If no parameter values differ, nothing should be output. I wrote code for the comparison, but I don't know how to go through the list (it's named "sibList") and compare its parameters. I wrote this function, where the key is the "p name" and the value is the text of that "p" element:

# Flat list of [distName, parameter name, parameter text] rows.
# NOTE(review): temp_ln is not defined in this snippet; presumably it holds
# the parsed managedObject elements -- confirm against the full script.
temp = []
for i in temp_ln:
    # zip() walks i.getchildren() and i in lockstep; k is never used.
    for j, k in zip(i.getchildren(), i):
        temp.append([i.get('distName'), j.get('name'), j.text])

    # Still inside the outer loop: tempdict is rebuilt on every iteration and
    # the nested loop below rebinds i.
    # tempdict maps distName -> {child 'name' attribute: child text}.
    tempdict = {}
    for i in temp_ln:
        td = {}
        for j in i.getchildren():
            td.update({j.get('name'): j.text})
        tempdict.update({i.get('distName'): td})


# NOTE(review): the indentation of this fragment is inconsistent as pasted, so
# it does not run as-is; j and ref come from code that is not shown here.
# elements_list collects name -> text for the children of each child of the
# element whose 'name' attribute is 'sibList'; repeated names overwrite
# earlier ones.
elements_list = {}
   if j.get('name') == 'sibList':
            for item in j.getchildren():
                for w in item.getchildren():
                    elements_list.update({ w.get('name'): w.text})

        # main_dif maps each key of tempdict to {parameter: (value, ref value)}
        # for the parameters whose value differs from ref.
        # NOTE(review): ref is presumably the parameter dict of the reference
        # managedObject -- confirm.
        main_dif = {}
        for key, value in tempdict.iteritems():
            dif_k = {}
            for k, v in value.iteritems():
                # A failed lookup in ref is treated as None.
                try: 
                    a = ref[k]
                except:
                    a = None
                if v != a:
                    # Differences in the 'name' parameter are skipped.
                    if k == 'name':
                        pass
                    else:
                        dif_k.update({k:(v, a)})
            main_dif.update({key:dif_k})

Upvotes: 1

Views: 478

Answers (1)

jcbsv
jcbsv

Reputation: 466

Here is a solution that parses the XML file, compares each managedObject with the next one, and prints the resulting diff object.

import json
from xml.etree import ElementTree


# Load the document to compare; the file name is hard-coded.
tree = ElementTree.parse('raml20.xml')

# Prefix -> namespace URI map passed to the findall() path query in parse().
ns = {'ns': 'raml20.xsd'}
# Namespace-qualified "{uri}tag" names used for tag-based iteration.
nsP, nsList, nsItem = [
    '{%s}%s' % (ns['ns'], tag) for tag in ('p', 'list', 'item')]


def pkv(o):
    """Return a dict mapping the ``name`` attribute of p elements to their text.

    ``o.iter`` walks the whole subtree of *o*, so p elements nested deeper
    (for example inside list items) are collected too. When a name occurs
    more than once, the last occurrence in document order wins.
    """
    params = {}
    for p in o.iter(nsP):
        params[p.attrib['name']] = p.text
    return params


def parse(tree):
    """Return a dict mapping each managedObject's distName to its parameters.

    Every managedObject under ``cmData`` becomes a dict built by ``pkv``.
    Each list element found below it is then stored under the list's
    ``name`` attribute as a list holding one ``pkv`` dict per item.
    """
    managed = tree.getroot().findall('./ns:cmData/ns:managedObject', ns)
    result = {}
    for element in managed:
        entry = pkv(element)
        for lst in element.iter(nsList):
            items = [pkv(item) for item in lst.iter(nsItem)]
            entry[lst.attrib['name']] = items
        result[element.attrib['distName']] = entry
    return result


def diff_dicts(d1, d2, ignore_keys=None):
    """Return a dict with the differences between two dicts.

    Args:
        d1: First dict.
        d2: Second dict.
        ignore_keys: Optional iterable of keys to leave out of the
            comparison. Any iterable is accepted, not only a set.

    Returns:
        A dict mapping each differing key to a ``(d1 value, d2 value)``
        tuple. A key present in only one dict is reported with ``None``
        for the side where it is missing.
    """
    # Copy into a fresh set: avoids a shared mutable default and lets callers
    # pass a list, tuple or any other iterable of keys.
    ignored = set(ignore_keys) if ignore_keys is not None else set()
    k1 = set(d1)
    k2 = set(d2)
    # Keys present on both sides whose values differ.
    diff = {i: (d1[i], d2[i]) for i in (k1 & k2) - ignored if d1[i] != d2[i]}
    # Keys present on one side only are always reported, even if the value
    # that is present happens to be None.
    diff.update({i: (d1.get(i), d2.get(i)) for i in (k1 ^ k2) - ignored})
    return diff


def diff_lists(l1, l2):
    """Return a dict with the differences between two lists of dicts.

    Items are compared position by position with ``diff_dicts``.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts.

    Returns:
        A dict keyed by item index. For positions present in both lists the
        value is the non-empty ``diff_dicts`` result; positions without a
        difference are left out. For positions present in only one list the
        value is an ``(item, None)`` or ``(None, item)`` tuple.
    """
    from itertools import zip_longest

    # A unique sentinel marks "no item at this position"; plain zip() would
    # silently drop the trailing items of the longer list.
    missing = object()
    diff = {}
    for i, (d1, d2) in enumerate(zip_longest(l1, l2, fillvalue=missing)):
        if d1 is missing:
            diff[i] = (None, d2)
        elif d2 is missing:
            diff[i] = (d1, None)
        else:
            d = diff_dicts(d1, d2)
            if d:
                diff[i] = d
    return diff


def diff_objects(o1, o2):
    """Return a dict with the differences between two parsed objects.

    Args:
        o1: First object, a dict whose values are scalars or lists of dicts.
        o2: Second object, same layout.

    Returns:
        A dict of differences. Scalar keys map to ``(o1 value, o2 value)``
        tuples as produced by ``diff_dicts``. A key holding a list in both
        objects maps to the ``diff_lists`` result and is left out when the
        lists are equal. A key holding a list in only one object maps to an
        ``(o1 value, o2 value)`` tuple. The result is empty when the objects
        do not differ.
    """
    # Keys whose value is a list in at least one of the two objects.
    listkeys = set(
        i for o in (o1, o2) for i in o if isinstance(o.get(i), list))
    diff = diff_dicts(o1, o2, listkeys)
    for i in listkeys:
        v1, v2 = o1.get(i), o2.get(i)
        if isinstance(v1, list) and isinstance(v2, list):
            d = diff_lists(v1, v2)
            # Record the list only when it actually differs, so that equal
            # objects yield an empty (falsy) diff.
            if d:
                diff[i] = d
        else:
            # Missing on one side, or a list on one side and a scalar on the
            # other: report both raw values.
            diff[i] = (v1, v2)
    return diff


def compare_objects(objs):
    """Diff each object against the one that follows it in *objs*.

    Returns a list of ``(first key, second key, diff)`` tuples, one for each
    adjacent pair whose ``diff_objects`` result is non-empty.
    """
    names = list(objs)
    diffs = []
    # Pair every key with its successor; the last key has none.
    for first, second in zip(names, names[1:]):
        delta = diff_objects(objs[first], objs[second])
        if not delta:
            continue
        diffs.append((first, second, delta))
    return diffs


# Diff the parsed managedObjects and print the result as indented JSON.
res = compare_objects(parse(tree))
print(json.dumps(res, indent=2))

I've tested with the following raml20.xml file:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE raml SYSTEM 'raml20.dtd'>
<raml version="2.0" xmlns="raml20.xsd">
  <cmData type="actual">
    <managedObject class="LN" distName="PTR" id="2425">
      <p name="aak">220</p>
      <p name="orp">05</p>
      <p name="name">Portro</p>
      <p name="optres">false</p>
      <p name="optblu">false</p>
      <p name="aoptdet">false</p>
      <p name="advcell">false</p>
      <list name="sibList">
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">2</p>
        </item>
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">1</p>
        </item>
      </list>
    </managedObject>
    <managedObject class="LN" distName="KRNS" id="93886">
      <p name="aak">150</p>
      <p name="orp">05</p>
      <p name="name">Portro</p>
      <p name="optres">false</p>
      <p name="optblu">tru</p>
      <p name="aoptdet">false</p>
      <p name="advcell">true</p>
      <list name="sibList">
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">1</p>
        </item>
        <item>
          <p name="sibcity">180</p>
          <p name="sibrep">2</p>
        </item>
       </list>
    </managedObject>
  </cmData>
</raml>

The resulting diff object is:

[
  [
    "PTR",
    "KRNS",
    {
      "advcell": [
        "false",
        "true"
      ],
      "optblu": [
        "false",
        "tru"
      ],
      "sibcity": [
        "177",
        "180"
      ],
      "aak": [
        "220",
        "150"
      ],
      "sibrep": [
        "1",
        "2"
      ],
      "sibList": {
        "0": {
          "sibrep": [
            "2",
            "1"
          ]
        },
        "1": {
          "sibcity": [
            "177",
            "180"
          ],
          "sibrep": [
            "1",
            "2"
          ]
        }
      }
    }
  ]
]

Upvotes: 1

Related Questions