user8542696
user8542696

Reputation:

Parse complex XML to CSV using Python

I am trying to parse a complex XML file to CSV.

This is how my XML file looks like:

<?xml version="1.0" encoding="UTF-8"?>
<ke3600-menu-file language="en" display="English" index="1">
  <version major="0" minor="1" patch="0"/>
  <locale name="en_EN" timezone="CET-1CEST,M3.5.0,M10.5.0/3"/>
  <menu name="main_menu" display="Main Menu">
    <menu name="broadband" display="Broadband" help="100_help_broadband">
      <onenter proc="activateGfast"/>
      <menu name="load_save_profiles" display="Load and Save Profiles" help="601_help_profiles">
        <application name="load_profiles" display="Load Profile"/>
        <application name="save_profiles" display="Save Profile"/>
        <application name="remove_profiles" display="Delete Profile"/>
      </menu>
      <parameter type="list" name="wanInterface" display="Interface" help="101_help_wanInterface">
        <value id="0" name="ifDSL" display="xDSL"/>
        <value id="1" name="ifSFP" display="SFP"/>
        <value id="2" name="ifETH" display="Ethernet"/>
        <!-- <value i d="3" name="ifWLAN" display="WLAN"/> -->
        <value id="4" name="ifSHDSL" display="SHDSL"/>
      </parameter>

      <menu name="xdslinterface" display="xDSL Interface" help="200_help_xDslInterface">
        <parameter type="list" name="annex" display="xDSL Mode" help="201_help_xdslModi">
          <value id="0" name="xdsl_line_annex_a" display="Annex A/M"/>
          <value id="1" name="xdsl_line_annex_b" display="Annex B/J"/>
        </parameter>
        <application name="xdsl_macAddr" display="MAC Address" caption="MAC Address">
          <param type="string" name="xdsl_mac_addr" display="MAC Address" length="25"/>
        </application>
        <parameter type="list" name="xdsl_vectoring_mode" display="Vectoring Mode" help="202_help_Vectoring">
          <value id="0" name="xdsl_vectoring_mode_disabled" display="Disabled"/>
          <value id="1" name="xdsl_vectoring_mode_enabled" display="Enabled"/>
          <value id="2" name="xdsl_vectoring_mode_friendly" display="Friendly"/>
        </parameter>
        <parameter type="list" name="xdsl_gfast_mode" display="G.FAST">
          <value id="0" name="xdsl_gfast_mode_disabled" display="Disabled"/>
          <value id="1" name="xdsl_gfast_mode_enabled" display="Enabled"/>
        </parameter>
        <parameter type="list" name="resync" display="Auto Re-Sync">
          <value id="1" name="re_sync_on" display="On"/>
          <value id="0" name="re_sync_off" display="Off"/>
        </parameter>
        <parameter type="list" name="seamlessRateAdaption" display="Seamless Rate Adaption">
          <value id="0" name="sra_off" display="Off"/>
          <value id="1" name="sra_on" display="On"/>
        </parameter>
        <parameter type="list" name="xdsl_ginp" display="G.INP">
          <value id="0" name="xdsl_ginp_none" display="Off"/>
          <value id="1" name="xdsl_ginp_down" display="Down"/>
          <value id="2" name="xdsl_ginp_up" display="Up"/>
          <value id="3" name="xdsl_ginp_both" display="Down/Up"/>
        </parameter>
        <parameter type="list" name="power_cut_back" display="Power Cut Back">
          <value id="0" name="power_cut_back_off" display="Off"/>
          <value id="1" name="power_cut_back_on" display="On"/>
        </parameter>
        <parameter type="list" name="atmType" display="ATM Type">
          <value id="0" name="atm_type_aal5" display="AAL5"/>
          <value id="1" name="atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
        <parameter type="list" name="voip_atmType" display="Voip Interface ATM Type">
          <value id="0" name="voip_atm_type_aal5" display="AAL5"/>
          <value id="1" name="voip_atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="voip_atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
        <parameter type="list" name="iptv_atmType" display="Iptv Interface ATM Type">
          <value id="0" name="iptv_atm_type_aal5" display="AAL5"/>
          <value id="1" name="iptv_atm_type_aal0pkt" display="AAL0PKT"/>
          <value id="2" name="iptv_atm_type_aal0cell" display="AAL0CELL"/>
        </parameter>
      </menu>

      <menu name="ethernetinterface" display="Ethernet Interface" help="300_help_ethernetInterface">
        <parameter type="list" name="ethInterfaceAuto" display="Autonegotiation">
          <value id="1" name="ethInterfaceAutoOn" display="On"/>
          <value id="0" name="ethInterfaceAutoOff" display="Off"/>
        </parameter>
        <parameter type="list" name="ethInterfaceSpeed" display="Speed">
          <value id="2" name="ethIf1000" display="1000MBit/s"/>
          <value id="1" name="ethIf100" display="100MBit/s"/>
          <value id="0" name="ethIf10" display="10MBit/s"/>
        </parameter>
        <parameter type="list" name="ethInterfaceDuplexMode" display="Duplex Mode">
          <value id="1" name="ethIfFull" display="Full"/>
          <value id="0" name="ethIfHalf" display="Half"/>
        </parameter>
      </menu>

      <menu name="shdslinterface" display="SHDSL Interface" help="400_help_shdslInterface">
        <menu name="shdslservice" display="SHDSL Service">
          <parameter type="list" name="shdslmode" display="SHDSL Mode" help="401_help_shdslModi">
            <value id="0" name="shdslAtm" display="ATM"/>
            <value id="1" name="shdslEfm" display="EFM"/>
            <value id="2" name="shdslTdm" display="TDM"/>
            <value id="3" name="shdslTu12" display="TU-12oSHDSL"/>
          </parameter>
          <parameter type="uint" name="shdsl_vpi" display="VPI" min="0" max="127" step="1" default="1" help="402_help_shdsl_vpi"/>
          <parameter type="uint" name="shdsl_vci" display="VCI" min="32" max="255" step="1" default="32"/>
          <parameter type="list" name="shdsl_encapsulation" display="Encapsulation">
            <value id="0" name="encap_vcmux" display="VCMUX"/>
            <value id="1" name="encap_llc" display="LLC"/>
          </parameter>
          <parameter type="list" name="shdsl_eth_fcs" display="Eth-FCS in PDU">
            <value id="1" name="eth_fcs_enabled" display="Enabled"/>
            <value id="0" name="eth_fcs_disabled" display="Disabled"/>
          </parameter>
          <parameter type="list" name="shdsl_efm_standard" display="Standard">
            <value id="0" name="shdsl_efm_standard_8023ah" display="IEEE 802.3ah"/>
            <value id="1" name="shdsl_efm_standard_g9912" display="G.991.2"/>
            <value id="2" name="shdsl_efm_standard_auto" display="Auto"/>
          </parameter>
          <parameter type="list" name="shdsl_tdm_framing" display="Clockmode">
            <value id="4" name="shdsl_tdm_framing_cm3a" display="CM3a: Synchron"/>
            <value id="1" name="shdsl_tdm_framing_cm1" display="CM1: Plesiochron"/>
            <value id="2" name="shdsl_tdm_framing_cm2" display="CM2: Plesiochron NTR"/>
          </parameter>
          <parameter type="list" name="shdsl_tdm_loop" display="Loopback">
            <value id="0" name="shdsl_tdm_loop_disabled" display="Disabled"/>
            <value id="1" name="shdsl_tdm_loop_lineside" display="Lineside"/>
          </parameter>
        </menu>
        <parameter type="list" name="shdsl_side" display="Side">
          <value id="0" name="shdsl_tur" display="STU-R"/>
          <value id="1" name="shdsl_tuc" display="STU-C"/>
        </parameter>
        <menu name="shdslmapping" display="Line Mapping">
          <parameter type="list" name="shdsl_mapping_line0" display="Line 1">
            <value id="0" name="shdsl_mapping_line0_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line0_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line0_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line0_3" display="1+2"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line1" display="Line 2">
            <value id="1" name="shdsl_mapping_line1_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line1_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line1_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line1_0" display="4+5"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line2" display="Line 3">
            <value id="2" name="shdsl_mapping_line2_2" display="7+8"/>
            <value id="3" name="shdsl_mapping_line2_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line2_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line2_1" display="3+6"/>
          </parameter>
          <parameter type="list" name="shdsl_mapping_line3" display="Line 4">
            <value id="3" name="shdsl_mapping_line3_3" display="1+2"/>
            <value id="0" name="shdsl_mapping_line3_0" display="4+5"/>
            <value id="1" name="shdsl_mapping_line3_1" display="3+6"/>
            <value id="2" name="shdsl_mapping_line3_2" display="7+8"/>
          </parameter>
        </menu>

I want to extract the following data:

"Count" "Label" "Name" "Type"
"1" "" "locale" ""
"1" "Main Menu" "menu" ""
"2" "Broadband" "menu" ""
"3" "Load and Save Profiles" "menu" ""

The first line defines the column names, and the rest of the data is just tab separated values, one line per record. The Count represents the menu position in the XML tree.

I have tried using the following code:

import xml.etree.ElementTree as ET
import csv

tree = ET.parse("menu.xml")
root = tree.getroot()

# open a file for writing

menu_data = open('/tmp/menu.csv', 'w')

# create the csv writer object

csvwriter = csv.writer(menu_data)
menu_head = []

count = 0
for member in root.findall('menu'):
    menu = []
    menu_list = []
    if count == 0:
        name = member.find('name').tag
        menu_head.append(name)
        label = member.find('Label').tag
        menu_head.append(Label)
        Count = member[3].tag
        menu_head.append(Count)
        csvwriter.writerow(menu_head)
        count = count + 1

    name = member.find('name').text
    menu.append(name)
    Count = member[3][0].text
    menu_list.append(Count)
    csvwriter.writerow(menu)
menu_data.close()

But I get the following error:

Detail AttributeError: 'NoneType' object has no attribute 'tag' 

I can't figure out what's wrong. Also the tutorial I am following doesnt have much sub branches, But I have menu inside menu inside menu as you can see in my XML file. Please let me know if you have any suggestion.

Upvotes: 1

Views: 1886

Answers (1)

stovfl
stovfl

Reputation: 15513

Question: 'NoneType' object has no attribute 'tag'

You get that Error at this Line:

name = member.find('name').tag

The Reason, there is no Tag <name>, 'name' is a attribute of the Tag <menu name="main_menu">. Therefore you have to use:

name = member.attrib['name']

I assume you want something like that:

    csvwriter.writerow(["Count", "Label", "Name", "Type"])

    def iter_menu(menu_list, level=[1], deep=1):
        for n, menu in enumerate(menu_list, 1):
            level[deep-1] = n

            csvwriter.writerow(('.'.join([str(i) for i in level]), menu.attrib['display'], menu.attrib['name'], 'Type?'))
            level.append(0)
            iter_menu(menu.findall('menu'), level, deep+1)
            level = level[:deep]

    iter_menu(root.findall('menu'))

Output:

Count,Label,Name,Type
1,Main Menu,main_menu,Type?
1.1,Broadband,broadband,Type?
1.1.1,Load and Save Profiles,load_save_profiles,Type?
1.2,xDSL Interface,xdslinterface,Type?
1.3,Ethernet Interface,ethernetinterface,Type?
1.4,SHDSL Interface,shdslinterface,Type?
1.4.1,SHDSL Service,shdslservice,Type?
1.4.2,Line Mapping,shdslmapping,Type?

Tested with Python: 3.4.2

Upvotes: 1

Related Questions