Converting multisheet XML to Excel with python

Question

I have hundreds of XML files I need to convert to XLSX for downstream analysis, but I have far too many to do by hand. I have been trying to figure out a python way to do this, and have found a number of tutorials using either xml2xlsx or xml.etree.ElementTree. However, the problem I am running into is that all of these are dealing with singular sheets, whereas my xml has 3 sheets (see below sections for example). If someone knows how to do this, or how I should be going about parsing it apart, I would appreciate any help.



 
  False
  False
 
 
  
    Node ID
    X Coord
    Y Coord
    Z Coord
    Coordination Number
   
    0
    12.25489235
    21.835989
    6.916931152
    1
   
...
  
   
   
  
   False
   False
  
 
 
  
    Point ID
    thickness
    X Coord
    Y Coord
    Z Coord
   
...
  
   
  
   False
   False
  
 
 
  
    Segment ID
    Node ID #1
    Node ID #2
    Point IDs
   
...
Here is what I have attempted so far, First using xml2xlsx, which returns an empty excel sheet:
from xml2xlsx import xml2xlsx
from lxml import etree
import sys
import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-f', '--files', nargs = '+', required = True, help='one or more xml files to convert')
#parser.add_argument('-c', '--conditions', nargs = '+', default = ['Condition 1', 'Condition 2'], help = 'Names of conditions to analyze. replace any spaces in filename with underscores.')
args = parser.parse_args()

for file in args.files:
    filename = os.path.splitext(os.path.basename(file))[0].lower()
    filename = filename.replace(' ', '_')
    output = filename+'.xlsx'
    with open(file) as xml_file:
        template = xml_file.read().encode()
    with open(output, 'wb') as xlsx_file:
        xlsx_file.write(xml2xlsx(template))
Then this prewritten code, I am having trouble getting to work.
#XML TO EXCEL FILE
import xml.etree.ElementTree as ET
from openpyxl import Workbook
import os 
import sys
import argparse

def readFile(filename):
    '''
        Checks if file exists, parses the file and extracts the needed data
        returns a 2 dimensional list without "header"
    '''
    if not os.path.exists(filename): return
    tree = ET.parse(filename)
    root = tree.getroot()
    #you may need to adjust the keys based on your file structure
    dict_keys = ["id","first_name","last_name","email","gender","ip_address" ] #all keys to be extracted from xml
    mdlist = []
    for child in root:
        temp = []
        for key in dict_keys:
            temp.append(child.find(key).text)
        mdlist.append(temp)
    return mdlist

def to_Excel(mdlist):
    '''
        Generates excel file with given data
        mdlist: 2 Dimenusional list containing data
    '''
    wb = Workbook()
    ws = wb.active
    for i,row in enumerate(mdlist):
        for j,value in enumerate(row):
            ws.cell(row=i+1, column=j+1).value = value
    newfilename = os.path.abspath("./xml_to_excel.xlsx")
    wb.save(newfilename)
    print("complete")
    return

parser = argparse.ArgumentParser()
parser.add_argument('-f', '--files', nargs = '+', required = True, help='one or more xml files to convert')
#parser.add_argument('-c', '--conditions', nargs = '+', default = ['Condition 1', 'Condition 2'], help = 'Names of conditions to analyze. replace any spaces in filename with underscores.')
args = parser.parse_args()

for f in args.files:
    result = readFile(f)
    if result:
        to_Excel(result)
And finally, this, which spat out a long single line of nonsense tags.
import xml.etree.ElementTree as ET
tree = ET.parse(r'pathToXML')
root = tree.getroot()
tag = root.tag
att = root.attrib
#Flatten XML to CSV
for child in root:
    for subchild in child:
        mainlevel = child.tag
        xmltocsv = ''
        for elem in root.iter():
            if elem.tag == root.tag:
                continue
            if elem.tag == mainlevel:
                xmltocsv = xmltocsv + '
'
            xmltocsv = xmltocsv + str(elem.tag).rstrip() + str(elem.attrib).rstrip() + ';' + str(elem.text).rstrip() + ';'
with open('output.csv', 'w') as file:
    file.write(xmltocsv)

Converting multisheet XML to Excel with python

Answers (1)

Related Questions