AuctionKing
AuctionKing

Reputation: 1

Loop through multiple xml files

I'm fairly new to Python and would like to loop through multiple XML files. I'm currently using the following code to read a single file, sample2.xml:

import xml.etree.ElementTree as ET
import pandas as pd
import os


# Load the XML document and keep a handle on its root element.
tree = ET.parse("sample2.xml")
root = tree.getroot()

# Pair up the "v" attribute of every Qty, PriceAmount and
# AuctionIdentification element in document order; zip() stops at the
# shortest of the three sequences.
data = [
    (quantity.get("v"), amount.get("v"), auction.get("v"))
    for quantity, amount, auction in zip(
        root.iterfind(".//Qty"),
        root.iterfind(".//PriceAmount"),
        root.iterfind(".//AuctionIdentification"),
    )
]

# One row per matched triple; the attribute values arrive as strings, so
# the two numeric columns are converted to float.
df = pd.DataFrame(data, columns=["Qty", "Price", "Border"]).astype(
    {"Qty": float, "Price": float}
)

# Collapse all rows into the three summary values.
total = df["Qty"].sum()
price = df["Price"].mean()
border = df.loc[0, "Border"]

# Single-row summary: summed quantity, mean price, and the first 12
# characters of the first auction identifier.
df2 = pd.DataFrame(
    {"Qty": [total], "Price": [price], "Border": [str(border)[0:12]]},
    columns=["Qty", "Price", "Border"],
)

I tried adding a second file, soup.xml, to the line of code below, but this didn't work:

tree=ET.parse("sample2.xml , "soup xml")

root = tree.getroot()

Upvotes: 0

Views: 233

Answers (2)

MYK
MYK

Reputation: 2987

Consider turning your code into a function and calling it for the various files you need:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename):
    """Summarise one auction XML file as a single-row DataFrame.

    Reads the ``v`` attribute of every ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` element found anywhere under the root and
    pairs them up in document order. The returned frame has one row with
    the summed quantity (``Qty``), the mean price (``Price``) and the
    first 12 characters of the first auction identifier (``Border``).
    """
    root = ET.parse(filename).getroot()

    # zip() stops at the shortest of the three element sequences.
    rows = [
        (quantity.get("v"), amount.get("v"), auction.get("v"))
        for quantity, amount, auction in zip(
            root.iterfind(".//Qty"),
            root.iterfind(".//PriceAmount"),
            root.iterfind(".//AuctionIdentification"),
        )
    ]

    # Attribute values are strings; make the numeric columns float.
    frame = pd.DataFrame(rows, columns=["Qty", "Price", "Border"])
    frame = frame.astype({"Qty": float, "Price": float})

    first_border = frame.loc[0, "Border"]

    return pd.DataFrame(
        {
            "Qty": [frame["Qty"].sum()],
            "Price": [frame["Price"].mean()],
            "Border": [str(first_border)[0:12]],
        },
        columns=["Qty", "Price", "Border"],
    )

You can then call it for your files:

my_xml_processor("sample2.xml")

my_xml_processor("soup.xml")

EDIT: these are some minor code changes that I'd recommend:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename: str) -> pd.DataFrame:  # <- Add type hints
    """Summarise one auction XML file as a single-row DataFrame.

    Args:
        filename: Path of the XML file to parse.

    Returns:
        A one-row frame with columns ``Qty`` (sum of all ``Qty`` values),
        ``Price`` (mean of all ``PriceAmount`` values) and ``Border``
        (first 12 characters of the first ``AuctionIdentification`` value).

    Raises:
        ValueError: If the file yields no Qty/PriceAmount/
            AuctionIdentification triple to summarise.
    """
    root = ET.parse(filename).getroot()  # <- tree is not used

    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    data = [  # <- This could be a list comprehension
        (x.get("v"), y.get("v"), z.get("v"))
        for x, y, z in zip(qty, pri, cor)  # zip() stops at the shortest
    ]
    if not data:
        raise ValueError(
            f"no Qty/PriceAmount/AuctionIdentification rows found in {filename!r}"
        )

    df = (
        pd.DataFrame(data, columns=["Qty", "Price", "Border"])
        .astype({"Qty": float, "Price": float})
    )

    # Build the summary row explicitly instead of passing a lambda to
    # DataFrame.agg: older pandas releases first try a callable given to
    # agg element-wise, so `x[0]` would index into each string rather than
    # the column, and the result cannot be combined with 'sum' / 'mean'.
    return pd.DataFrame(
        {
            "Qty": [df["Qty"].sum()],
            "Price": [df["Price"].mean()],
            "Border": [str(df["Border"].iloc[0])[:12]],
        }
    )

Upvotes: 2

nordmanden
nordmanden

Reputation: 340

You could keep your existing code but run it in a loop over each filename you have, something like:


import xml.etree.ElementTree as ET
import pandas as pd
import os


files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One single-row summary frame per input file. Without this list each
# iteration would overwrite df2 and only the last file's summary would
# survive the loop.
summaries = []

for file in files:  # read each filename from the list above
    tree = ET.parse(file)
    root = tree.getroot()

    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # Collect the "v" attribute of each matched triple; zip() stops at the
    # shortest of the three sequences.
    data = [
        (x.get("v"), y.get("v"), z.get("v"))
        for x, y, z in zip(qty, pri, cor)
    ]

    # Attribute values are strings, so convert the numeric columns.
    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    # Summary row for this file: summed quantity, mean price, and the
    # first 12 characters of the first auction identifier.
    df2 = pd.DataFrame({
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[0:12]],
    })
    summaries.append(df2)

# Stack the per-file rows into one table: one row per XML file, in the
# same order as `files`.
result = pd.concat(summaries, ignore_index=True)

Upvotes: 0

Related Questions