Reputation: 1
I'm fairly new to Python and would like to loop through multiple XML files. I'm currently using the following code to read a single file, sample2.xml:
import xml.etree.ElementTree as ET
import pandas as pd
import os
# Parse one XML file and locate the three element streams of interest.
tree = ET.parse("sample2.xml")
root = tree.getroot()
qty = root.iterfind(".//Qty")
pri = root.iterfind(".//PriceAmount")
cor = root.iterfind(".//AuctionIdentification")

# Pair the elements positionally and keep each one's "v" attribute.
# zip() stops at the shortest iterator, so unmatched trailing elements
# are silently dropped.
data = []
for x, y, z in zip(qty, pri, cor):
    a = x.get("v"), y.get("v"), z.get("v")
    data.append(a)

df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
# Attribute values arrive as strings; convert the numeric ones.
df['Qty'] = df['Qty'].astype(float)
df['Price'] = df['Price'].astype(float)

# Collapse the file into a single summary row.
total = df['Qty'].sum()
price = df['Price'].mean()
border = df.loc[0, 'Border']  # value taken from the first extracted row

df2 = pd.DataFrame(columns=["Qty", "Price", "Border"])
df2['Qty'] = [total]
df2['Price'] = [price]
df2['Border'] = [str(border)[0:12]]  # keep only the first 12 characters
I tried adding a second file, soup.xml, to the line below, but this didn't work:
tree=ET.parse("sample2.xml , "soup xml")
root = tree.getroot()
Upvotes: 0
Views: 233
Reputation: 2987
Consider turning your code into a function and calling it for the various files you need:
import xml.etree.ElementTree as ET
import pandas as pd
import os
def my_xml_processor(filename):
    """Summarise one XML file as a single-row DataFrame.

    Every ``Qty``, ``PriceAmount`` and ``AuctionIdentification`` element in
    the document is read (at any depth) and its ``v`` attribute extracted.
    The three streams are paired positionally; ``zip`` stops at the shortest
    one, so surplus elements are ignored.

    Args:
        filename: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A DataFrame with columns ``Qty`` (sum of all quantities), ``Price``
        (mean of all prices) and ``Border`` (first 12 characters of the
        first ``AuctionIdentification`` value).

    Raises:
        KeyError: If no complete (Qty, PriceAmount, AuctionIdentification)
            triple is found, because there is no row 0 to read the border
            from.
    """
    root = ET.parse(filename).getroot()
    rows = [
        (qty.get("v"), price.get("v"), auction.get("v"))
        for qty, price, auction in zip(
            root.iterfind(".//Qty"),
            root.iterfind(".//PriceAmount"),
            root.iterfind(".//AuctionIdentification"),
        )
    ]

    df = pd.DataFrame(rows, columns=["Qty", "Price", "Border"])
    # Attribute values are strings; convert the numeric columns before
    # aggregating.
    df["Qty"] = df["Qty"].astype(float)
    df["Price"] = df["Price"].astype(float)

    border = df.loc[0, "Border"]
    return pd.DataFrame(
        {
            "Qty": [df["Qty"].sum()],
            "Price": [df["Price"].mean()],
            "Border": [str(border)[0:12]],
        }
    )
You can then call it for your files:
# Process every file and stack the one-row summaries into a single table.
# Calling the function without keeping its return value would throw the
# result away.
filenames = ["sample2.xml", "soup.xml"]
summary = pd.concat(
    [my_xml_processor(name) for name in filenames],
    ignore_index=True,
)
import xml.etree.ElementTree as ET
import pandas as pd
import os
def my_xml_processor(filename: str) -> pd.DataFrame:
    """Summarise one XML file as a single-row DataFrame.

    The ``v`` attribute of every ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` element is collected; the three streams are
    paired positionally (``zip`` truncates to the shortest).

    Args:
        filename: Path of the XML file to parse.

    Returns:
        A one-row DataFrame with float columns ``Qty`` (sum) and ``Price``
        (mean), and a string column ``Border`` holding the first 12
        characters of the first ``AuctionIdentification`` value.

    Raises:
        KeyError: If no complete triple of elements is found, since there
            is then no row 0 to take the border from.
    """
    root = ET.parse(filename).getroot()
    data = [
        (x.get("v"), y.get("v"), z.get("v"))
        for x, y, z in zip(
            root.iterfind(".//Qty"),
            root.iterfind(".//PriceAmount"),
            root.iterfind(".//AuctionIdentification"),
        )
    ]
    df = (
        pd.DataFrame(data, columns=["Qty", "Price", "Border"])
        .astype({"Qty": float, "Price": float})
    )
    # Build the summary row directly instead of df.agg(...).to_frame().T:
    # a callable handed to agg may be applied element-wise (depending on the
    # pandas version), and transposing a mixed Series would turn the numeric
    # columns into object dtype.
    return pd.DataFrame(
        {
            "Qty": [df["Qty"].sum()],
            "Price": [df["Price"].mean()],
            "Border": [str(df.loc[0, "Border"])[:12]],
        }
    )
Upvotes: 2
Reputation: 340
You could keep your existing code but run it in a loop over each filename you have, something like:
import xml.etree.ElementTree as ET
import pandas as pd
import os
files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One single-row summary per input file. Without this list, df2 would be
# overwritten on every iteration and only the last file's result would remain.
summaries = []

for file in files:  # read each filename from the list above
    tree = ET.parse(file)
    root = tree.getroot()
    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # Pair the elements positionally and keep each one's "v" attribute;
    # zip() truncates to the shortest of the three streams.
    data = [
        (x.get("v"), y.get("v"), z.get("v"))
        for x, y, z in zip(qty, pri, cor)
    ]

    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']  # value taken from the first extracted row

    df2 = pd.DataFrame(
        {
            "Qty": [total],
            "Price": [price],
            "Border": [str(border)[0:12]],  # first 12 characters only
        }
    )
    summaries.append(df2)

# Final table: one row per file, in the order given in `files`.
result = pd.concat(summaries, ignore_index=True)
Upvotes: 0