Reputation: 1
I need a little help from you guys.
I'm new to programming, so don't expect much from my code.
Here is the thing: I need to parse a bunch of XML files in a folder and write the results to a .xls or a .csv file. So far I have managed to parse a single XML file and write its data to a .txt file, but the file I used is located in the same folder as the program.
Here is the code:
from xml.dom import minidom
from datetime import *
# Ask for the date that names the output file (<YYYY-MM-DD>.txt).
ano = int(input("Year: "))
mes = int(input("Month: "))
dia = int(input("Day: "))
dt_obj = datetime(ano, mes, dia)  # raises ValueError for an impossible date
date_str = dt_obj.strftime("%Y-%m-%d")

# Extracting the information from the XML nodes.
# Each lookup takes the first matching descendant element, so a missing tag
# raises IndexError before anything is written.
xmldoc = minidom.parse("NAME OF THE FILE.XML")
NFe = xmldoc.getElementsByTagName("NFe")[0]
infNFe = NFe.getElementsByTagName("infNFe")[0]
ide = infNFe.getElementsByTagName("ide")[0]
nNF = ide.getElementsByTagName("nNF")[0].firstChild.data
dEmi = ide.getElementsByTagName("dEmi")[0].firstChild.data
serie = ide.getElementsByTagName("serie")[0].firstChild.data
emit = infNFe.getElementsByTagName("emit")[0]
cnpj = emit.getElementsByTagName("CNPJ")[0].firstChild.data
nfeProc = xmldoc.getElementsByTagName("nfeProc")[0]
chNFe = nfeProc.getElementsByTagName("chNFe")[0].firstChild.data

output_name = date_str + ".txt"
try:
    # Mode "w" creates a new file or **overwrites an existing file**; the
    # with-block closes it even if one of the writes fails.
    with open(output_name, "w") as f:
        f.write("CNPJ: " + cnpj)
        f.write("\nNUMERO DA NOTA: " + nNF)
        f.write("\nDATA DE EMISSAO: " + dEmi)
        f.write("\nSERIE: " + serie)
        f.write("\nCHAVE ELETRONICA: " + chNFe)
except IOError as err:
    # Still best-effort (the script does not crash), but the failure is
    # reported instead of being silently discarded.
    print("Could not write %s: %s" % (output_name, err))
I've succeeded in reading the XML, parsing it, and writing the information from the nodes that I needed.
What I need now is to read a folder with a bunch of these files and write the results to an .XLS file.
Anyone?
Upvotes: 0
Views: 6403
Reputation: 43497
try this on for size.
from xml.dom import minidom
from datetime import *
# Prompt for year, month and day (in that order) and turn the answers into
# an ISO-style "YYYY-MM-DD" string.
ano, mes, dia = [
    int(input(prompt)) for prompt in ("Year: ", "Month: ", "Day: ")
]
dt_obj = datetime(ano, mes, dia)
date_str = format(dt_obj, "%Y-%m-%d")
#Extracting the information from the XML nodes
def get_files(d):
    """Return the paths of the regular files directly inside directory *d*.

    Sub-directories are skipped and nothing is searched recursively. Each
    returned path is *d* joined with the entry name, in the order in which
    ``os.listdir`` reports the entries.
    """
    # Imported locally because the surrounding snippet never imports ``os``.
    import os

    # Join each entry once, then keep only the ones that are regular files.
    candidates = (os.path.join(d, name) for name in os.listdir(d))
    return [path for path in candidates if os.path.isfile(path)]
def parse(files):
    """Extract the invoice fields from every XML file in *files*.

    Each path is parsed with ``minidom``. Inside the first ``NFe`` element's
    first ``infNFe`` element, the text of ``CNPJ`` (under ``emit``) and of
    ``nNF``, ``dEmi`` and ``serie`` (under ``ide``) is read.

    Returns:
        A list with one ``[cnpj, nNF, dEmi, serie]`` row of strings per
        input file, in input order. The rows can be handed to a CSV writer.

    Raises:
        IndexError: if one of the expected elements is missing.
        AttributeError: if one of the leaf elements is empty.
    """
    def first_text(node, tag):
        # Text of the first descendant <tag> element of *node*.
        return node.getElementsByTagName(tag)[0].firstChild.data

    rows = []
    for xml_file in files:
        xmldoc = minidom.parse(xml_file)
        NFe = xmldoc.getElementsByTagName("NFe")[0]
        infNFe = NFe.getElementsByTagName("infNFe")[0]
        ide = infNFe.getElementsByTagName("ide")[0]
        emit = infNFe.getElementsByTagName("emit")[0]
        nNF = first_text(ide, "nNF")
        dEmi = first_text(ide, "dEmi")
        serie = first_text(ide, "serie")
        cnpj = first_text(emit, "CNPJ")
        # Keep the values instead of discarding them at the end of the loop.
        rows.append([cnpj, nNF, dEmi, serie])
    return rows
# List the files in DIRECTORY first, then hand the whole list to parse().
xml_files = get_files(DIRECTORY)
parse(xml_files)
Here DIRECTORY is the path of the folder that contains the XML files.
Since this is only part of your code, you will need to fill in the rest on your own. You have not said exactly what you want to write, or the format you want to write it in.
Something to help you write your CSV file:
# csv_location is the path of a *.csv file, and contents is a list of lists:
# ( [ ["row1 item1", "row1 item2", "row1 item3"], ["row2 item1", "row2 item2", "row2 item3"] ] )
def write_csv(csv_location, contents, header=("Header", "Items", "Here")):
    """Write *contents* to *csv_location* as comma-separated rows.

    Args:
        csv_location: path of the file to create; an existing file is
            overwritten.
        contents: iterable of rows, each row an iterable of items. Items
            that are not strings are converted with ``str()``.
        header: row written before the data. Pass ``None`` if no header
            line is wanted.

    Fields containing commas, quotes or line breaks are quoted by the
    ``csv`` module, so they survive a round trip through a CSV reader.
    """
    # Local import so this helper works wherever it is pasted.
    import csv

    # newline="" lets the csv writer control line endings itself; "\n" keeps
    # the one-line-per-row layout of the plain-text version.
    with open(csv_location, "w", newline="") as handle:
        writer = csv.writer(handle, lineterminator="\n")
        if header is not None:
            writer.writerow(header)
        writer.writerows(contents)
Upvotes: 0
Reputation: 8165
If the xml files are in a single folder, you can do something like:
import os
import sys
def select_files_in_folder(dir, ext):
    """Yield the path of each entry directly inside *dir* ending in ``.ext``.

    The extension is compared case-insensitively, so ``ext='xml'`` also
    matches ``INVOICE.XML``. Only the entry name is inspected; entries are
    not checked to be regular files, and sub-folders are not searched.
    """
    suffix = ('.%s' % ext).lower()
    for name in os.listdir(dir):
        if name.lower().endswith(suffix):
            yield os.path.join(dir, name)
# The folder to scan is taken from the first command-line argument.
xml_paths = select_files_in_folder(sys.argv[1], 'xml')
for xml_path in xml_paths:
    process_xml_file(xml_path)
Or, if the files can be in subfolders, use:
def select_files_in_subfolders(dir, ext):
    """Yield the path of each file under *dir*, at any depth, ending in ``.ext``.

    The tree is traversed with ``os.walk``. The extension is compared
    case-insensitively, so ``ext='xml'`` also matches ``INVOICE.XML``.
    """
    suffix = ('.%s' % ext).lower()
    for root, _dirs, files in os.walk(dir):
        for name in files:
            if name.lower().endswith(suffix):
                # Join with ``root`` (the folder currently being walked),
                # not ``dir``: otherwise every file found in a sub-folder
                # would be reported as if it sat directly in the top folder.
                yield os.path.join(root, name)
Upvotes: 1