Reputation: 301
I am new to Python. I want to retrieve a tag value from multiple xml files and print it in an excel sheet. I tried and got the script working fine other than the excel printing part.
Here is my script
from xml.dom.minidom import parse, parseString
import xlwt
import os
def sh(dir):
for r,d,f in os.walk(dir):
n=0
for files in f:
if files.endswith(".xml"):
print files
dom=parse(os.path.join(r, files))
name = dom.getElementsByTagName('rev')
title = dom.getElementsByTagName('title')
a=xlwt.Workbook()
sheet=a.add_sheet('sheet1')
sheet.write(n, 0, files)
sheet.write(n, 1, title[0].firstChild.nodeValue)
sheet.write(n, 2, name[0].firstChild.nodeValue)
n=n+1
a.save('sha.xls')
print title[0].firstChild.nodeValue
print name[0].firstChild.nodeValue
sh("path")
The problem i stuck up with is, the output is printed in only in these columns (0, 0), (0, 1), (0,2).
e.g. if i want
A B C
D E F
G H I
my output
G H I
in (0,0), (0,1), (0,2). So i understood that every new output is overridden on the existing one and only the final output is displayed. How can i avoid this and get what i wanted?
Upvotes: 1
Views: 2271
Reputation: 473863
You should define your workbook and sheet outside the loop:
def sh(dir):
a = xlwt.Workbook()
sheet = a.add_sheet('sheet1')
n = 0
for r,d,f in os.walk(dir):
for files in f:
if files.endswith(".xml"):
print files
dom=parse(os.path.join(r, files))
name = dom.getElementsByTagName('rev')
title = dom.getElementsByTagName('title')
sheet.write(n, 0, files)
sheet.write(n, 1, title[0].firstChild.nodeValue)
sheet.write(n, 2, name[0].firstChild.nodeValue)
n += 1
print title[0].firstChild.nodeValue
print name[0].firstChild.nodeValue
a.save('sha.xls')
Also, if you don't need to search for xml files inside subdirectories, consider switching to glob.glob() instead of using os.walk()
:
def sh(dir):
a = xlwt.Workbook()
sheet = a.add_sheet('sheet1')
n = 0
for f in glob.glob(os.path.join(dir, '*.xml')):
dom = parse(os.path.join(dir, f))
name = dom.getElementsByTagName('rev')
title = dom.getElementsByTagName('title')
sheet.write(n, 0, f)
sheet.write(n, 1, title[0].firstChild.nodeValue)
sheet.write(n, 2, name[0].firstChild.nodeValue)
n += 1
a.save('sha.xls')
Upvotes: 1