Reputation: 211
I am trying to write code that searches for all the XML files in a directory, parses them, and saves some of their data to a CSV file. I have more than 50 XML files in that directory. Whenever I run my code, a CSV file is created, but it only contains the data of the last XML file. How can I write the data from all the XML files to a CSV file? Please help. Here is my code:
from xml.dom.minidom import parse
import csv
import os
def writeToCSV(frelation):
    """Write the sub/sup pairs found under *frelation* to ``data.csv``.

    For every ``predicate`` element below *frelation*, one row is written
    per nested ``sup`` element: the predicate's ``sub`` attribute paired
    with the ``sup`` element's ``name`` attribute.

    The file is opened with mode ``'w'``, so every call truncates
    ``data.csv`` and writes a fresh header; rows written by an earlier
    call are discarded.

    Args:
        frelation: a minidom element whose descendants are searched for
            ``predicate`` elements.

    Raises:
        KeyError: if a ``predicate`` has no ``sub`` attribute or a ``sup``
            has no ``name`` attribute.
    """
    fieldnames = ['sub', 'sup']
    # The with-block closes (and flushes) the file even if a row fails;
    # newline='' lets the csv module control line endings itself.
    with open('data.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for elem in frelation.getElementsByTagName("predicate"):
            sub = elem.attributes['sub'].value
            for elem1 in elem.getElementsByTagName("sup"):
                sup = elem1.attributes['name'].value
                writer.writerow({'sub': sub, 'sup': sup})
# Walk data/frames recursively and export every .xml file found there.
for root, dirs, files in os.walk('data/frames'):
    for filename in files:
        if not filename.endswith('.xml'):
            continue
        xmldoc = parse(os.path.join(root, filename))
        # [0] assumes each document contains at least one <frameset> element;
        # an IndexError is raised otherwise.
        frelation = xmldoc.getElementsByTagName("frameset")[0]
        writeToCSV(frelation)
Upvotes: 0
Views: 1880
Reputation: 1340
You are overwriting the same file on every call to writeToCSV. One small change is to write each XML file's data to its own CSV file, as below:
def writeToCSV(frelation, file_id):
    """Write the sub/sup pairs found under *frelation* to ``data<file_id>.csv``.

    For every ``predicate`` element below *frelation*, one row is written
    per nested ``sup`` element: the predicate's ``sub`` attribute paired
    with the ``sup`` element's ``name`` attribute. Using a distinct
    *file_id* per call gives each input document its own output file.

    Args:
        frelation: a minidom element whose descendants are searched for
            ``predicate`` elements.
        file_id: value inserted into the output file name via ``str()``.

    Raises:
        KeyError: if a ``predicate`` has no ``sub`` attribute or a ``sup``
            has no ``name`` attribute.
    """
    fieldnames = ['sub', 'sup']
    # The with-block closes (and flushes) the file even if a row fails;
    # newline='' lets the csv module control line endings itself.
    with open('data' + str(file_id) + '.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for elem in frelation.getElementsByTagName("predicate"):
            sub = elem.attributes['sub'].value
            for elem1 in elem.getElementsByTagName("sup"):
                sup = elem1.attributes['name'].value
                writer.writerow({'sub': sub, 'sup': sup})
# Walk data/frames recursively; each .xml file gets its own numbered CSV
# (data1.csv, data2.csv, ...) in the order os.walk yields the files.
file_id = 1
for root, dirs, files in os.walk('data/frames'):
    for filename in files:
        if not filename.endswith('.xml'):
            continue
        xmldoc = parse(os.path.join(root, filename))
        # [0] assumes each document contains at least one <frameset> element;
        # an IndexError is raised otherwise.
        frelation = xmldoc.getElementsByTagName("frameset")[0]
        writeToCSV(frelation, file_id)
        file_id += 1
If you want only one CSV file, you need to open the file in append mode; the a+ mode also creates the file if it does not exist:
def writeToCSV(frelation):
    """Append the sub/sup pairs found under *frelation* to ``data.csv``.

    For every ``predicate`` element below *frelation*, one row is appended
    per nested ``sup`` element: the predicate's ``sub`` attribute paired
    with the ``sup`` element's ``name`` attribute.

    The header row is written only when ``data.csv`` does not exist yet or
    is empty, so repeated calls build one CSV with a single header. Because
    the file is opened in append mode, rows left over from a previous run
    are kept; delete ``data.csv`` first to start from scratch.

    Args:
        frelation: a minidom element whose descendants are searched for
            ``predicate`` elements.

    Raises:
        KeyError: if a ``predicate`` has no ``sub`` attribute or a ``sup``
            has no ``name`` attribute.
    """
    path = 'data.csv'
    fieldnames = ['sub', 'sup']
    # Decide before opening: 'a+' creates the file, which would hide the
    # "does not exist yet" case.
    needs_header = not os.path.isfile(path) or os.path.getsize(path) == 0
    # The with-block closes (and flushes) the file even if a row fails;
    # newline='' lets the csv module control line endings itself.
    with open(path, 'a+', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        for elem in frelation.getElementsByTagName("predicate"):
            sub = elem.attributes['sub'].value
            for elem1 in elem.getElementsByTagName("sup"):
                sup = elem1.attributes['name'].value
                writer.writerow({'sub': sub, 'sup': sup})
No changes required in other code.
Upvotes: 2