Reputation: 1127
I have input csv file:
col2 col3 col1
a1 b1 c1
a2 b2 c2
a3 b3 c3
Script that converts to an xml file:
import csv
csvFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.csv'
xmlFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.xml'
csvData = csv.reader(open(csvFile), delimiter=';')
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n" +'<TabularXml>' + "\n" )
xmlData.write('<Sheet>' + "\n" )
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write(' '+'<Row' + ' ' +'srcidx='+'"'+str(rowNum+1)+'"'+ '>' + "\n")
for i in range (len(tags)):
if tags[i]=='col1':
if row[i] !='':
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="1"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="String"'+' '+'>' \
+ row[i] + '</' + 'Cell' + '>' + "\n")
else:
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="1"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="String"'+' '+'>' \
+ "\n")
if tags[i]=='col2':
if row[i] !='':
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="2"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="DateTime"'+' '+'>' \
+ row[i] + '</' + 'Cell' + '>' + "\n")
else:
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="2"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="DateTime"'+' '+'>' \
+ "\n")
if tags[i]=='col3':
if row[i] !='':
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="3"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="Int32"'+' '+'>' \
+ row[i] + '</' + 'Cell' + '>' + "\n")
else:
xmlData.write(' ' +'<' + 'Cell' + ' ' +'idx="3"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="Int32"'+' '+'>' \
+ "\n")
xmlData.write(' '+'</Row>' + "\n")
rowNum +=1
xmlData.write('</Sheet>' + "\n")
xmlData.write('</TabularXml>' + "\n")
xmlData.close()
Output xml file:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
<Cell idx="1" name="col1" type="String" >c1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
<Cell idx="1" name="col1" type="String" >c2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
<Cell idx="1" name="col1" type="String" >c3</Cell>
</Row>
</Sheet>
</TabularXml>
How can I sorted data by 'Cell' nodes and 'idx' values?
Expected result:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="1" name="col1" type="String" >c1</Cell>
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="1" name="col1" type="String" >c2</Cell>
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="1" name="col1" type="String" >c3</Cell>
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
</Row>
</Sheet>
</TabularXml>
Upvotes: 0
Views: 34
Reputation: 143187
You have always the same column names so you can use them to create order
order = [
column_names.index('col1'),
column_names.index('col2'),
column_names.index('col3')
]
and then you can use it to sort data
column_names = [
column_names[order[0]],
column_names[order[1]],
column_names[order[2]]
]
row = [
row[order[0]],
row[order[1]],
row[order[2]]
]
For more columns you could even use for
-loop but I skip this idea.
Minimal working code.
I use io
only to simulate file in memory.
I use next()
to get first/single row from csv
text = '''col2;col3;col1
a1;b1;c1
a2;b2;c2
a3;b3;c3'''
import csv
import io
csv_reader = csv.reader(io.StringIO(text), delimiter=';')
column_names = next(csv_reader)
order = [
column_names.index('col1'),
column_names.index('col2'),
column_names.index('col3')
]
print('order:', order)
print('old column_names:', column_names)
column_names = [
column_names[order[0]],
column_names[order[1]],
column_names[order[2]]
]
print('new column_names:', column_names)
for row in csv_reader:
print('---')
print('old row:', row)
row = [
row[order[0]],
row[order[1]],
row[order[2]]
]
print('new row:', row)
Result:
order: [2, 0, 1]
old column_names: ['col2', 'col3', 'col1']
new column_names: ['col1', 'col2', 'col3']
---
old row: ['a1', 'b1', 'c1']
new row: ['c1', 'a1', 'b1']
---
old row: ['a2', 'b2', 'c2']
new row: ['c2', 'a2', 'b2']
---
old row: ['a3', 'b3', 'c3']
new row: ['c3', 'a3', 'b3']
Upvotes: 1