Tomasz Przemski
Tomasz Przemski

Reputation: 1127

How to sorted xml node values within against a group?

I have input csv file:

col2  col3  col1
a1    b1    c1
a2    b2    c2
a3    b3    c3

Script that converts to an xml file:

import csv

csvFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.csv'
xmlFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.xml'

csvData = csv.reader(open(csvFile), delimiter=';')
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n" +'<TabularXml>' + "\n" )
xmlData.write('<Sheet>' + "\n" )

rowNum = 0
for row in csvData:
    if rowNum == 0:
        tags = row
        # replace spaces w/ underscores in tag names
        for i in range(len(tags)):
            tags[i] = tags[i].replace(' ', '_')
    else: 
        xmlData.write(' '+'<Row' + ' ' +'srcidx='+'"'+str(rowNum+1)+'"'+ '>' + "\n")
        for i in range (len(tags)):
            
            if tags[i]=='col1':
                if row[i] !='':
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="1"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="String"'+' '+'>' \
                      + row[i] + '</' + 'Cell' + '>' + "\n")
                else:
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="1"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="String"'+' '+'>' \
                      + "\n")
            if tags[i]=='col2':
                if row[i] !='':
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="2"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="DateTime"'+' '+'>' \
                      + row[i] + '</' + 'Cell' + '>' + "\n")
                else:
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="2"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="DateTime"'+' '+'>' \
                      + "\n")
            if tags[i]=='col3':
                if row[i] !='':
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="3"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="Int32"'+' '+'>' \
                      + row[i] + '</' + 'Cell' + '>' + "\n")
                else:
                    xmlData.write('    ' +'<' + 'Cell' + ' ' +'idx="3"'+' '+ 'name='+'"'+tags[i]+'"'+' '+'type="Int32"'+' '+'>' \
                      + "\n")
                
        xmlData.write(' '+'</Row>' + "\n")

    rowNum +=1

xmlData.write('</Sheet>' + "\n")
xmlData.write('</TabularXml>' + "\n")
xmlData.close()

Output xml file:

<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
 <Row srcidx="2">
    <Cell idx="2" name="col2" type="DateTime" >a1</Cell>
    <Cell idx="3" name="col3" type="Int32" >b1</Cell>
    <Cell idx="1" name="col1" type="String" >c1</Cell>
 </Row>
 <Row srcidx="3">
    <Cell idx="2" name="col2" type="DateTime" >a2</Cell>
    <Cell idx="3" name="col3" type="Int32" >b2</Cell>
    <Cell idx="1" name="col1" type="String" >c2</Cell>
 </Row>
 <Row srcidx="4">
    <Cell idx="2" name="col2" type="DateTime" >a3</Cell>
    <Cell idx="3" name="col3" type="Int32" >b3</Cell>
    <Cell idx="1" name="col1" type="String" >c3</Cell>
 </Row>
</Sheet>
</TabularXml>

How can I sorted data by 'Cell' nodes and 'idx' values?

Expected result:

<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
 <Row srcidx="2">
    <Cell idx="1" name="col1" type="String" >c1</Cell>
    <Cell idx="2" name="col2" type="DateTime" >a1</Cell>
    <Cell idx="3" name="col3" type="Int32" >b1</Cell>
 </Row>
 <Row srcidx="3">
    <Cell idx="1" name="col1" type="String" >c2</Cell>
    <Cell idx="2" name="col2" type="DateTime" >a2</Cell>
    <Cell idx="3" name="col3" type="Int32" >b2</Cell>
 </Row>
 <Row srcidx="4">
    <Cell idx="1" name="col1" type="String" >c3</Cell>
    <Cell idx="2" name="col2" type="DateTime" >a3</Cell>
    <Cell idx="3" name="col3" type="Int32" >b3</Cell>
 </Row>
</Sheet>
</TabularXml>

Upvotes: 0

Views: 34

Answers (1)

furas
furas

Reputation: 143187

You have always the same column names so you can use them to create order

order = [
    column_names.index('col1'), 
    column_names.index('col2'), 
    column_names.index('col3')
]

and then you can use it to sort data

column_names = [
    column_names[order[0]], 
    column_names[order[1]], 
    column_names[order[2]]
]

row = [
    row[order[0]], 
    row[order[1]], 
    row[order[2]]
]

For more columns you could even use for-loop but I skip this idea.


Minimal working code.

I use io only to simulate file in memory.

I use next() to get first/single row from csv

text = '''col2;col3;col1
a1;b1;c1
a2;b2;c2
a3;b3;c3'''

import csv
import io


csv_reader = csv.reader(io.StringIO(text), delimiter=';')

column_names = next(csv_reader)

order = [
    column_names.index('col1'), 
    column_names.index('col2'), 
    column_names.index('col3')
]
print('order:', order)


print('old column_names:', column_names)
column_names = [
    column_names[order[0]], 
    column_names[order[1]], 
    column_names[order[2]]
]
print('new column_names:', column_names)

for row in csv_reader:
    print('---')
    print('old row:', row)
    row = [
        row[order[0]], 
        row[order[1]], 
        row[order[2]]
    ]
    print('new row:', row)

Result:

order: [2, 0, 1]
old column_names: ['col2', 'col3', 'col1']
new column_names: ['col1', 'col2', 'col3']
---
old row: ['a1', 'b1', 'c1']
new row: ['c1', 'a1', 'b1']
---
old row: ['a2', 'b2', 'c2']
new row: ['c2', 'a2', 'b2']
---
old row: ['a3', 'b3', 'c3']
new row: ['c3', 'a3', 'b3']

Upvotes: 1

Related Questions