GeorgeC
GeorgeC

Reputation: 1037

Code being dropped from xml created using python

I am copying and then updating a metadata XML file using Python. This works fine, except that the following lines from the original metadata file are being dropped:

<?xml version="1.0" encoding="utf-8"?><?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>

It needs to go at the start of the file.

There is an answer for this in PHP at "xml insertion at specific point of xml file", but I need a solution for Python.

The code and a full explanation are in my original post, but I am separating this question out because it is different from the original issues I had: "Search and replace multiple lines in xml/text files using python".

Thanks,

FULL CODE

import os, xml, arcpy, shutil, datetime, Tkinter, tkFileDialog, tkSimpleDialog
from xml.etree import ElementTree as et 

# The directory the script is launched from is the root of the tree to process.
path=os.getcwd()
RootDirectory=path
currentPath=path
arcpy.env.workspace = path
# Running total of feature classes processed; used to number the log entries.
Count=0
# Header written verbatim at the top of every generated metadata file, because
# ElementTree drops the stylesheet processing instruction when it parses a file.
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""
# Build the log path with os.path.join rather than by concatenating a
# backslash-prefixed literal, which only worked because '\G' is not an escape.
Generated_XMLs=os.path.join(RootDirectory,'GeneratedXML_LOG.txt')
# Append a dated header for this run; the with-block closes the log even if
# the write fails.
with open(Generated_XMLs, 'a') as f:
    f.write("Log of Metadata Creation Process - Update: "+str(datetime.datetime.now())+"\n")

for root, dirs, files in os.walk(RootDirectory, topdown=False):
    #print root, dirs
    for directory in dirs:
        try:
            currentPath=os.path.join(root,directory)
        except:
            pass
        os.chdir(currentPath)
        arcpy.env.workspace = currentPath
        print currentPath
#def Create_xml(currentPath):

        FileList = arcpy.ListFeatureClasses()
        zone="_Zone"

        for File in FileList:
            Count+=1
            FileDesc_obj = arcpy.Describe(File)
            FileNm=FileDesc_obj.file
            check_meta=os.listdir(currentPath)
            existingXML=FileNm[:FileNm.find('.')]
            existingExtension=FileNm[FileNm.find('.'):]
            print "XML: "+existingXML
            #print check_meta
            #if  existingXML+'.xml' in check_meta:
            #newMetaFile='new'
            for f in check_meta:
                if f.startswith(existingXML) and f.endswith('.xml'):
                    print "exists, file name:", f
                    newMetaFile=FileNm+"_2012Metadata.xml"
                    try:
                        shutil.copy2(f, newMetaFile)
                    except:
                        pass
                    break
                else:
                    #print "Does not exist"
                    newMetaFile=FileNm+"_BaseMetadata.xml"

            print "New meta file: "+newMetaFile+ " for: "+File
            if newMetaFile.endswith('_BaseMetadata.xml'):        
                print "calling tkinter"
                root = Tkinter.Tk()
                root.withdraw()
                file = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a xml base file to match with: '+File)
                if file != None:
                    metafile=os.path.abspath(file.name)
                    file.close()
                    #print metafile
                    shutil.copy2(metafile,newMetaFile)
                    print "copied"+metafile
                    root.destroy

                else:
                    shutil.copy2('L:\Data_Admin\QA\Metadata_python_toolset\Master_Metadata.xml', newMetaFile)
                    #root = Tkinter.Tk()
                    #root.withdraw()
                    #newTitle=tkSimpleDialog.askstring('title', 'prompt')
                    #root.destroy
                    #print newTitle

            print "Parsing meta file: "+newMetaFile
            tree=et.parse(newMetaFile)        
            print "Processing: "+str(File)

            for node in tree.findall('.//title'):
                node.text = str(FileNm)
            for node in tree.findall('.//procstep/srcused'):
                node.text = str(currentPath+"\\"+existingXML+".xml")
            dt=dt=str(datetime.datetime.now())
            for node in tree.findall('.//procstep/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//procstep/time'):
                node.text = str(dt[11:13]+dt[16:19])
            for node in tree.findall('.//metd/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//northbc'):
                node.text = str(FileDesc_obj.extent.YMax)
            for node in tree.findall('.//southbc'):
                node.text = str(FileDesc_obj.extent.YMin)
            for node in tree.findall('.//westbc'):
                node.text = str(FileDesc_obj.extent.XMin)
            for node in tree.findall('.//eastbc'):
                node.text = str(FileDesc_obj.extent.XMax)        
            for node in tree.findall('.//native/nondig/formname'):
                node.text = str(os.getcwd()+"\\"+File)
            for node in tree.findall('.//native/digform/formname'):
                node.text = str(FileDesc_obj.featureType)
            for node in tree.findall('.//avlform/nondig/formname'):
                node.text = str(FileDesc_obj.extension)
            for node in tree.findall('.//avlform/digform/formname'):
                node.text = str(float(os.path.getsize(File))/int(1024))+" KB"
            for node in tree.findall('.//theme'):
                node.text = str(FileDesc_obj.spatialReference.name +" ; EPSG: "+str(FileDesc_obj.spatialReference.factoryCode))
            print node.text
            projection_info=[]
            Zone=FileDesc_obj.spatialReference.name

            if "GCS" in str(FileDesc_obj.spatialReference.name):
                projection_info=[FileDesc_obj.spatialReference.GCSName, FileDesc_obj.spatialReference.angularUnitName, FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName]
                print "Geographic Coordinate system"
            else:
                projection_info=[FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName, FileDesc_obj.spatialReference.angularUnitName, Zone[Zone.rfind(zone)-3:]]
                print "Projected Coordinate system"
            x=0
            for node in tree.findall('.//spdom'):
                for node2 in node.findall('.//keyword'):
                    #print node2.text
                    node2.text = str(projection_info[x])
                    #print node2.text
                    x=x+1


            tree.write(newMetaFile)
            with open(newMetaFile, 'w') as output: # would be better to write to temp file and rename
                output.write(DECLARATION)
                tree.write(output, xml_declaration=False, encoding='utf-8') 
    # xml_declaration=False - don't write default declaration   

            f = open(Generated_XMLs, 'a')
            f.write(str(Count)+": "+File+"; "+newMetaFile+"; "+currentPath+";"+existingXML+"\n")
            f.close()



    #        Create_xml(currentPath)

Error message from Wing IDE

xml.parsers.expat.ExpatError: no element found: line 3, column 0 File "L:\Data_Admin\QA\Metadata_python_toolset\test2\update_Metadata1f.py", line 78, in tree=et.parse(newMetaFile) File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 862, in parse tree.parse(source, parser) File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 587, in parse self._root = parser.close() File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 1254, in close self._parser.Parse("", 1) # end of data

Upvotes: 0

Views: 1624

Answers (2)

Arthur Neufeld
Arthur Neufeld

Reputation: 21

I struggled with adding PIs (processing instructions) to the start of an ElementTree document too. I came up with a solution that uses a fake root node (with None as the element tag) to hold any required processing instructions, followed by the real document root node.

import xml.etree.ElementTree as ET

# Start from the document's real root element, built however you need it.
root = ET.Element('root')

# The stylesheet instruction that has to precede the root element.  The
# newline in .tail puts whatever follows it on a line of its own.
pi = ET.ProcessingInstruction("xml-stylesheet", "type='text/xsl' href='ANZMeta.xsl'")
pi.tail = "\n"

# An element whose tag is None acts as an invisible container: only its
# children are written out.  Instructions go in first, the real root last.
# Append further processing instructions before `root` as necessary.
fake_root = ET.Element(None)
fake_root.extend([pi, root])

# Let ElementTree.write() generate the leading <?xml ...?> declaration.
tree = ET.ElementTree(fake_root)
tree.write("doc.xml", xml_declaration=True)

The resulting doc.xml file:

<?xml version='1.0' encoding='us-ascii'?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>
<root />

Upvotes: 2

reclosedev
reclosedev

Reputation: 9522

If all your XML files have the same declaration, you can write it yourself:

import xml.etree.ElementTree as ET


# Header to put at the top of the rewritten file; ElementTree does not keep
# the stylesheet processing instruction when it parses the original.
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""

tree = ET.parse(filename)
# do some work on tree

# Serialize BEFORE opening the file for writing: if serialization fails, the
# original file has not been truncated yet.  With encoding 'utf-8' no default
# declaration is emitted, so DECLARATION is the only one in the output.
body = ET.tostring(tree.getroot(), encoding='utf-8')

# Binary mode, because tostring() returns encoded bytes; the declaration is
# encoded to match so the same code runs on Python 2 and Python 3.
with open(filename, 'wb') as output:
    output.write(DECLARATION.encode('utf-8'))
    output.write(body)

Upvotes: 1

Related Questions