Reputation: 1037
I am copying and then updating a metadata XML file using Python. This works fine, except that the following lines from the original metadata file are being deleted:
<?xml version="1.0" encoding="utf-8"?><?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>
It needs to go at the start of the file.
There is an answer for this in PHP (see "xml insertion at specific point of xml file"), but I need a solution for Python.
The code and full explanation are in my original post, "Search and replace multiple lines in xml/text files using python", but I am separating this question out because it is different from the issues I originally had.
Thanks,
FULL CODE
import os, xml, arcpy, shutil, datetime, Tkinter, tkFileDialog, tkSimpleDialog
from xml.etree import ElementTree as et
# Walks the sub-directories of the current working directory, finds or creates
# a metadata XML file for each feature class arcpy lists there, fills selected
# metadata elements from arcpy.Describe(), rewrites the file with a fixed
# header, and appends one line per feature class to a log file.
# NOTE(review): the indentation of this listing has been lost, so any nesting
# mentioned in the comments below is inferred from statement order - confirm
# against the real script.
path=os.getcwd()
RootDirectory=path
currentPath=path
arcpy.env.workspace = path
# Running count of feature classes handled; written at the start of each log line.
Count=0
# Header text written to the output file ahead of the serialized tree (see the
# open(newMetaFile, 'w') statements near the end of the script).
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""
# Log file sits in the root directory and is opened in append mode, so earlier
# runs are preserved.
Generated_XMLs=RootDirectory+'\GeneratedXML_LOG.txt'
f = open(Generated_XMLs, 'a')
f.write("Log of Metadata Creation Process - Update: "+str(datetime.datetime.now())+"\n")
f.close()
# topdown=False makes os.walk yield the deepest directories first.
for root, dirs, files in os.walk(RootDirectory, topdown=False):
#print root, dirs
for directory in dirs:
# The bare except swallows any failure of os.path.join, in which case
# currentPath keeps its previous value.
try:
currentPath=os.path.join(root,directory)
except:
pass
# Both the process working directory and the arcpy workspace are switched to
# the directory being processed; later relative file names rely on this.
os.chdir(currentPath)
arcpy.env.workspace = currentPath
print currentPath
#def Create_xml(currentPath):
FileList = arcpy.ListFeatureClasses()
# Marker searched for in the spatial reference name further down.
zone="_Zone"
for File in FileList:
Count+=1
FileDesc_obj = arcpy.Describe(File)
FileNm=FileDesc_obj.file
check_meta=os.listdir(currentPath)
# Split FileNm at its first '.'.
# NOTE(review): if FileNm has no '.', find() returns -1 and the first slice
# drops the last character instead - confirm names always carry an extension.
existingXML=FileNm[:FileNm.find('.')]
existingExtension=FileNm[FileNm.find('.'):]
print "XML: "+existingXML
#print check_meta
#if existingXML+'.xml' in check_meta:
#newMetaFile='new'
# Look for an existing .xml whose name starts with the feature class base name;
# if one is found it is copied to "<FileNm>_2012Metadata.xml".
# NOTE(review): this loop rebinds f, the name used for the log file handle
# (already closed at this point).
for f in check_meta:
if f.startswith(existingXML) and f.endswith('.xml'):
print "exists, file name:", f
newMetaFile=FileNm+"_2012Metadata.xml"
# A failed copy is silently ignored by the bare except.
try:
shutil.copy2(f, newMetaFile)
except:
pass
break
# NOTE(review): with indentation lost it is unclear whether this else pairs
# with the `if` above or with the `for` (a for/else that runs only when no
# break occurred) - TODO confirm.
else:
#print "Does not exist"
newMetaFile=FileNm+"_BaseMetadata.xml"
print "New meta file: "+newMetaFile+ " for: "+File
# No existing metadata was chosen: ask the user for a base XML file to copy.
if newMetaFile.endswith('_BaseMetadata.xml'):
print "calling tkinter"
# NOTE(review): this rebinds `root`, the same name the os.walk loop uses for
# the directory path passed to os.path.join above - confirm this is intended.
root = Tkinter.Tk()
root.withdraw()
file = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a xml base file to match with: '+File)
if file != None:
metafile=os.path.abspath(file.name)
file.close()
#print metafile
shutil.copy2(metafile,newMetaFile)
print "copied"+metafile
# NOTE(review): attribute reference without parentheses - destroy() is never
# actually called here.
root.destroy
# Presumably the fallback for a cancelled dialog (file is None): copy the
# master template instead - TODO confirm which `if` this else belongs to.
else:
shutil.copy2('L:\Data_Admin\QA\Metadata_python_toolset\Master_Metadata.xml', newMetaFile)
#root = Tkinter.Tk()
#root.withdraw()
#newTitle=tkSimpleDialog.askstring('title', 'prompt')
#root.destroy
#print newTitle
print "Parsing meta file: "+newMetaFile
tree=et.parse(newMetaFile)
print "Processing: "+str(File)
# Each findall loop below overwrites the text of every matching element.
for node in tree.findall('.//title'):
node.text = str(FileNm)
for node in tree.findall('.//procstep/srcused'):
node.text = str(currentPath+"\\"+existingXML+".xml")
# str(datetime.now()) has the form 'YYYY-MM-DD HH:MM:SS.ffffff'; the slices
# below pick pieces out of that string. (The doubled `dt=dt=` is a redundant
# chained assignment to the same name.)
dt=dt=str(datetime.datetime.now())
for node in tree.findall('.//procstep/date'):
node.text = str(dt[:10])
# NOTE(review): dt[11:13] is the hour and dt[16:19] is ':SS', so the minutes
# are skipped - confirm whether dt[11:19] was intended.
for node in tree.findall('.//procstep/time'):
node.text = str(dt[11:13]+dt[16:19])
for node in tree.findall('.//metd/date'):
node.text = str(dt[:10])
# Bounding coordinates come from the extent reported by arcpy.Describe().
for node in tree.findall('.//northbc'):
node.text = str(FileDesc_obj.extent.YMax)
for node in tree.findall('.//southbc'):
node.text = str(FileDesc_obj.extent.YMin)
for node in tree.findall('.//westbc'):
node.text = str(FileDesc_obj.extent.XMin)
for node in tree.findall('.//eastbc'):
node.text = str(FileDesc_obj.extent.XMax)
for node in tree.findall('.//native/nondig/formname'):
node.text = str(os.getcwd()+"\\"+File)
for node in tree.findall('.//native/digform/formname'):
node.text = str(FileDesc_obj.featureType)
for node in tree.findall('.//avlform/nondig/formname'):
node.text = str(FileDesc_obj.extension)
# File size on disk divided by 1024, written with a " KB" suffix.
for node in tree.findall('.//avlform/digform/formname'):
node.text = str(float(os.path.getsize(File))/int(1024))+" KB"
for node in tree.findall('.//theme'):
node.text = str(FileDesc_obj.spatialReference.name +" ; EPSG: "+str(FileDesc_obj.spatialReference.factoryCode))
print node.text
# Build a four-item list of projection details; which items are used depends
# on whether "GCS" appears in the spatial reference name.
projection_info=[]
Zone=FileDesc_obj.spatialReference.name
if "GCS" in str(FileDesc_obj.spatialReference.name):
projection_info=[FileDesc_obj.spatialReference.GCSName, FileDesc_obj.spatialReference.angularUnitName, FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName]
print "Geographic Coordinate system"
else:
# The last item is the tail of the name starting three characters before the
# final "_Zone" occurrence.
projection_info=[FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName, FileDesc_obj.spatialReference.angularUnitName, Zone[Zone.rfind(zone)-3:]]
print "Projected Coordinate system"
# Assign the projection details, in order, to the keyword elements under spdom.
# NOTE(review): projection_info has four entries, so a fifth keyword element
# would raise IndexError - confirm the template never has more.
x=0
for node in tree.findall('.//spdom'):
for node2 in node.findall('.//keyword'):
#print node2.text
node2.text = str(projection_info[x])
#print node2.text
x=x+1
# NOTE(review): this write is immediately superseded - the open(..., 'w') on
# the next statement truncates the same path and the tree is written again.
tree.write(newMetaFile)
with open(newMetaFile, 'w') as output: # would be better to write to temp file and rename
output.write(DECLARATION)
# NOTE(review): the xml_declaration keyword was added to ElementTree.write()
# in Python 2.7. The traceback quoted after this listing shows a Python26
# interpreter path; if that is the interpreter in use, this call would raise
# TypeError after the file has been truncated and only DECLARATION written,
# which would fit the "no element found: line 3" parse error on a later run -
# TODO confirm.
tree.write(output, xml_declaration=False, encoding='utf-8')
# xml_declaration=False - don't write default declaration
# Append one record per processed feature class to the log.
f = open(Generated_XMLs, 'a')
f.write(str(Count)+": "+File+"; "+newMetaFile+"; "+currentPath+";"+existingXML+"\n")
f.close()
# Create_xml(currentPath)
Error message from Wing IDE
xml.parsers.expat.ExpatError: no element found: line 3, column 0 File "L:\Data_Admin\QA\Metadata_python_toolset\test2\update_Metadata1f.py", line 78, in tree=et.parse(newMetaFile) File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 862, in parse tree.parse(source, parser) File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 587, in parse self._root = parser.close() File "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py", line 1254, in close self._parser.Parse("", 1) # end of data
Upvotes: 0
Views: 1624
Reputation: 21
I struggled with adding PIs (processing instructions) to the start of an ElementTree document too. I came up with a solution that uses a fake root node (with None as the element tag) to hold any required processing instructions, followed by the real document root node.
import xml.etree.ElementTree as ET
# The real document root; populate it exactly as you would any other tree.
root = ET.Element('root')

# The stylesheet processing instruction. The trailing newline in .tail puts
# whatever follows it on a line of its own. Create one of these per
# instruction you need.
pi = ET.ProcessingInstruction("xml-stylesheet", "type='text/xsl' href='ANZMeta.xsl'")
pi.tail = "\n"

# Tag-less wrapper element: it holds the instruction(s) first and the real
# root last, so they are serialized in that order.
fake_root = ET.Element(None)
fake_root.append(pi)
fake_root.append(root)

# xml_declaration=True makes ElementTree.write() emit the <?xml ...?> line.
tree = ET.ElementTree(fake_root)
tree.write("doc.xml", xml_declaration=True)
The resulting doc.xml file:
<?xml version='1.0' encoding='us-ascii'?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>
<root />
Upvotes: 2
Reputation: 9522
If all your XML files have the same declaration, you can write it yourself:
import xml.etree.ElementTree as ET
# Header lines to put back at the top of the file: the XML declaration and the
# xml-stylesheet processing instruction. Both sit before the root element, so
# they are written by hand ahead of the serialized tree.
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""
tree = ET.parse(filename)
# do some work on tree
# Open in binary mode: tree.write() with an explicit encoding produces encoded
# bytes, which a text-mode handle rejects on Python 3. The header is encoded
# the same way so the whole file is consistently utf-8.
with open(filename, 'wb') as output:  # would be better to write to temp file and rename
    output.write(DECLARATION.encode('utf-8'))
    # No xml_declaration argument: for utf-8 the default already omits
    # ElementTree's own declaration, and leaving the keyword out keeps the
    # call valid on ElementTree versions whose write() predates it
    # (Python 2.6).
    tree.write(output, encoding='utf-8')
Upvotes: 1