Reputation: 11
I am getting the following error from Bandit: "Using lxml.etree.parse to parse untrusted XML data is known to be vulnerable to XML attacks. Replace lxml.etree.parse with its defusedxml equivalent function."
I want the equivalent of the code below, written with defusedxml.
from lxml import etree, objectify
def fn_read_xml_root(xml_file):
    """
    Open an XML file, strip namespace prefixes from element tags, remove
    objectify annotations, and return the raw text together with the root.

    xml_file : path of the xml file to be parsed
    returns  : tuple (xml_data, xroot) - the file content as a string and
               the root element of the parsed tree
    """
    # Keep the unmodified document text so it can be returned to the caller.
    with open(xml_file, "r", encoding="utf-8") as x_file:
        xml_data = x_file.read()

    # NOTE(review): Bandit flags lxml.etree.parse on untrusted input; this
    # parser configuration is left as the original had it - confirm whether
    # the input is trusted or switch to a hardened parser.
    parser = etree.XMLParser(remove_blank_text=True)
    xtree = etree.parse(xml_file, parser)
    xroot = xtree.getroot()

    # iter() replaces the deprecated getiterator(); both walk the root and
    # all of its descendants.
    for elem in xroot.iter():
        # Skip nodes whose tag is not a string (e.g. comments or processing
        # instructions), since they carry no "{namespace}" prefix to strip.
        if not hasattr(elem.tag, "find"):
            continue
        idx = elem.tag.find("}")
        if idx >= 0:
            # Drop the leading "{namespace}" part and keep the local name.
            elem.tag = elem.tag[idx + 1:]

    objectify.deannotate(xroot, cleanup_namespaces=True)
    # return xml data and root node of the file
    return xml_data, xroot
Upvotes: 0
Views: 610
Reputation: 11
def remove_namespace(elem):
    """
    Return a tag or attribute name without its leading "{namespace}" part.

    elem : name string, optionally prefixed with "{namespace}"
    returns : the text after the first "}" when the name starts with "{";
              otherwise the name unchanged
    """
    # Names without a namespace prefix pass straight through.
    if not elem.startswith("{"):
        return elem
    # find() yields -1 when there is no closing brace, so the slice then
    # starts at 0 and the whole string is returned.
    closing = elem.find("}")
    return elem[closing + 1:]
def remove_all_namespaces(doc):
    """
    Strip namespace prefixes from every tag and attribute name under doc.

    doc : xml doc element; its nodes are modified in place
    returns : the same doc element that was passed in
    """
    for node in doc.iter():
        node.tag = remove_namespace(node.tag)
        # Rebuild the attribute mapping with namespace-free keys; values are
        # carried over untouched.
        stripped = {}
        for name, value in node.attrib.items():
            stripped[remove_namespace(name)] = value
        node.attrib = stripped
    return doc
def fn_read_xml_root(xml_file):
    """
    Read an XML file and return its raw text plus the parsed root element
    with namespaces removed.

    xml_file : path of the xml file to be parsed
    returns  : tuple (xml_data, xroot)
    """
    # Keep the untouched document text for the caller.
    with open(xml_file, mode="r", encoding="utf-8") as source:
        xml_data = source.read()

    # ET is imported elsewhere in the file (presumably defusedxml's
    # ElementTree module - confirm).
    tree = ET.parse(xml_file)
    xroot = tree.getroot()

    try:
        xroot = remove_all_namespaces(xroot)
    except Exception as err:
        # Best effort: a failure is only logged, and the root is returned in
        # whatever state the helper left it.
        logging.info(f"XML namespace remove error {str(err)}")

    # Hand back both the file text and the root node.
    return xml_data, xroot
Upvotes: 0