Reputation: 85
Data:
name phone email address address1 address2 dateofbirth
John 111111111 [email protected] Chicago Illinois Phoenix
Mark 222222222 [email protected] London
My code:
def convert_row(row):
return """<Document>
<Name>%s</Name>
<Phone>%s</Phone>
<Email>%s</Email>
<Address>%s</Address>
<Address1>%s</Address1>
<Address2>%s</Address2>
<DateofBirth>%s</DateofBirth>
</Document>""" % (row.name, row.phone, row.email, row.address,
row.address1,
row.address2,row.dob)
with open('out.xml', 'w') as f:
f.write('\n'.join(df.apply(convert_row, axis=1)))
Current output:
<Customer>
<Name>John</Name>
<Phone>111111111</Phone>
<Email>[email protected]</Email>
<Address>Chicago</Address>
<Address1>Illinois</Address1>
<Address2>Phoenix</Address2>
<DateofBirth></DateofBirth>
</Customer>
<Customer>
<Name>Mark</Name>
<Phone>222222222</Phone>
<Email>[email protected]</Email>
<Address>London</Address>
<Address1></Address1>
<Address2></Address2>
<DateofBirth></DateofBirth>
</Customer>
Could anyone please help me how to remove only Address1 and Address2 tags when the value is null (e.g. customer Mark). However empty dateofbirth tag must be retained though it's null. Any help much appreciated!
Upvotes: 1
Views: 59
Reputation: 3581
You can remove the empty tags after creation:
import pandas as pd
import xml.etree.ElementTree as ET
columns = ['Name','Phone','Email','Address','Address1','Address2','DateofBirth']
row = [['John','111111111','[email protected]','Chicago','Illinois','Phoenix',''],['Mark', '222222222', '[email protected]', 'London','','','']]
df = pd.DataFrame(row, columns=columns)
print(df.to_string(index=False))
xml = df.to_xml(root_name='Document', row_name='Customer', index=False, encoding='utf-8', xml_declaration=True, pretty_print=True)
tree = ET.fromstring(xml)
def rem (rem_list, root):
""" Remove listed empty tags from root"""
parent_map = {(c, p) for p in root.iter( ) for c in p}
for (c, p) in parent_map:
if c.tag in rem_list:
p.remove(c)
sel = ['Address1', 'Address2']
for customer in tree.findall('.//Customer'):
rem_list = []
for empty_tag in customer.iter():
if empty_tag.text == None and empty_tag.tag in sel:
rem_list.append(empty_tag.tag)
# Call remove function
rem(rem_list, customer)
rem_list = []
ET.dump(tree)
tree1 = ET.ElementTree(tree)
ET.indent(tree1, space= ' ')
tree1.write('customer.xml', encoding="utf-8", xml_declaration=True)
Output:
Name Phone Email Address Address1 Address2 Dateofbirth
John 111111111 [email protected] Chicago Illinois Phoenix
Mark 222222222 [email protected] London
<Document>
<Customer>
<Name>John</Name>
<Phone>111111111</Phone>
<Email>[email protected]</Email>
<Address>Chicago</Address>
<Address1>Illinois</Address1>
<Address2>Phoenix</Address2>
<DateofBirth />
</Customer>
<Customer>
<Name>Mark</Name>
<Phone>222222222</Phone>
<Email>[email protected]</Email>
<Address>London</Address>
<DateofBirth />
</Customer>
</Document>
Upvotes: 0
Reputation: 195613
Try:
def convert(row):
row = row[row.notna()]
out = ["\t<Customer>"]
for c in row.index:
out.append(f"\t\t<{c.capitalize()}>{row[c]}</{c.capitalize()}>")
return "\n".join(out + ["\t</Customer>"])
out = "<Document>\n" + "\n".join(df.apply(convert, axis=1)) + "\n</Document>"
print(out)
Prints:
<Document>
<Customer>
<Name>John</Name>
<Phone>111111111</Phone>
<Email>[email protected]</Email>
<Address>Chicago</Address>
<Address1>Illinois</Address1>
<Address2>Phoenix</Address2>
</Customer>
<Customer>
<Name>Mark</Name>
<Phone>222222222</Phone>
<Email>[email protected]</Email>
<Address>London</Address>
</Customer>
</Document>
Initial dataframe:
name phone email address address1 address2 dateofbirth
0 John 111111111 [email protected] Chicago Illinois Phoenix NaN
1 Mark 222222222 [email protected] London NaN NaN NaN
Upvotes: 1