Reputation: 3
I'm trying to modify an XML file by deleting tags that have been added incorrectly (the b tag in the example below) while preserving all children of these elements, both nested elements and text. I was hoping to do this with XmlParser, as this is part of a larger script in which the order of elements plays a role at a later stage.
Example:
<?xml version="1.0" encoding="UTF-8"?>
<a>
<b>
<c />
Test 1
</b>
<b>
<c />
Test 2
</b>
</a>
What I want is something like:
<?xml version="1.0" encoding="UTF-8"?>
<a>
<c />
<c />
Test 1
Test 2
</a>
What I tried so far is the following (args[0] being a file containing the above example) aiming to simply attach all the children to the parent node:
import groovy.xml.XmlUtil
// Parse the file named by the first command-line argument, with external DTD
// loading and namespace processing switched off.
def inputFile = new File(args[0])
def xmlParser = new XmlParser()
xmlParser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
xmlParser.setFeature("http://xml.org/sax/features/namespaces", false)
def document = xmlParser.parse(inputFile)

// Collect every <b> element anywhere in the tree.
def wrappers = document.'**'.findAll { it instanceof Node && it.name() == 'b' }

wrappers.each { wrapper ->
    Node owner = wrapper.parent()
    // Only Node children are moved up to the parent; String (text) children
    // of <b> are skipped here, so they are gone once <b> is replaced below.
    wrapper.children()
           .findAll { it instanceof Node }
           .each { owner.append(it) }
    // Replace <b> with nothing, i.e. remove it from the tree.
    wrapper.replaceNode {}
}

// NOTE: this instance is never used; serialize() below is called statically.
def xmlu = new XmlUtil()
println XmlUtil.serialize(document)
But with this code I lose the actual text content of the elements: XmlParser returns the children as a mix of Node and String objects, and only the Node objects are appended to the parent. So the output looks like this:
<?xml version="1.0" encoding="UTF-8"?>
<a>
<c />
<c />
</a>
Is there a way to also add the String objects to the parent node? Or is there perhaps a much simpler approach that I have overlooked so far?
Upvotes: 0
Views: 2308
Reputation: 38639
The script further below will output:
<?xml version='1.0' encoding='UTF-8'?>
<a foo='bar'>
<c blub='bla' />
Test 1
<c />
Test 2
</a>
import groovy.xml.*
// Parse the sample document with external DTD loading and namespace
// processing switched off.
def xmlParser = new XmlParser()
xmlParser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
xmlParser.setFeature("http://xml.org/sax/features/namespaces", false)
def root = xmlParser.parseText('''<?xml version="1.0" encoding="UTF-8"?>
<a foo="bar">
<b bii="boo">
<c blub="bla" />
Test 1
</b>
<b>
<c />
Test 2
</b>
</a>''')

// Re-emits the children of `element` through `markup`:
//   - non-Node children (text) are yielded as character data,
//   - <b> elements are skipped, but their children are emitted in their place,
//   - every other element is written with its attributes and processed recursively.
// Declared before assignment so the closure body can refer to itself.
def emitChildren
emitChildren = { markup, element ->
    element.children().each { item ->
        if (!(item instanceof Node)) {
            markup.mkp.yield(item)
        } else if (item.name() == 'b') {
            emitChildren(markup, item)
        } else {
            markup."${item.name()}"(item.attributes()) {
                emitChildren(markup, item)
            }
        }
    }
}

// Write the XML declaration and the root element, then walk the tree.
def markup = new MarkupBuilder(new PrintWriter(System.out))
markup.mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')
markup."${root.name()}"(root.attributes()) {
    emitChildren(markup, root)
}
Upvotes: 0
Reputation: 171084
This seems to work... Clone the nodes and add them to the parent, then remove the b from its parent:
import groovy.xml.*

def xmlText = '''<?xml version="1.0" encoding="UTF-8"?>
<a>
<b>
<c />
Test 1
</b>
<b>
<c />
Test 2
</b>
</a>'''

// Removes every element named `tagName` below `root` while keeping its
// children: they are inserted into the element's parent at the position the
// element occupied, so the relative order of all siblings is preserved.
// The root element itself is never removed.
def unwrap = { Node root, String tagName ->
    // Search the whole tree, not just the root's direct children, and skip
    // anything that is not a Node before calling name() on it.
    def targets = root.depthFirst().findAll {
        it instanceof Node && !it.is(root) && it.name() == tagName
    }
    // depthFirst() lists an element before its descendants; walking the list
    // backwards unwraps inner matches first, so nested matches are resolved
    // before their enclosing element is cloned into its parent.
    targets.reverse().each { target ->
        def parent = target.parent()
        // Locate by identity so a look-alike sibling can never be picked.
        int position = parent.children().findIndexOf { it.is(target) }
        // Element children are cloned; text children are plain Strings and
        // can be reused as they are.
        def replacements = target.children().collect { it instanceof Node ? it.clone() : it }
        parent.remove(target)
        parent.children().addAll(position, replacements)
    }
}

def xml = new XmlParser().parseText(xmlText)
unwrap(xml, 'b')

println XmlUtil.serialize(xml)
Upvotes: 2