Reputation: 167401
In the question "Java XML parsing - merging the output of xi:include", the poster wanted to use XInclude but used the wrong namespace on the include
element. I thought that an XMLFilter put before the XInclude-aware parser, where the XMLFilter takes care of correcting the namespace, could solve this problem (without having to edit the files manually, and without needing a separate processing step that first creates intermediate files with the corrected namespace).
So I wrote the following XMLFilter
, extending the XMLFilterImpl
that SAX provides:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;
/**
 * SAX filter that replaces the namespace URI held in {@code oldURI} with the one
 * held in {@code correctURI} on element start/end events and on prefix mappings,
 * then forwards the event to {@link XMLFilterImpl}. All other URIs pass through
 * unchanged.
 */
public class XIncludeNsFixup extends XMLFilterImpl {
    static String correctURI = "http://www.w3.org/2001/XInclude";
    static String oldURI = "http://www.w3.org/2003/XInclude";

    /**
     * Maps a namespace URI to the URI that should be reported downstream.
     *
     * @param uri the namespace URI reported by the upstream event
     * @return {@code correctURI} if {@code uri} equals {@code oldURI}, otherwise {@code uri} itself
     */
    private static String fixUri(String uri) {
        return uri.equals(oldURI) ? correctURI : uri;
    }

    /** Forwards the start-element event with the namespace URI corrected. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        super.startElement(fixUri(uri), localName, qName, atts);
    }

    /** Forwards the end-element event with the namespace URI corrected. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(fixUri(uri), localName, qName);
    }

    /** Forwards the prefix mapping with the namespace URI corrected. */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        super.startPrefixMapping(prefix, fixUri(uri));
    }
}
Then I created an XInclude-aware SAXParser (or rather its XMLReader)
to be chained to that filter and loaded a sample document as a SAXSource
from that filter into a default Transformer
to build a DOMResult
:
// Create a namespace-aware, XInclude-aware SAX parser and obtain its XMLReader.
SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
spf.setXIncludeAware(true);
XMLReader inputReader = spf.newSAXParser().getXMLReader();
// Chain the namespace fix-up filter to the reader: the reader is the filter's parent.
XMLFilter fixNs = new XIncludeNsFixup();
fixNs.setParent(inputReader);
// Run file3.xml through the filter with a default Transformer to build a DOM tree.
TransformerFactory tf = TransformerFactory.newInstance();
Transformer builder = tf.newTransformer();
DOMResult fixedInput = new DOMResult();
builder.transform(new SAXSource(fixNs, new InputSource("file3.xml")), fixedInput);
Document doc = (Document) fixedInput.getNode();
// Serialize the resulting DOM document to standard output.
Transformer serializer = tf.newTransformer();
serializer.transform(new DOMSource(doc), new StreamResult(System.out));
A sample document file3.xml
that I used has one xi:include
element in the proper XInclude namespace and one in the old, not supported namespace:
<?xml version="1.0" encoding="UTF-8"?>
<contexts>
<context name="a">
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="reuse.xml"/>
<foo>Original text 1.</foo>
</context>
<context name="b">
<xi:include xmlns:xi="http://www.w3.org/2003/XInclude" href="reuse.xml"/>
<foo>Original text 2.</foo>
</context>
</contexts>
My expectation is that the filter first fixes the namespace and then the XMLReader performs the XInclude on both elements. However, when running the code with Java 1.8 the output is as follows:
<?xml version="1.0" encoding="UTF-8" standalone="no"?><contexts>
<context name="a">
<text xml:base="reuse.xml">I am XIncluded text.</text>
<foo>Original text 1.</foo>
</context>
<context name="b">
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="reuse.xml"/>
<foo>Original text 2.</foo>
</context>
</contexts>
So the filter has fixed the namespace on the second include
element but the XMLReader has only applied the XInclude inclusion on the first include
element.
Where have I gone wrong? How can I chain the filter and an XInclude aware XMLReader to fix the namespace and perform XInclude inclusion on namespace corrected elements?
For completeness, here is the reuse.xml
:
<?xml version="1.0" encoding="UTF-8"?>
<text>I am XIncluded text.</text>
And the full code of the Java program to allow for easy testing:
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
/**
 * Test driver: reads {@code file3.xml} through an {@code XIncludeNsFixup} filter whose
 * parent is an XInclude-aware reader, builds a DOM tree from the filtered events and
 * serializes that tree to standard output.
 */
public class XIncludeTest1 {
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException, TransformerConfigurationException, TransformerException {
        // Namespace-aware, XInclude-aware SAX parser; its reader becomes the filter's parent.
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        parserFactory.setXIncludeAware(true);
        XMLReader xincludeReader = parserFactory.newSAXParser().getXMLReader();

        // NOTE(review): an XMLFilter receives its events from its parent reader, so the
        // parent's XInclude handling presumably runs before this filter sees any element.
        // Confirm against the SAX XMLFilter documentation.
        XMLFilter nsFixup = new XIncludeNsFixup();
        nsFixup.setParent(xincludeReader);

        TransformerFactory transformerFactory = TransformerFactory.newInstance();

        // Build a DOM tree from the filtered SAX events.
        DOMResult domResult = new DOMResult();
        SAXSource filteredSource = new SAXSource(nsFixup, new InputSource("file3.xml"));
        transformerFactory.newTransformer().transform(filteredSource, domResult);

        // Serialize the DOM tree to standard output.
        Document document = (Document) domResult.getNode();
        StreamResult stdout = new StreamResult(System.out);
        transformerFactory.newTransformer().transform(new DOMSource(document), stdout);
    }
}
I also tried to put the latest Xerces Java from Apache on the class path to see whether it fixes the problem but the output remains the same.
Upvotes: 1
Views: 640