Martin Honnen
Martin Honnen

Reputation: 167401

How to chain an XInclude aware XMLReader to an XMLFilter so that XInclude inclusion is performed on any elements the filter has changed?

In the question "Java XML parsing - merging the output of xi:include", the poster wanted to use XInclude but used the wrong namespace on the include element. I thought that an XMLFilter placed in front of the XInclude-aware parser, where the XMLFilter takes care of correcting the namespace, could solve this problem (without having to edit the files manually, and without a separate processing step that first creates intermediate files with the corrected namespace).

So I wrote the following XMLFilter, extending the XMLFilterImpl that SAX provides:

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;


/**
 * SAX filter that replaces the old {@code http://www.w3.org/2003/XInclude}
 * namespace URI with {@code http://www.w3.org/2001/XInclude} on the element
 * and prefix-mapping events it forwards.
 *
 * <p>Only the namespace URI argument is rewritten; local names, qualified
 * names and attributes are passed on unchanged, and events in any other
 * namespace are forwarded as they are.
 */
public class XIncludeNsFixup extends XMLFilterImpl {

    /** Namespace URI that matching events are rewritten to. */
    static final String correctURI = "http://www.w3.org/2001/XInclude";

    /** Namespace URI that is replaced wherever it is reported. */
    static final String oldURI = "http://www.w3.org/2003/XInclude";

    /**
     * Maps the old namespace URI to the correct one.
     *
     * @param uri namespace URI reported by the upstream event; may be {@code null}
     * @return {@link #correctURI} if {@code uri} equals {@link #oldURI},
     *         otherwise {@code uri} itself
     */
    private static String fixNamespace(String uri) {
        // Constant-first comparison so a null URI is passed through instead of throwing.
        return oldURI.equals(uri) ? correctURI : uri;
    }

    /**
     * Forwards the start of an element, with its namespace URI corrected if needed.
     *
     * @throws SAXException if the downstream handler reports an error
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        super.startElement(fixNamespace(uri), localName, qName, atts);
    }

    /**
     * Forwards the end of an element, with its namespace URI corrected if needed.
     *
     * @throws SAXException if the downstream handler reports an error
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(fixNamespace(uri), localName, qName);
    }

    /**
     * Forwards a prefix mapping, binding the prefix to the corrected URI if needed.
     *
     * @throws SAXException if the downstream handler reports an error
     */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        super.startPrefixMapping(prefix, fixNamespace(uri));
    }

}

Then I created an XInclude-aware SAXParser, took its XMLReader, chained it to that filter as the filter's parent, and loaded a sample document as a SAXSource from that filter into a default Transformer to build a DOMResult:

    // Namespace- and XInclude-aware SAX parser; its XMLReader becomes the filter's parent.
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setXIncludeAware(true);
    parserFactory.setNamespaceAware(true);
    XMLReader xincludeReader = parserFactory.newSAXParser().getXMLReader();

    // The namespace-correcting filter is layered on top of that reader.
    XMLFilter nsFixup = new XIncludeNsFixup();
    nsFixup.setParent(xincludeReader);

    // Read file3.xml through the filter and collect the events in a DOM tree.
    SAXSource filteredSource = new SAXSource(nsFixup, new InputSource("file3.xml"));
    DOMResult domTarget = new DOMResult();

    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    transformerFactory.newTransformer().transform(filteredSource, domTarget);

    // Serialize the resulting document to standard output with a second default transformer.
    Document parsed = (Document) domTarget.getNode();
    transformerFactory.newTransformer().transform(new DOMSource(parsed), new StreamResult(System.out));

A sample document file3.xml that I used has one xi:include element in the proper XInclude namespace and one in the old, unsupported namespace:

<?xml version="1.0" encoding="UTF-8"?>
<contexts>
    <context name="a">
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="reuse.xml"/>
        <foo>Original text 1.</foo>
    </context>
    <context name="b">
        <xi:include xmlns:xi="http://www.w3.org/2003/XInclude" href="reuse.xml"/>
        <foo>Original text 2.</foo>
    </context>
</contexts>

My expectation is that the filter first fixes the namespace and then the XMLReader performs the XInclude on both elements. However, when running the code with Java 1.8 the output is as follows:

<?xml version="1.0" encoding="UTF-8" standalone="no"?><contexts>
    <context name="a">
        <text xml:base="reuse.xml">I am XIncluded text.</text>
        <foo>Original text 1.</foo>
    </context>
    <context name="b">
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="reuse.xml"/>
        <foo>Original text 2.</foo>
    </context>
</contexts>

So the filter has fixed the namespace on the second include element but the XMLReader has only applied the XInclude inclusion on the first include element.

Where have I gone wrong? How can I chain the filter and an XInclude aware XMLReader to fix the namespace and perform XInclude inclusion on namespace corrected elements?

For completeness, here is the reuse.xml:

<?xml version="1.0" encoding="UTF-8"?>
<text>I am XIncluded text.</text>

And the full code of the Java program to allow for easy testing:

import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;


/**
 * Test driver: reads {@code file3.xml} through an {@link XIncludeNsFixup}
 * filter whose parent is an XInclude-aware {@link XMLReader}, builds a DOM
 * document from the filtered events and prints that document to standard output.
 */
public class XIncludeTest1 {

    /**
     * Runs the parse / filter / serialize pipeline once.
     *
     * <p>NOTE(review): the filter's parent is the XInclude-aware reader, so the
     * filter receives the events that reader emits. Presumably any XInclude
     * processing has already happened by the time the namespace is rewritten,
     * which would match an include element in the old namespace being
     * renamed but not expanded. Confirm against the SAX filter documentation.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException, TransformerConfigurationException, TransformerException {

        // Namespace- and XInclude-aware SAX parser; its XMLReader becomes the filter's parent.
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setXIncludeAware(true);
        parserFactory.setNamespaceAware(true);
        XMLReader xincludeReader = parserFactory.newSAXParser().getXMLReader();

        // The namespace-correcting filter is layered on top of that reader.
        XMLFilter nsFixup = new XIncludeNsFixup();
        nsFixup.setParent(xincludeReader);

        // Read file3.xml through the filter and collect the events in a DOM tree.
        SAXSource filteredSource = new SAXSource(nsFixup, new InputSource("file3.xml"));
        DOMResult domTarget = new DOMResult();

        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        transformerFactory.newTransformer().transform(filteredSource, domTarget);

        // Serialize the resulting document to standard output with a second default transformer.
        Document parsed = (Document) domTarget.getNode();
        transformerFactory.newTransformer().transform(new DOMSource(parsed), new StreamResult(System.out));
    }

}

I also tried to put the latest Xerces Java from Apache on the class path to see whether it fixes the problem but the output remains the same.

Upvotes: 1

Views: 640

Answers (0)

Related Questions