Reputation: 5928
I'm trying to figure out why my simple XSLT transformation, which is supposed to transform XML to XML, doesn't seem to be able to achieve that.
The transformation simply copies everything:
<?xml version="1.0" encoding="utf-8"?>
<!-- Copy-through stylesheet: reproduces the elements, attributes and text of the input document. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<!-- Request XML serialization with UTF-8 as the output encoding. -->
<xsl:output method="xml" encoding="utf-8" />
<!-- Matches every element and every attribute. -->
<xsl:template match="*|@*">
<!-- Shallow-copy the current node, then recurse so its content is rebuilt inside the copy. -->
<xsl:copy>
<!-- Only child elements, attributes and text nodes are selected; comments and processing instructions are not. -->
<xsl:apply-templates select="*|@*|text()" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
With an input XML file, such as below:
<?xml version="1.0" encoding="utf-8"?>
<foo xmlns="uri:foo">
<name>丕𠀆𠀅𠀍𠁀</name>
</foo>
the following is the result:
<?xml version="1.0" encoding="utf-8"?>
<foo xmlns="uri:foo">
<name>丕&#55360;&#56326;&#55360;&#56325;&#55360;&#56333;&#55360;&#56384;</name>
</foo>
The tools I'm using all rely on (Java) Apache Xalan 2.7.1 XSLT processor, including Eclipse (Mars) with the XSL Developer Tools plugin, where I created this sample.
The latter plugin claims the input XML is well formed, but the output XML isn't (character reference &#55360; is an invalid XML character).
Why is my XSLT processor generating invalid XML and how do I prevent it from doing so?
The actual code is along the lines of this (you need Xalan in your classpath):
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;
/**
 * Small self-contained driver that compiles an XSLT stylesheet with Xalan,
 * applies it to an XML document and writes the result to a file as UTF-8.
 *
 * <p>All streams are managed with try-with-resources, so a failure while
 * closing (and therefore flushing) an output file is reported to the caller
 * instead of being silently discarded.
 */
public class XSLTTest {

    /**
     * Factory used to compile stylesheets. The Xalan implementation class is
     * instantiated directly rather than looked up through
     * {@code TransformerFactory.newInstance()}.
     */
    private final TransformerFactory xalanTransFact;

    /** Creates a driver backed by Xalan's {@code TransformerFactoryImpl}. */
    public XSLTTest() {
        xalanTransFact = new org.apache.xalan.processor.TransformerFactoryImpl();
    }

    /**
     * Compiles the stylesheet stored in the given file.
     *
     * <p>The file is read through a UTF-8 {@link Reader}, so its bytes are
     * decoded as UTF-8 before the XSLT processor sees them.
     *
     * @param transformation file containing the XSLT stylesheet
     * @return the compiled stylesheet
     * @throws TransformerException if the stylesheet cannot be compiled
     * @throws IOException if the file cannot be opened or closed
     */
    public Templates createCustomTransformation(
            File transformation
    ) throws TransformerException, IOException {
        // The stream is declared as its own resource so it is closed even if
        // wrapping it in a reader fails.
        try (InputStream stylesheetBytes = new FileInputStream(transformation);
             Reader readerTransformation =
                     new InputStreamReader(stylesheetBytes, StandardCharsets.UTF_8)) {
            return xalanTransFact.newTemplates(new StreamSource(readerTransformation));
        }
    }

    /**
     * Runs {@code transformer} over the XML supplied by {@code transformeeReader}
     * and writes the result to {@code out} through a UTF-8 writer.
     *
     * <p>The output method is always set to {@code xml} and the output encoding
     * to {@code utf-8}; these calls override whatever the stylesheet's
     * {@code xsl:output} declared. The caller remains responsible for closing
     * {@code transformeeReader}.
     *
     * @param transformer transformer to apply; its output properties are modified
     * @param transformeeReader source of the XML document to transform
     * @param out path of the file to (over)write with the result
     * @param indent whether to pretty-print the result with an indent of 2
     * @return {@code out} as a {@link File}
     * @throws TransformerException if the transformation fails
     * @throws IOException if the output file cannot be opened, written or closed
     */
    public File applyCustomTransformation(
            Transformer transformer, Reader transformeeReader, Path out,
            boolean indent
    ) throws TransformerException, IOException {
        File file = out.toFile();
        try (OutputStream resultBytes = new FileOutputStream(file);
             Writer writer = new OutputStreamWriter(resultBytes, StandardCharsets.UTF_8)) {
            if (indent) {
                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                // Xalan-specific property: number of spaces per indent level.
                transformer.setOutputProperty(
                        "{http://xml.apache.org/xslt}indent-amount",
                        String.valueOf(2));
            }
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            transformer.transform(
                    new StreamSource(transformeeReader),
                    new StreamResult(writer));
        }
        // Reached only if the writer was closed (and thus flushed) successfully.
        return file;
    }

    /**
     * Writes {@code content} to {@code selectedFile} as UTF-8, replacing any
     * existing content.
     *
     * @param selectedFile file to write
     * @param content text to store in the file
     * @throws FileNotFoundException if the file cannot be opened for writing
     * @throws IOException if writing or closing the file fails
     */
    private void saveToFile(File selectedFile, String content)
            throws FileNotFoundException, IOException {
        try (OutputStream fileBytes = new FileOutputStream(selectedFile);
             Writer writer = new OutputStreamWriter(fileBytes, StandardCharsets.UTF_8)) {
            writer.write(content);
            writer.flush();
        }
    }

    /**
     * Writes the sample stylesheet to a temporary file, compiles it, applies it
     * to the sample document (which contains characters outside the Basic
     * Multilingual Plane) and prints the location of the result file.
     *
     * @param args ignored
     * @throws IOException if a temporary file cannot be created, read or written
     * @throws TransformerException if compiling or applying the stylesheet fails
     */
    public static void main(String[] args) throws IOException, TransformerException {
        String xslText = "" +
                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
                "<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"\n" +
                " version=\"1.0\">\n" +
                " <xsl:output method=\"xml\" encoding=\"utf-8\" />\n" +
                " <xsl:template match=\"*|@*\">\n" +
                " <xsl:copy>\n" +
                " <xsl:apply-templates select=\"*|@*|text()\" />\n" +
                " </xsl:copy>\n" +
                " </xsl:template>\n" +
                "</xsl:stylesheet>";
        String xmlToParse = "" +
                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
                "<foo xmlns=\"uri:foo\">\n" +
                " <name>丕𠀆𠀅𠀍𠁀</name>\n" +
                "</foo>";

        XSLTTest test = new XSLTTest();

        // Persist the stylesheet so it can be compiled from a file.
        Path xsl = Files.createTempFile("test", ".xsl");
        test.saveToFile(xsl.toFile(), xslText);
        Templates templates = test.createCustomTransformation(xsl.toFile());
        Transformer transformer = templates.newTransformer();

        Path xml = Files.createTempFile("test-out", ".xml");
        StringReader reader = new StringReader(xmlToParse);
        test.applyCustomTransformation(transformer, reader, xml, true);
        System.out.println("Result is at: " + xml.toString());
    }
}
For various reasons, I am not able to switch to another XSLT processor.
Upvotes: 0
Views: 886
Reputation: 5928
As @VGR wrote in a comment, this is a manifestation of bug https://issues.apache.org/jira/browse/XALANJ-2419.
A comment on their JIRA suggests a workaround - use UTF-16 as output encoding for the transformation, instead of UTF-8, since the bug only affects the latter.
So, in my example, line
transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
needs to be replaced with
// workaround for https://issues.apache.org/jira/browse/XALANJ-2419
transformer.setOutputProperty(OutputKeys.ENCODING, "utf-16");
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
writer.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
while everything else stays the same. The actual files are still written as UTF-8, but the transformation will be handled as UTF-16 internally.
Upvotes: 0