Reputation: 131
I have an XML Schema which contains base64 binary data. The problem is that if the binary is big enough, I unsurprisingly get an OutOfMemoryError. I managed to generate the affected Java class so that it uses a DataHandler instead of a byte[], but JAXB still seems to do the marshalling in RAM. The schema in use cannot be changed and is very complex, so building the XML by hand is not a solution. The only idea I have is to add a placeholder instead of the big binary and replace it afterwards. But I believe there is a better solution!?
Thanks for your hints
Sample Schema:
<!-- Sample schema: one root element "Document" wrapping a single base64Binary child. -->
<schema
    targetNamespace="http://example.com/"
    elementFormDefault="qualified"
    xmlns="http://www.w3.org/2001/XMLSchema"
    xmlns:tns="http://example.com/"
    xmlns:xmime="http://www.w3.org/2005/05/xmlmime">
  <element name="Document">
    <complexType>
      <sequence>
        <!-- The binary payload; the declared MIME type reappears as @XmlMimeType
             on the generated Java field. -->
        <element
            name="text"
            type="base64Binary"
            xmime:expectedContentTypes="anything/else" />
      </sequence>
    </complexType>
  </element>
</schema>
Generated Java Class:
package com.example.gen;
import javax.activation.DataHandler;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlMimeType;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlType;
/**
 * JAXB binding for the {@code Document} root element of the sample schema.
 *
 * <p>The single {@code text} child is bound to a {@link DataHandler} rather than a
 * {@code byte[]}, with the MIME type {@code anything/else}.
 */
@XmlRootElement(name = "Document")
@XmlType(name = "", propOrder = {"text"})
@XmlAccessorType(XmlAccessType.FIELD)
public class Document {

    /** Content of the required {@code text} element. */
    @XmlMimeType("anything/else")
    @XmlElement(required = true)
    protected DataHandler text;

    /**
     * Returns the handler that supplies the content of the {@code text} element.
     *
     * @return the current handler, or {@code null} if none has been set
     */
    public DataHandler getText() {
        return this.text;
    }

    /**
     * Replaces the handler that supplies the content of the {@code text} element.
     *
     * @param value the new handler
     */
    public void setText(DataHandler value) {
        text = value;
    }
}
Sample Code:
// Input payload and the XML file the document is marshalled into.
File bigFile = new File("./temp/bigFile.bin");
File outFile = new File("./temp/bigXML.xml");

// Attach the file to the document through a DataHandler instead of a byte[].
Document document = new Document();
DataHandler bigDocDH = new DataHandler(new FileDataSource(bigFile));
document.setText(bigDocDH);

JAXBContext jaxbContext = JAXBContext.newInstance("com.example.gen");
Marshaller marshaller = jaxbContext.createMarshaller();

// try-with-resources guarantees the file handle is released even when
// marshalling fails (for example with the OutOfMemoryError described above).
try (OutputStream outputStream = new FileOutputStream(outFile)) {
    marshaller.marshal(document, outputStream);
}
Upvotes: 2
Views: 2057
Reputation: 131
OK, I have found a solution which works for me: first I replace the DataHandler which points to a large file with a DataHandler which contains a small byte array as content.
After this I implemented an XMLStreamWriterWrapper which delegates all methods to another XMLStreamWriter. When the content of the DataHandler with the small placeholder content is written to the XMLStreamWriterWrapper, I drop that data and stream the original data to this position instead.
Constructor and Factory:
/**
 * Builds a wrapper around a freshly created {@link XMLStreamWriter} for the given stream.
 *
 * <p>The placeholder is stored in its Base64-encoded form (both as a {@code String} and as a
 * {@code char[]}), because that is the form in which it is compared against the character
 * data passed to {@code writeCharacters}.
 *
 * @param outputStream
 *            the stream the XML is written to; wrapped in a {@link BufferedOutputStream}
 *            for the binary payload unless it already is one
 * @param binaryData
 *            the payload that is streamed in place of the placeholder
 * @param token
 *            the plain-text placeholder to search for
 * @throws XMLStreamException
 *             if the underlying XMLStreamWriter cannot be created
 */
private XMLStreamWriterWrapper(OutputStream outputStream, DataHandler binaryData, String token) throws XMLStreamException {
    this.xmlStreamWriter = XMLOutputFactory.newFactory().createXMLStreamWriter(outputStream);
    // Unbuffered output makes encoding large payloads extremely slow, so buffer when needed.
    this.outputStream = (outputStream instanceof BufferedOutputStream)
            ? outputStream
            : new BufferedOutputStream(outputStream);
    this.binaryData = binaryData;

    // Pre-compute the Base64 form of the placeholder.
    final Charset utf8 = Charset.forName("UTF-8");
    final byte[] encodedToken = Base64.getEncoder().encode(token.getBytes(utf8));
    this.tokenAsString = new String(encodedToken, utf8);
    this.token = this.tokenAsString.toCharArray();
}
/**
 * Creates a new {@link XMLStreamWriterWrapper}.
 *
 * @param outputStream
 *            the stream the XML is marshalled to
 * @param binaryData
 *            the binary data that is streamed into the XML
 * @param token
 *            the token that acts as placeholder for the binary data
 * @return the newly created {@link XMLStreamWriterWrapper}
 * @throws XMLStreamException
 *             if the underlying XMLStreamWriter could not be constructed
 */
public static XMLStreamWriterWrapper newInstance(OutputStream outputStream, DataHandler binaryData, String token) throws XMLStreamException {
    final XMLStreamWriterWrapper wrapper = new XMLStreamWriterWrapper(outputStream, binaryData, token);
    return wrapper;
}
writeCharacters implementation:
/**
 * Writes text to the output. Text equal to the encoded placeholder is routed through
 * {@link #writeCharacters(char[], int, int)}; everything else goes straight to the
 * wrapped writer.
 *
 * @param text the character data to write
 * @throws XMLStreamException if writing fails
 * @see javax.xml.stream.XMLStreamWriter#writeCharacters(java.lang.String)
 */
@Override
public void writeCharacters(String text) throws XMLStreamException {
    if (!this.tokenAsString.equals(text)) {
        // Ordinary text: plain delegation.
        xmlStreamWriter.writeCharacters(text);
        return;
    }
    final char[] chars = text.toCharArray();
    writeCharacters(chars, 0, chars.length);
}
/**
 * Writes character data to the output. If the range {@code text[start .. start + len)}
 * equals the encoded placeholder token, the token is dropped and the binary payload is
 * streamed to the output as MIME Base64 instead; any other text is delegated unchanged.
 *
 * @param text  the buffer holding the characters
 * @param start index of the first character to write
 * @param len   number of characters to write
 * @throws XMLStreamException if the wrapped writer fails or streaming the payload
 *                            raises an {@link IOException}
 * @see javax.xml.stream.XMLStreamWriter#writeCharacters(char[], int, int)
 */
@Override
public void writeCharacters(char[] text, int start, int len) throws XMLStreamException {
    // Compare exactly the range the caller asked for. The length check comes first so
    // that large non-matching buffers are not copied.
    boolean isToken = text != null
            && len == token.length
            && Arrays.equals(Arrays.copyOfRange(text, start, start + len), token);
    if (!isToken) {
        xmlStreamWriter.writeCharacters(text, start, len);
        return;
    }

    LOGGER.debug("Found replace token. Start streaming binary data.");
    // force the XMLStreamWriter to close the start tag.
    xmlStreamWriter.writeCharacters("");
    try {
        // Push out everything written so far so the payload lands right after the start tag.
        xmlStreamWriter.flush();
        outputStream.flush();

        // Closing the Base64 stream is what emits the final partial group and its padding,
        // but it also closes its target. The shield turns that close into a flush so the
        // rest of the XML can still be written afterwards.
        OutputStream shield = new java.io.FilterOutputStream(outputStream) {
            @Override
            public void write(byte[] buffer, int offset, int count) throws IOException {
                // Bulk delegate; the inherited version writes one byte at a time.
                out.write(buffer, offset, count);
            }

            @Override
            public void close() throws IOException {
                flush();
            }
        };
        try (OutputStream base64 = Base64.getMimeEncoder().wrap(shield)) {
            this.binaryData.writeTo(base64);
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }
    LOGGER.debug("Successfully inserted binary data.");
}
Example usage:
// Random placeholder that stands in for the real payload while JAXB marshals the document.
final String replaceToken = UUID.randomUUID().toString();

// Handler for the original (large) file; only the wrapper reads from it.
final DataHandler bigDocDH = new DataHandler(new FileDataSource(bigFile));

// Small handler whose content is just the placeholder; this is what JAXB gets to see.
final byte[] tokenBytes = replaceToken.getBytes(Charset.forName("UTF-8"));
final DataHandler tokenDH = new DataHandler(new ByteArrayDataSource(tokenBytes, bigDocDH.getContentType()));

final Document document = new Document();
document.setText(tokenDH);

try (OutputStream outStream = new FileOutputStream(outFile)) {
    // The wrapper swaps the placeholder for the streamed file content on the fly.
    XMLStreamWriter streamWriter = XMLStreamWriterWrapper.newInstance(outStream, bigDocDH, replaceToken);
    marshaller.marshal(document, streamWriter);
}
Upvotes: 1