twfurst
twfurst

Reputation: 263

Set up URI or catalog resolver with Saxon/XQuery

I am developing a simple command-line application in Java to mine data from a large XML data set (15,000+ XML files). I have chosen to use the Saxon s9api as the XQuery processor for this. Everything works fine so long as there is open access to the Internet, where the parser used by Saxon can resolve the xsi:noNamespaceSchemaLocation URI (or, I assume, any other remote URI).

I have scoured Stackoverflow, as well as general Google searching, for answers on how to provide a catalog to the XQuery processor. I have not found a good explanation on how to do so.

This is the simple code I have at this point, which, as I stated, works fine when there is open access to the Internet:


    package ipd.part.info.mining.app;

    import java.io.File;
    import java.util.List;
    import java.util.Scanner;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import net.sf.saxon.Configuration;
    import net.sf.saxon.TransformerFactoryImpl;
    import net.sf.saxon.s9api.DOMDestination;
    import net.sf.saxon.s9api.Processor;
    import net.sf.saxon.s9api.QName;
    import net.sf.saxon.s9api.SaxonApiException;
    import net.sf.saxon.s9api.XQueryCompiler;
    import net.sf.saxon.s9api.XQueryEvaluator;
    import net.sf.saxon.s9api.XQueryExecutable;
    import net.sf.saxon.s9api.XdmAtomicValue;
    import net.sf.saxon.lib.*;
    import static org.apache.xerces.jaxp.JAXPConstants.JAXP_SCHEMA_LANGUAGE;
    import static org.apache.xerces.jaxp.JAXPConstants.W3C_XML_SCHEMA;
    import org.apache.xerces.util.XMLCatalogResolver;
    import org.apache.xml.resolver.tools.CatalogResolver;
    import org.w3c.dom.Document;
    import org.xml.sax.ErrorHandler;

    /**
     *
     * @author tfurst
     */
    /**
     * Command line tool that runs a fixed XQuery over every {@code *.xml} file in a
     * user-supplied directory and writes the resulting {@code <parts>} document to
     * {@code output.xml} in a user-supplied report directory.
     *
     * <p>NOTE(review): the catalog-aware {@link DocumentBuilder} configured in
     * {@link #initDb()} is only used to create the empty result document; the entity
     * resolver set there does not appear to be applied to the documents Saxon loads
     * through {@code collection()} - confirm and register the catalog with Saxon if
     * offline schema resolution is required.
     *
     * @author tfurst
     */
    public class IPDPartInfoMiningApp {

        /** Query that collects every itemSequenceNumber and emits one part element per hit, ordered by part number. */
        private static final String PARTS_QUERY =
                "declare variable $path external;\n" +
                "\n" +
                "let $coll := collection(concat($path,'?select=*.xml'))//itemSequenceNumber \n" +
                "\n" +
                "return\n" +
                "<parts>\n" +
                "{\n" +
                "    for $mod in $coll\n" +
                "    let $pn := normalize-space($mod/partNumber)\n" +
                "    let $nomen := $mod/partIdentSegment[1]/descrForPart\n" +
                "    let $smr := $mod/locationRcmdSegment/locationRcmd/sourceMaintRecoverability\n" +
                "    order by $pn\n" +
                "    return <part pn=\"{$pn}\" nomen=\"{$nomen}\" smr=\"{$smr}\"/>\n" +
                "}\n" +
                "</parts>";

        /** Reads the two paths the user types on standard input. */
        private static Scanner scanner = new Scanner(System.in);
        private static String ietmPath;
        private static String outputPath;

        private static CatalogResolver resolver;
        private static org.apache.xerces.util.XMLCatalogResolver xres;
        private static ErrorHandler eHandler;

        private static DocumentBuilderFactory DBF;
        private static DocumentBuilder DB;

        /**
         * Prompts for the input and report directories, runs the parts query and
         * serializes the result to {@code output.xml} in the report directory.
         *
         * @param args the command line arguments (unused)
         */
        public static void main(String[] args) {
            initDb();
            try {
                System.out.println("Enter path to complete IETM Export:");
                ietmPath = scanner.nextLine();
                System.out.println("Enter path to save report:");
                outputPath = scanner.nextLine();

                Processor proc = new Processor(true);
                XQueryCompiler comp = proc.newXQueryCompiler();
                XQueryExecutable exp = comp.compile(PARTS_QUERY);

                Document dom = DB.newDocument();

                // Build the URI once and cut it at the last '/', exactly as before:
                // for an existing directory this drops the trailing slash of the URI.
                String ietmUri = new File(ietmPath).toPath().toUri().toString();
                String collectionBase = ietmUri.substring(0, ietmUri.lastIndexOf("/"));

                XQueryEvaluator ev = exp.load();
                ev.setExternalVariable(new QName("path"), new XdmAtomicValue(collectionBase));
                ev.run(new DOMDestination(dom));

                // Identity transform: serialize the DOM result to the report file.
                TransformerFactoryImpl tfact = new TransformerFactoryImpl();
                Transformer trans = tfact.newTransformer();
                DOMSource src = new DOMSource(dom);
                StreamResult res = new StreamResult(new File(outputPath + File.separator + "output.xml"));
                trans.transform(src, res);

            } catch (SaxonApiException | TransformerException ex) {
                Logger.getLogger(IPDPartInfoMiningApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        /**
         * Builds a Xerces {@link XMLCatalogResolver} that uses the same catalog files
         * as the given Apache {@link CatalogResolver}.
         *
         * @param resolver resolver whose catalog manager supplies the catalog file list
         * @return a Xerces resolver configured with the absolute path of each catalog file
         */
        private static XMLCatalogResolver createXMLCatalogResolver(CatalogResolver resolver)
        {
            List<?> files = resolver.getCatalog().getCatalogManager().getCatalogFiles();
            String[] catalogs = new String[files.size()];

            // Index-based loop so every catalog lands in its own slot; the previous
            // version never advanced the index and kept overwriting element 0.
            for (int i = 0; i < files.size(); i++)
            {
                catalogs[i] = new File(files.get(i).toString()).getAbsolutePath();
            }

            XMLCatalogResolver xcr = new XMLCatalogResolver();
            xcr.setCatalogList(catalogs);
            return xcr;
        }

        /**
         * Initializes the catalog resolvers, the error handler and the namespace-aware
         * {@link DocumentBuilder} stored in the static fields.
         *
         * @throws IllegalStateException if the document builder cannot be created
         */
        private static void initDb()
        {
            try
            {
                resolver = new CatalogResolver();
                eHandler = new DocumentErrorHandler();
                xres = createXMLCatalogResolver(resolver);
                DBF = DocumentBuilderFactory.newInstance();
                DBF.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
                DBF.setNamespaceAware(true);
                DB = DBF.newDocumentBuilder();
                DB.setEntityResolver(xres);
                DB.setErrorHandler(eHandler);
            }
            catch (ParserConfigurationException ex)
            {
                // Without a DocumentBuilder main() cannot create its result document,
                // so fail here with the real cause instead of a later NullPointerException.
                throw new IllegalStateException("Unable to create the XML DocumentBuilder", ex);
            }
        }

    }

I am receiving this error when I disconnect my machine from the network:

C:\Users\tfurst\Desktop\XQuery Test\testXml\test\tool>java -jar IPD_Part_Info_Mining_App.jar
Enter path to complete IETM Export:
C:\Users\tfurst\Desktop\Wire Repl Testing
Enter path to save report:
C:\Users\tfurst\Desktop\Wire Repl Testing\report
Error on line 6 column 2
  collection(): failed to parse XML file
  file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
Aug 20, 2019 2:55:23 PM ipd.part.info.mining.app.IPDPartInfoMiningApp main
SEVERE: null
net.sf.saxon.s9api.SaxonApiException: collection(): failed to parse XML file file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
        at net.sf.saxon.s9api.XQueryEvaluator.run(XQueryEvaluator.java:372)
        at ipd.part.info.mining.app.IPDPartInfoMiningApp.main(IPDPartInfoMiningApp.java:80)
Caused by: net.sf.saxon.trans.XPathException: collection(): failed to parse XML file file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: I/O error reported by XML parser processing file:/C:/Users/tfurst/Desktop/Wire%20Repl%20Testing/DMC-HH60W-A-52-21-0001-04AAA-520A-B.xml: Read timed out
        at net.sf.saxon.resource.XmlResource.getItem(XmlResource.java:113)
        at net.sf.saxon.functions.CollectionFn$2.mapItem(CollectionFn.java:246)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:113)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:108)
        at net.sf.saxon.expr.ItemMappingIterator.next(ItemMappingIterator.java:108)
        at net.sf.saxon.om.FocusTrackingIterator.next(FocusTrackingIterator.java:85)
        at net.sf.saxon.expr.ContextMappingIterator.next(ContextMappingIterator.java:59)
        at net.sf.saxon.expr.sort.DocumentOrderIterator.<init>(DocumentOrderIterator.java:47)
        at net.sf.saxon.expr.sort.DocumentSorter.iterate(DocumentSorter.java:230)
        at net.sf.saxon.expr.flwor.ForClausePush.processTuple(ForClausePush.java:34)
        at net.sf.saxon.expr.flwor.FLWORExpression.process(FLWORExpression.java:841)
        at net.sf.saxon.expr.instruct.ElementCreator.processLeavingTail(ElementCreator.java:337)
        at net.sf.saxon.expr.instruct.ElementCreator.processLeavingTail(ElementCreator.java:284)
        at net.sf.saxon.expr.instruct.Instruction.process(Instruction.java:151)
        at net.sf.saxon.query.XQueryExpression.run(XQueryExpression.java:411)
        at net.sf.saxon.s9api.XQueryEvaluator.run(XQueryEvaluator.java:370)
        ... 1 more


C:\Users\tfurst\Desktop\XQuery Test\testXml\test\tool>pause
Press any key to continue . . .

I am sure this is probably a relatively simple fix, most likely something I have overlooked. I know how to handle this when working with XSL transformations, by supplying a catalog and the location of the schemas. Thanks in advance for any help, much appreciated.

Upvotes: 0

Views: 951

Answers (1)

ond1
ond1

Reputation: 771

To use an XML catalog file, something like the following in your code should work:

        // Create the s9api Processor; per the inline note, pass false when running Saxon-HE.
        Processor proc = new Processor(false); //false for Saxon-HE
        XQueryCompiler compiler = proc.newXQueryCompiler();
        // Point the Configuration underlying this Processor at the catalog file.
        // NOTE(review): the trailing boolean is presumably a tracing/debug switch - confirm against the Saxon API docs.
        XmlCatalogResolver.setCatalog("path/catalog.xml", proc.getUnderlyingConfiguration(), false);
        ...

Upvotes: 1

Related Questions