wilx
wilx

Reputation: 18228

XML parser wrapper

Is there any kind of XML parser wrapper library that would allow switching the actual XML parser engine at configuration or run time instead of forcing me to choose between libxml2, expat or Xalan-C++?

Upvotes: 1

Views: 800

Answers (1)

dirkgently
dirkgently

Reputation: 111130

I wrote something similar a while back:

// Tag types: one per XML library under test. They are only declared, never
// defined; they serve as template arguments that select a specialization.
struct xerces;
struct msxml;
struct rapid;
struct tiny;
struct pugixml;

// Per-library setup/teardown object; specialized below for every tag type.
template <typename T> struct platform_manager;
// Parses file `f` with the library selected by T, timing the work with `qpc`,
// and returns the measured duration. Specialized below for every tag type.
template <typename T> double parse_file(std::string const& f, QueryPerfCounter& qpc);

template<class T>
void demo(std::string const& f, size_t N = 10) {
    platform_manager<T> pm;
    QueryPerfCounter qpc;
    std::vector<double> timing_data;
    timing_data.reserve(N);
    std::generate_n(std::back_inserter(timing_data), N, std::tr1::bind(&parse_file<typename T>, f, qpc));
    adobe::Statistics<double> s(timing_data.begin(), timing_data.end());
    std::cout << "Iteration count: " << s.count() << " Mean time: " << s.mean() << "s. Variance: " << s.variance() << "s.\n";
}
/***************************************************************/
// Setup/teardown for MSXML: calls CoInitialize on construction and
// CoUninitialize on destruction.
template <> 
struct platform_manager<msxml> {
    // Throws std::runtime_error when CoInitialize reports failure.
    platform_manager() {        
        if (FAILED(CoInitialize(NULL)))
            throw std::runtime_error("CoInitialize failed");
    }

    ~platform_manager() {
        CoUninitialize();
    }
};

// Loads file `f` into an MSXML 6 DOM document and returns the time spent in
// load(), as measured by `qpc`. Returns 0 when the DOM document object cannot
// be created. A failed load prints "Parsing failed" but is still timed.
template<>
double parse_file<msxml>(std::string const& f, QueryPerfCounter& qpc) {
    CComPtr<IXMLDOMDocument> pXMLDom;
    HRESULT hr = CoCreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pXMLDom));
    if (FAILED(hr) || !pXMLDom) {
        // Without this check a failed creation would dereference a null pointer below.
        std::cout << "Failed to create MSXML DOM document" << std::endl;
        return 0;
    }
    // Request a synchronous load so that load() returns only after parsing has
    // finished; otherwise the timed region may not cover the actual parse.
    pXMLDom->put_async(VARIANT_FALSE);

    VARIANT_BOOL varStatus = VARIANT_FALSE;
    qpc.Start();
    hr = pXMLDom->load(CComVariant(f.c_str()), &varStatus);
    qpc.Stop();
    // load() signals a parse failure through S_FALSE / VARIANT_FALSE, which
    // FAILED() alone does not detect. Reporting happens after Stop() so the
    // console output is not part of the measurement.
    if (FAILED(hr) || varStatus != VARIANT_TRUE)
        std::cout << "Parsing failed" << std::endl;
    return qpc.Duration(QueryPerfCounter::seconds);
}

/***************************************************************/
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>

#ifdef XERCES_CPP_NAMESPACE_USE
XERCES_CPP_NAMESPACE_USE
#endif 

// Setup/teardown for Xerces-C++: XMLPlatformUtils::Initialize on construction,
// XMLPlatformUtils::Terminate on destruction.
template <> 
struct platform_manager<xerces> {
    // The constructor uses a function-try-block: after the handler has printed
    // the message, the exception is rethrown automatically when the handler
    // ends, so a failed initialization still propagates to the caller.
    platform_manager() try {
        XMLPlatformUtils::Initialize();
    } catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        // `message` is already a char*; transcoding it a second time would
        // allocate an XMLCh* buffer that is never released and would stream a
        // pointer value instead of the text.
        std::cout << "Failed to init: " << message << std::endl;
        XMLString::release(&message);
    }

    ~platform_manager() {
        XMLPlatformUtils::Terminate();
    }
};

// Parses file `f` with a validating, namespace-aware XercesDOMParser and
// returns the duration reported by `qpc`. When the parse throws, the exception
// is reported on std::cout and 0 is returned.
template<>
double parse_file<xerces>(std::string const& f, QueryPerfCounter& qpc) {
    std::tr1::shared_ptr<XercesDOMParser> dom_parser(new XercesDOMParser());
    dom_parser->setValidationScheme(XercesDOMParser::Val_Always);
    dom_parser->setDoNamespaces(true);    // optional

    // The parser only stores the raw pointer; the handler is owned here.
    std::tr1::shared_ptr<ErrorHandler> handler(new HandlerBase());
    dom_parser->setErrorHandler(handler.get());

    try {
        qpc.Start();
        dom_parser->parse(f.c_str());
        qpc.Stop();
        return qpc.Duration(QueryPerfCounter::seconds);
    }
    catch (const XMLException& xml_error) {
        char* text = XMLString::transcode(xml_error.getMessage());
        std::cout << "Exception message is: \n" << text << "\n";
        XMLString::release(&text);
    }
    catch (const DOMException& dom_error) {
        char* text = XMLString::transcode(dom_error.msg);
        std::cout << "Exception message is: \n" << text << "\n";
        XMLString::release(&text);
    }
    catch (...) {
        std::cout << "Unexpected Exception \n" ;
    }
    // Reached only after one of the handlers above has run.
    return 0;
}

/***************************************************************/
#include "rapidxml.hpp"
#include <vector>
#include <fstream>
#include <iterator>

// RapidXML specialization: intentionally empty, no setup or teardown is performed.
template <> 
struct platform_manager<rapid> {};

// Divisors used to express a byte count in bytes, kilobytes or megabytes.
enum size_hint { B = 1, KB = 1024, MB = 1024 * 1024 };

// Returns the size of the content behind stream `f`, divided by `factor`
// (megabytes by default). The read position is moved back to the beginning.
// Returns 0 when the stream is not usable (e.g. the file could not be opened)
// or its end position cannot be determined.
double file_size(std::ifstream& f, size_hint factor = MB) {
    if (!f)
        return 0.0;
    f.seekg(0, std::ios::end);
    // tellg() yields -1 on failure; keep the value signed so that the failure
    // is not converted into an enormous unsigned size.
    const std::streamoff length = f.tellg();
    f.seekg(0, std::ios::beg);
    if (length < 0)
        return 0.0;
    return static_cast<double>(length) / factor;
}

// Reads file `f` into memory and parses it with RapidXML
// (parse_non_destructive). Returns the sum of the load time and the parse
// time as measured by `qpc`. Exceptions are reported on std::cout; the
// duration accumulated up to that point (0 if loading failed) is returned.
template<>
double parse_file<rapid>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;
    rapidxml::xml_document<> doc;
    try {
        // Phase 1: load the whole file into a zero-terminated buffer.
        qpc.Start();
        // Binary mode keeps the byte count from tellg() consistent with the
        // number of bytes read() delivers.
        std::ifstream myfile(f.c_str(), std::ios::in | std::ios::binary);
        if (!myfile)
            throw std::runtime_error("Cannot open file: " + f);
        myfile.seekg(0, std::ios::end);
        const std::streamoff end_pos = myfile.tellg();
        if (end_pos < 0)
            throw std::runtime_error("Cannot determine size of file: " + f);
        myfile.seekg(0, std::ios::beg);
        const size_t length = static_cast<size_t>(end_pos);
        // One extra, zero-filled element: the buffer is never empty (so
        // &buffer[0] is valid even for an empty file) and no reallocating
        // push_back is needed for the terminator.
        std::vector<char> buffer(length + 1, '\0');
        myfile.read(&buffer[0], static_cast<std::streamsize>(length));
        // Terminate right after the bytes that were actually read.
        buffer[static_cast<size_t>(myfile.gcount())] = '\0';
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);

        // Phase 2: parse the buffer; `buffer` stays alive while `doc` is used.
        qpc.Start();
        doc.parse<rapidxml::parse_non_destructive>(&buffer[0]);
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);
    } catch (rapidxml::parse_error const& e) {
        std::cout << e.what() << std::endl;
    } catch (std::exception const& e) {
        std::cout << e.what() << std::endl;
    }
    return duration;
}
/***************************************************************/
// TinyXML-2 specialization: intentionally empty, no setup or teardown is performed.
template <> 
struct platform_manager<tiny> {};

// Loads file `f` with tinyxml2::XMLDocument::LoadFile and returns the time
// spent in that call, as measured by `qpc`. A load error is reported through
// XMLDocument::PrintError.
template<>
double parse_file<tiny>(std::string const& f, QueryPerfCounter& qpc) {
    tinyxml2::XMLDocument doc;
    qpc.Start();
    doc.LoadFile(f.c_str());
    qpc.Stop();
    // Report only when the document flags an error, and only after the timer
    // has been stopped, so console output never counts towards the measured
    // time (same ordering as the pugixml variant).
    if (doc.Error())
        doc.PrintError();
    return qpc.Duration(QueryPerfCounter::seconds); 
}
/***************************************************************/
// Scope guard for console output: writes a BEGIN banner when constructed and
// the matching END banner when destroyed. Both banners name the library and
// the input file.
struct banner_printer {
    banner_printer(std::string const& libname, std::string const& input)
        : library_name(libname)
        , input_name(input)
    {
        std::cout << "/*+------------------- BEGIN test for " << library_name
                  << " with file: " << input_name
                  << " -------------------+*/" << std::endl;
    }

    ~banner_printer()
    {
        std::cout << "/*+------------------- END test for " << library_name
                  << " with file: " << input_name
                  << " -------------------+*/" << std::endl;
    }

private:
    std::string library_name; // label of the library under test
    std::string input_name;   // input file shown in the banners
};
/***************************************************************/
#include "pugixml.hpp"

// pugixml specialization: intentionally empty, no setup or teardown is performed.
template <> 
struct platform_manager<pugixml> {};

// Loads file `f` with pugi::xml_document::load_file and returns the time spent
// in that call, as measured by `qpc`. When the parse result signals failure,
// diagnostics are written to std::cout after the timer has been stopped.
template<>
double parse_file<pugixml>(std::string const& f, QueryPerfCounter& qpc) {
    pugi::xml_document document;

    qpc.Start();
    pugi::xml_parse_result status = document.load_file(f.c_str());
    qpc.Stop();

    if (!status) {
        // Single output chain; the emitted text is unchanged.
        std::cout << "XML [" << f << "] parsed with errors, attr value: ["
                  << document.child("node").attribute("attr").value() << "]\n"
                  << "Error description: " << status.description() << "\n"
                  << "Error offset: " << status.offset
                  << " (error at offset [..." << (status.offset) << "]\n\n";
    }
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/

// Entry point: reads the list of input files from the catalog and, for every
// file, prints its size and then benchmarks each XML library in turn. Each
// benchmark sits in its own scope so that the banner_printer emits the END
// banner right after the corresponding demo has finished.
int main() {
    std::vector<std::string> catalog = parse_catalog("D:/Work/xml_parsers/perfcompare/benchmark/catalog.txt");
    for (std::vector<std::string>::const_iterator entry = catalog.begin(); entry != catalog.end(); ++entry) {
        std::string const& s = *entry;
        {
            // Scoped so the stream is closed before the parsers open the file.
            std::ifstream input(s);
            std::cout << "Input file name: " << s << " size: " << file_size(input) << "MB\n\n";
        }
        {
            banner_printer banner("xerces", s);
            demo<xerces>(s);
        }
        {
            banner_printer banner("rapid", s);
            demo<rapid>(s);
        }
        {
            banner_printer banner("tiny", s);
            demo<tiny>(s);
        }
        {
            banner_printer banner("pugi", s);
            demo<pugixml>(s);
        }
        {
            banner_printer banner("MSXML6", s);
            demo<msxml>(s);
        }
    }
    //expat_demo(argc, argv);
    return 0;
}

It may or may not help you get started. I've skipped the header includes and some other trivia. I tried to keep the interface simple and identical across all the libraries, which meant that some libraries required additional helper functions.

Upvotes: 1

Related Questions