crastinus
crastinus

Reputation: 133

Parsing a recursive structure with boost::spirit

I want to parse a structure like "text { < > }". The Spirit documentation contains a similar AST example. For parsing a string like this

<tag1>text1<tag2>text2</tag2></tag1>

this code works:

    // A node is either a nested element (`tree`) or plain `text`; whichever
    // alternative matched is assigned to this rule's attribute.
    templ     = (tree | text)       [_val = _1];

    // Opening tag: '<', not immediately followed by '/', then one or more
    // characters other than '>' (each one appended to the attribute), then '>'.
    start_tag = '<' 
            >> !lit('/') 
            >> lexeme[+(char_- '>') [_val += _1]] 
            >>'>'; 

    // Closing tag: "</", then exactly the string passed in as the inherited
    // attribute _r1, then '>'.
    end_tag   =  "</" 
            >> string(_r1) 
            >> '>'; 

    // Element: the opening tag's result is stored in field 1 of the attribute,
    // every child node is appended to field 0, and the closing tag must repeat
    // the value stored in field 1.
    tree =  start_tag          [at_c<1>(_val) = _1]
            >> *templ          [push_back(at_c<0>(_val), _1) ]
            >> end_tag(at_c<1>(_val) )
            ;

For parsing a string like this

<tag<tag>some_text>

this code does not work:

    // A node is either a nested group (`tree`) or plain `text`; whichever
    // alternative matched is assigned to this rule's attribute.
    templ     = (tree | text)       [_val = _1];


    // Nested group: '<', any number of child nodes (each appended to field 0
    // of the attribute), then '>'.
    // NOTE(review): the definition of `text` is not shown. If it only stops at
    // '<', it would also consume the closing '>' and this rule could never
    // match it -- confirm against the real `text` rule.
    tree =  '<'
            >> *templ          [push_back(at_c<0>(_val), _1) ]
            >> '>'
            ;

templ parses into a structure with a recursive_wrapper inside:

namespace client {

   // Forward declaration so that tmp_node can name tmp before it is complete.
   struct tmp;

   // A node is either a nested tmp (held through recursive_wrapper, since tmp
   // is still incomplete at this point) or a plain string.
   typedef boost::variant <
        boost::recursive_wrapper<tmp>,
        std::string
   > tmp_node;

   // Composite node: a list of child nodes plus a string.
   struct tmp {
     std::vector<tmp_node> content; // child nodes
     std::string text;
   };
}

// Expose client::tmp as a Fusion sequence: `content` is element 0 and `text`
// is element 1 (the indices used by at_c<N> in the grammar). The type names
// must be qualified with the namespace the struct is declared in.
BOOST_FUSION_ADAPT_STRUCT(
     client::tmp,
     (std::vector<client::tmp_node>, content)
     (std::string,text)
)

Can anyone explain why this happens? Or does anyone know of similar parsers written with boost::spirit?

Upvotes: 3

Views: 990

Answers (1)

sehe
sehe

Reputation: 392833

Just guessing you didn't actually want to parse XML at all, but rather some kind of mixed-content markup language for hierarchical text, I'd do

        // simple: one or more characters other than '>' and '<'
        simple = +~qi::char_("><");
        // nested: '<', any number of soup nodes, then '>'
        nested = '<' >> *soup >> '>';
        // soup: a nested group or, failing that, a run of simple text
        soup   = nested|simple;

With the AST/rules defined as

// Recursive AST node: either a string, or a vector of further nodes
// (recursive_variant_ stands in for the variant type being defined).
typedef boost::make_recursive_variant<
        boost::variant<std::string, std::vector<boost::recursive_variant_> > 
    >::type tag_soup;

// The attribute each rule produces is given by its signature.
qi::rule<It, std::string()>           simple; // yields a string
qi::rule<It, std::vector<tag_soup>()> nested; // yields a vector of nodes
qi::rule<It, tag_soup()>              soup;   // yields a variant node

See it Live On Coliru:

////  #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>

#include <iostream>
#include <fstream>

namespace client
{
    namespace qi = boost::spirit::qi;

    /// AST node: either a run of text, or a list of child nodes of this same type.
    using tag_soup = boost::make_recursive_variant<
            boost::variant<std::string, std::vector<boost::recursive_variant_> >
        >::type;

    /// Grammar for '<' ... '>' groups mixed with free text; the start rule is `soup`.
    template <typename Iterator>
    struct parser : qi::grammar<Iterator, tag_soup()>
    {
        parser() : parser::base_type(soup)
        {
            // A node is a bracketed group or, failing that, a text run.
            soup   = nested | simple;
            // A group is '<', any number of nodes, then '>'.
            nested = qi::lit('<') >> *soup >> qi::lit('>');
            // A text run is one or more characters that are neither '>' nor '<'.
            simple = +~qi::char_("><");

            BOOST_SPIRIT_DEBUG_NODES((simple)(nested)(soup))
        }

      private:
        qi::rule<Iterator, tag_soup()>              soup;
        qi::rule<Iterator, std::vector<tag_soup>()> nested;
        qi::rule<Iterator, std::string()>           simple;
    };
}

namespace boost { // the operator lives here so ADL on variant<> can find it
    // Streams the child nodes back to back, wrapped in '<' ... '>'.
    static std::ostream& operator<<(std::ostream& os, std::vector<client::tag_soup> const& soup)
    {
        os << "<";
        for (client::tag_soup const& node : soup)
            os << node;
        os << ">";
        return os;
    }
}

int main(int argc, char **argv)
{
    // The input file name is the only expected argument.
    if (argc < 2) {
        std::cerr << "Error: No input file provided.\n";
        return 1;
    }

    // Slurp the whole file into memory; the parser iterates over this string.
    std::ifstream in(argv[1]);
    std::string const storage(std::istreambuf_iterator<char>(in), {});

    // A stream that has failed without being at end-of-file is reported as unreadable.
    if (!in && !in.eof()) {
        std::cerr << "Error: Could not read from input file\n";
        return 1;
    }

    static const client::parser<std::string::const_iterator> p;

    client::tag_soup ast; // receives the parsed tree
    if (!parse(storage.begin(), storage.end(), p, ast)) {
        std::cout << "Parsing failed\n";
        return 1;
    }

    // Printed via the variant's stream operator, not by echoing the input.
    std::cout << "Parsing succeeded\nData: " << ast << "\n";
    return 0;
}

If you define BOOST_SPIRIT_DEBUG you'll get verbose output of the parsing process.

For the input

<some text with nested <tags <etc...> >more text>

it prints

Parsing succeeded
Data: <some text with nested <tags <etc...> >more text>

Note that the output is printed from the variant, not the original text.

Upvotes: 2

Related Questions