Hassan Syed
Hassan Syed

Reputation: 20475

Spirit Lex -> qi grammar issue

edit : I have ripped out the lexer as it does not cleanly integrate with Qi and just obfuscates grammars (see here).


The full grammar and lexer are at the end. The grammar and input that I am having issues with are presented first.

This works fine :

/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4;   }  ";
event_descriptor_ = tok.event 
                    >> oid_ 
                    >> tok.left_curly >> event_entry_ >> tok.right_curly;

This fails :

/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4; OPTION 2:hassan2 int8; }";

event_descriptor_ = tok.event 
                    >> oid_ 
                    >> tok.left_curly >> *(event_entry_) >> tok.right_curly;

with :

Assertion failed: (std::size_t(~0) != token_state_), function parse, file /usr/local/include/boost/spirit/home/lex/lexer/token_def.hpp, line 100.
Abort trap: 6

Pointing to a source code comment of :

//  If the following assertion fires you probably forgot to
//  associate this token definition with a lexer instance.
BOOST_ASSERT(std::size_t(~0) != token_state_);

Removing the last curly from the grammar and input works as well :

/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4; OPTION 2:hassan2 int8; ";

event_descriptor_ = tok.event 
                    >> oid_ 
                    >> tok.left_curly >> *(event_entry_);

Full file :

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <exception>
#include <vector>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Lexer for the event-description language.
//
// Every token_def member that a grammar refers to must be added to a lexer
// state below. A token_def that is only constructed (and never added to
// `this->self`) has no lexer state associated with it, and Spirit.Lex asserts
// (`std::size_t(~0) != token_state_`) the first time a parser tries to match
// it against a non-empty input.
//
// Template parameter:
//   Lexer - the concrete lexer type this definition derives from
//           (main() instantiates it with lex::lexertl::actor_lexer<...>).
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Initialisers are listed in member-declaration order, which is the
        // order in which the members are actually constructed.
        : left_curly("\"{\""),
        right_curly("\"}\""),
        colon(":"),
        scolon(";"),
        repeated("(?i:repeated)"),
        left_paren("\"(\""),
        right_paren("\")\""),
        namespace_("(?i:namespace)"),
        event("(?i:event)"),
        optional("(?i:optional)"),
        required("(?i:required)"),
        t_int_4("(?i:int4)"),
        t_int_8("(?i:int8)"),
        t_string("(?i:string)"),
        ordinal("\\d+"),
        identifier("\\w+")

    {
        using boost::spirit::lex::_val;

        // Token definitions of the default lexer state. The bracketed
        // semantic actions only trace the recognised token to std::cout.
        // Keywords and `ordinal` are listed ahead of `identifier` because the
        // `\w+` pattern matches the same text as well.
        this->self
            =
              left_curly    [ std::cout << px::val("lpar") << std::endl]
            | right_curly   [ std::cout << px::val("rpar") << std::endl]
            | left_paren
            | right_paren
            | colon               [ std::cout << px::val("colon") << std::endl]
            | scolon
            | namespace_          [ std::cout << px::val("kw namesapce") << std::endl]
            | event               [ std::cout << px::val("kw event") << std::endl]
            | optional            [ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            [ std::cout << px::val("required") << std::endl]
            // `repeated` was previously constructed but never registered here,
            // although the grammar's qualifier rule refers to it.
            | repeated            [ std::cout << px::val("repeated") << std::endl]
            | t_int_4
            | t_int_8
            | t_string
            | ordinal             [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          [std::cout << px::val("val identifier(") << _val << ")" << std::endl];


        // Whitespace lives in its own lexer state, "WS", so that it can be
        // used as the skipper via qi::in_state("WS")[...].
        this->self("WS") =   lex::token_def<>("[ \\t\\n]+");
    }


    // Punctuation and keywords: lex::omit, i.e. they carry no attribute.
    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    // Decimal number exposed as a 32-bit unsigned attribute.
    lex::token_def<boost::uint32_t> ordinal;
    // Word token with the default token_def attribute type.
    lex::token_def<> identifier;
};

// Qualifier that introduces an event entry. The grammar's event_qualifier
// rule produces one of these values from the optional / required / repeated
// keyword tokens.
enum event_entry_qualifier
{
    ENTRY_OPTIONAL,   // set when the `optional` token is matched
    ENTRY_REQUIRED,   // set when the `required` token is matched
    ENTRY_REPEATED    // set when the `repeated` token is matched
};

// Type of the value carried by an event entry. The grammar's atomic_type and
// event_type rules produce these values from the corresponding type tokens.
enum entry_type
{
    RBL_INT4,     // set when the `int4` token is matched
    RBL_INT8,     // set when the `int8` token is matched
    RBL_STRING,   // set when the `string` token is matched
    RBL_EVENT     // set when the `event` token is matched in type position
};

// Identifier written as `<ordinal>:<name>` in the input (see the grammar's
// oid_ rule: ordinal token, colon token, identifier token).
struct oid
{
    boost::uint32_t   ordinal;  // numeric part, taken from the ordinal token
    std::string       name;     // textual part, taken from the identifier token
};

// Exposes oid as a Fusion sequence so that it can serve as the attribute of a
// Qi rule. The members are listed in the same order in which oid_ yields its
// attributes (ordinal first, then name).
BOOST_FUSION_ADAPT_STRUCT
(
  oid,
  (boost::uint32_t, ordinal)
  (std::string, name)
)

// Describes the type of an event entry: either an atomic type or a reference
// to another event (see the grammar's type_descriptor_ rule).
struct type_descriptor
{
    entry_type  type_id;           // which kind of type was parsed
    std::string referenced_event;  // identifier inside `event ( ... )`; the
                                   // grammar supplies "" for atomic types
};

// Exposes type_descriptor as a Fusion sequence for use as a Qi rule
// attribute; member order follows the order of attributes produced by
// type_descriptor_ (type first, then referenced event name).
BOOST_FUSION_ADAPT_STRUCT
(
  type_descriptor,
  (entry_type, type_id)
  (std::string, referenced_event)
)

// One entry inside an event body, written as
// `<qualifier> <ordinal>:<name> <type> ;` (see the grammar's event_entry_
// rule).
struct event_entry
{
    event_entry_qualifier  qualifier;   // optional / required / repeated
    oid                   identifier;   // ordinal and name of the entry
    type_descriptor       descriptor;   // type of the entry
};

// Exposes event_entry as a Fusion sequence for use as a Qi rule attribute;
// member order follows the order of the sub-rules in event_entry_.
BOOST_FUSION_ADAPT_STRUCT
(
  event_entry,
  (event_entry_qualifier, qualifier)
  (oid, identifier)
  (type_descriptor, descriptor)
)

// Result of parsing a whole event: its identifier plus the entries collected
// by the `*(event_entry_)` repetition in the grammar's start rule.
struct event_descriptor
{
    oid                       identifier;     // ordinal and name of the event
    std::vector<event_entry>  event_entries;  // zero or more parsed entries
};

// Exposes event_descriptor as a Fusion sequence for use as the attribute of
// the grammar's start rule; member order follows the order of attributes
// produced by event_descriptor_ (identifier first, then the entry list).
BOOST_FUSION_ADAPT_STRUCT
(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

// Qi grammar that turns the token stream produced by the lexer into an
// event_descriptor.
//
// Template parameters:
//   Iterator - token iterator type (main() passes the lexer's iterator_type).
//   Lexer    - lexer definition type used to form the in_state skipper
//              (main() passes tokens<lexer_type>::lexer_def).
//
// NOTE: every token referenced through `tok` below must be associated with
// the lexer instance; the Spirit.Lex source states that its token_state_
// assertion fires when a token definition was not associated with a lexer.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> >
{
    // tok: the token definitions object whose members are used as terminals.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event_descriptor_)
    {
      using qi::_val;
      //start = event;
      // Start rule: `event <oid> {` followed by zero or more entries.
      // Note that no closing-brace token is consumed here.
      event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_);

      // One entry: qualifier, identifier, type, terminating semicolon.
      event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;

      // Maps the qualifier keyword token to the matching enum value.
      event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL]  
                      | tok.required [ _val = ENTRY_REQUIRED]
                      | tok.repeated [ _val = ENTRY_REPEATED];

      // `<ordinal>:<identifier>`
      oid_  = tok.ordinal 
            >> tok.colon 
            >> tok.identifier;

      // Either an atomic type (qi::attr("") supplies an empty
      // referenced_event) or `event ( <identifier> )`.
      type_descriptor_ 
          = (( atomic_type >> qi::attr("")) 
          | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));

      // Maps the atomic type keyword token to the matching enum value.
      atomic_type = tok.t_int_4         [ _val = RBL_INT4]
              | tok.t_int_8             [ _val = RBL_INT8]
              | tok.t_string            [ _val = RBL_STRING];

      // The `event` keyword used in type position.
      event_type = tok.event            [_val = RBL_EVENT];
    }

    // Declared but never assigned (its only use above is commented out).
    qi::rule<Iterator> start;
    // Rules declared with the in_state skipper.
    qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_; 
    qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_;
    // The following three rules are declared without a skipper type.
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
    qi::rule<Iterator, entry_type()> atomic_type;
    qi::rule<Iterator, entry_type()> event_type;
    qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_;
    // Also declared without a skipper type.
    qi::rule<Iterator, oid()> oid_; 
};

// Sample input: one event with two optional int4 entries. It has no closing
// '}' because the grammar's start rule does not consume one.
std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4;  ";

// Tokenises `test`, parses the token stream with the event grammar and
// reports failure on std::cout.
//
// Returns 0 when the whole input was parsed, 1 otherwise.
int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    // token_lexer.begin() takes the character iterator by reference, so it
    // has to be a named lvalue.
    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    // Tokens of the lexer's "WS" state are used as the skipper.
    bool r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);

    // phrase_parse() also returns true for a match that stops early, so the
    // parse only counts as successful when no tokens are left over.
    if (!r || first != last)
    {
        std::cout << "parsing failed" << std::endl;
        return 1;
    }
   /* 
    Disabled lexer-only debugging loop, kept for reference:

    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
    return 0;
}

Upvotes: 1

Views: 207

Answers (1)

Hassan Syed
Hassan Syed

Reputation: 20475

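The curly-brace tokens (tok.left_curly / tok.right_curly) might seem like they are causing the problem; however, they are wired up correctly. The source comment does point to a token definition that is not associated with the lexer, and, as cv_and_he discovered, there is a token missing: `repeated` was constructed in the `tokens` constructor but never added to `this->self`, even though the grammar's `event_qualifier` rule refers to `tok.repeated`.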

So, if a token is used in a grammar, make sure it is also added to the lexer definition (`this->self`), not merely constructed; otherwise you might get confusing assertion failures while growing a grammar.

Upvotes: 3

Related Questions