Edit: I have ripped out the lexer, as it does not cleanly integrate with Qi and just obfuscates grammars (see here).
The full grammar and lexer are at the end. The grammar and input that I am having issues with are presented first.
This works fine:
/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4; } ";
event_descriptor_ = tok.event
>> oid_
>> tok.left_curly >> event_entry_ >> tok.right_curly;
This fails:
/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4; OPTION 2:hassan2 int8; }";
event_descriptor_ = tok.event
>> oid_
>> tok.left_curly >> *(event_entry_) >> tok.right_curly;
with:
Assertion failed: (std::size_t(~0) != token_state_), function parse, file /usr/local/include/boost/spirit/home/lex/lexer/token_def.hpp, line 100.
Abort trap: 6
The assertion points to this source code comment:
// If the following assertion fires you probably forgot to
// associate this token definition with a lexer instance.
BOOST_ASSERT(std::size_t(~0) != token_state_);
Removing the last curly brace from both the grammar and the input works as well:
/*input*/ " EVENT 1:ev1 { OPTIONAL 1:hassan int4; OPTION 2:hassan2 int8; ";
event_descriptor_ = tok.event
>> oid_
>> tok.left_curly >> *(event_entry_);
Full file:
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <exception>
#include <vector>
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
tokens()
: left_curly("\"{\""),
right_curly("\"}\""),
left_paren("\"(\""),
right_paren("\")\""),
colon(":"),
scolon(";"),
namespace_("(?i:namespace)"),
event("(?i:event)"),
optional("(?i:optional)"),
required("(?i:required)"),
repeated("(?i:repeated)"),
t_int_4("(?i:int4)"),
t_int_8("(?i:int8)"),
t_string("(?i:string)"),
ordinal("\\d+"),
identifier("\\w+")
{
using boost::spirit::lex::_val;
this->self
=
left_curly [ std::cout << px::val("lpar") << std::endl]
| right_curly [ std::cout << px::val("rpar") << std::endl]
| left_paren
| right_paren
| colon [ std::cout << px::val("colon") << std::endl]
| scolon
| namespace_ [ std::cout << px::val("kw namespace") << std::endl]
| event [ std::cout << px::val("kw event") << std::endl]
| optional [ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl]
| required [ std::cout << px::val("required") << std::endl]
| t_int_4
| t_int_8
| t_string
| ordinal [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
| identifier [std::cout << px::val("val identifier(") << _val << ")" << std::endl];
this->self("WS") = lex::token_def<>("[ \\t\\n]+");
}
lex::token_def<lex::omit> left_curly, right_curly, colon, scolon, repeated, left_paren, right_paren;
lex::token_def<lex::omit> namespace_, event, optional, required, t_int_4, t_int_8, t_string;
lex::token_def<boost::uint32_t> ordinal;
lex::token_def<> identifier;
};
enum event_entry_qualifier
{
ENTRY_OPTIONAL,
ENTRY_REQUIRED,
ENTRY_REPEATED
};
enum entry_type
{
RBL_INT4,
RBL_INT8,
RBL_STRING,
RBL_EVENT
};
struct oid
{
boost::uint32_t ordinal;
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT
(
oid,
(boost::uint32_t, ordinal)
(std::string, name)
)
struct type_descriptor
{
entry_type type_id;
std::string referenced_event;
};
BOOST_FUSION_ADAPT_STRUCT
(
type_descriptor,
(entry_type, type_id)
(std::string, referenced_event)
)
struct event_entry
{
event_entry_qualifier qualifier;
oid identifier;
type_descriptor descriptor;
};
BOOST_FUSION_ADAPT_STRUCT
(
event_entry,
(event_entry_qualifier, qualifier)
(oid, identifier)
(type_descriptor, descriptor)
)
struct event_descriptor
{
oid identifier;
std::vector<event_entry> event_entries;
};
BOOST_FUSION_ADAPT_STRUCT
(
event_descriptor,
(oid, identifier)
(std::vector<event_entry>, event_entries)
)
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> >
{
template <typename TokenDef>
grammar(TokenDef const& tok)
: grammar::base_type(event_descriptor_)
{
using qi::_val;
//start = event;
event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_);
event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;
event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL]
| tok.required [ _val = ENTRY_REQUIRED]
| tok.repeated [ _val = ENTRY_REPEATED];
oid_ = tok.ordinal
>> tok.colon
>> tok.identifier;
type_descriptor_
= (( atomic_type >> qi::attr(""))
| ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));
atomic_type = tok.t_int_4 [ _val = RBL_INT4]
| tok.t_int_8 [ _val = RBL_INT8]
| tok.t_string [ _val = RBL_STRING];
event_type = tok.event [_val = RBL_EVENT];
}
qi::rule<Iterator> start;
qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_;
qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_;
qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
qi::rule<Iterator, entry_type()> atomic_type;
qi::rule<Iterator, entry_type()> event_type;
qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_;
qi::rule<Iterator, oid()> oid_;
};
std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4; ";
int main()
{
typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef tokens<lexer_type>::iterator_type iterator_type;
tokens<lexer_type> token_lexer;
grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);
std::string::iterator it = test.begin();
iterator_type first = token_lexer.begin(it, test.end());
iterator_type last = token_lexer.end();
bool r;
r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);
if (!r)
{
std::cout << "parsing failed" << std::endl;
}
/*
lexer_type::iterator_type iter;
try
{
iter = token_lexer.begin(first,last);
}
catch(std::exception & e)
{
std::cout << e.what() << std::endl;
}
lexer_type::iterator_type end = token_lexer.end();
while (iter != end && token_is_valid(*iter))
++iter;
*/
}
Upvotes: 1
Views: 207
The tok.left_curly might seem like it is causing the problem; however, it is wired up correctly. The source comment does point to a token that was never associated with the lexer, and as cv_and_he discovered, there is indeed a token missing: repeated is declared as a token_def member but is never added to this->self, so the first time the event_qualifier alternative falls through to tok.repeated, the assertion fires.
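A minimal sketch of the fix, assuming the rest of the lexer stays as posted (the debug semantic actions are omitted here for brevity): add repeated to the alternatives assigned to this->self so the token definition gets associated with the lexer instance.
// Inside tokens<Lexer>::tokens(); repeated was declared as a member
// but never wired into the lexer, so tok.repeated carried no lexer
// state and token_def.hpp asserted when the grammar tried to use it.
this->self
    = left_curly
    | right_curly
    | left_paren
    | right_paren
    | colon
    | scolon
    | namespace_
    | event
    | optional
    | required
    | repeated      // the missing token definition
    | t_int_4
    | t_int_8
    | t_string
    | ordinal
    | identifier;
this->self("WS") = lex::token_def<>("[ \\t\\n]+");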
So, if a token is used in a grammar, make sure it is actually added to the lexer's definition (this->self), or you may get confusing errors while growing a grammar.
Upvotes: 3