Reputation: 463
I am trying to parse a string with boost spirit qi, which has the following form:
"\0help@masonlive.gmu.edu\0test\r\n"
using the following grammar. Here is the header (.hpp):
/// Spirit.Qi grammar over a raw byte buffer. Its attribute is a tuple of two
/// iterator ranges over `const boost::uint8_t*`.
class EmailGrammar :
    public boost::spirit::qi::grammar< const boost::uint8_t*,
        boost::tuple< boost::iterator_range< const boost::uint8_t* >,
                      boost::iterator_range< const boost::uint8_t* > >() >
{
public:
    /// Shared grammar object; only declared here.
    const static EmailGrammar instance;
    EmailGrammar ();
    /* omitting uninteresting stuff here i.e. constructors and assignment */
private:
    /// Start rule. Uses the same iterator type and attribute signature as the
    /// grammar base class above. Note the closing `>` of boost::tuple before
    /// the `()` -- without it the rule's template argument list never closes.
    boost::spirit::qi::rule< const boost::uint8_t*,
        boost::tuple<
            boost::iterator_range< const boost::uint8_t* >,
            boost::iterator_range< const boost::uint8_t* > >() > m_start;
};
and the cpp of the grammar looks like this:
/// Builds the start rule and registers it as the grammar's entry point.
EmailGrammar::EmailGrammar()
    : EmailGrammar::base_type(m_start)
    , m_start()
{
    namespace qi = boost::spirit::qi;

    // Layout matched, in order:
    //   NUL, bytes up to the next NUL (captured as a range),
    //   NUL, bytes up to the end of line (captured as a range),
    //   end of line, end of input.
    m_start = qi::lit('\0')
           >> qi::raw[*(qi::char_ - qi::lit('\0'))]
           >> qi::lit('\0')
           >> qi::raw[*(qi::char_ - qi::eol)]
           >> qi::eol
           >> qi::eoi;
}
I intend to use this grammar to parse the two strings and break them into two separate iterator ranges.
This is then called like so:
int main()
{
typedef typename EmailGrammar::start_type::attr_type attr;
std::string testStr("\[email protected]\0test\r\n");
// this is not done this way in the real code just as a test
boost::iterator_range<const boost::uint8_t*> data =
boost::make_iterator_range(
reinterpret_cast< const boost::uint8_t* >(testStr.data()),
reinterpret_cast< const boost::uint8_t* >(testStr.data() + testStr.length()));
attr exposedAttribute;
if (boost::spirit::qi::parse(data.begin(),
data.end(),
EmailGrammar::instance,
exposedAttribute)
{
std::cout << "success" << std::endl;
}
}
The problem appears to be in parsing the null terminator. I think so because when I add debug(m_start);
to the code I get the following XML output:
<unnamed-rule>
<try></try>
<fail/>
</unnamed-rule>
However, if I explicitly erase, for example, the first null terminator, I get the output:
<unnamed-rule>
<try>help@masonlive.gmu.edu</try>
<fail/>
</unnamed-rule>
Which leads to the questions:
How does one parse null terminators with Spirit? I have looked for documentation and haven't been able to find any information on it, except for a mention of null-terminated strings at the very bottom of this page, which describes the default models in Spirit.
Does Spirit perform look-ahead such that, if the parser sees during look-ahead that the input does not end correctly, the parse automatically fails?
Is there any documentation I am missing that I can use to read up on this sort of behavior?
Upvotes: 1
Views: 275
Reputation: 393114
More than likely the whole problem originates here:
std::string testStr("\0help@masonlive.gmu.edu\0test\r\n"); // read as a C string: ends at the first '\0'
doesn't do what you think. The `const char*` constructor treats the literal as a C string and stops at the first NUL, so it creates an empty string. Instead, specify the length of the raw literal/buffer:
// 30 = 1 (NUL) + 22 (address) + 1 (NUL) + 4 ("test") + 2 (CRLF); the literal's
// implicit terminator is excluded so that the input ends right after the CRLF.
std::string testStr("\0help@masonlive.gmu.edu\0test\r\n", 30);
If you don't want to do the math/counting (you shouldn't!), make a helper:
/// Copies a character array (typically a string literal) into a std::string,
/// keeping any embedded NUL characters.
///
/// @param p                  Reference to the array; its length N is deduced.
/// @param include_terminator When true, all N elements are copied; when false
///                           (the default), the last element is left out.
/// @return A string holding the selected elements of `p`.
template <typename Char, size_t N>
std::string bake(Char const (&p)[N], bool include_terminator = false) {
    Char const* const last = include_terminator ? p + N : p + N - 1;
    return std::string(p, last);
}
Which you can then use like:
// The array length is deduced by bake(), so the embedded '\0' bytes are kept.
std::string const testStr = bake("\0help@masonlive.gmu.edu\0test\r\n");
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/boost_tuple.hpp>
// Shorthand for the Spirit.Qi namespace.
namespace qi = boost::spirit::qi;
// Iterator type used by the grammar: a pointer to constant bytes.
using It = uint8_t const*;
// A pair of byte iterators delimiting a slice of the input.
using Range = boost::iterator_range<It>;
// Attribute of the grammar and of its start rule: two input slices.
using Attribute = boost::tuple<Range, Range>;
/// Spirit.Qi grammar over a byte buffer. Its attribute is `Attribute`, a
/// tuple of two `Range` values.
class EmailGrammar : public qi::grammar<It, Attribute()> {
  public:
    /// Shared grammar object, defined right after the class.
    static const EmailGrammar instance;

    /// Builds the start rule and registers it for debug tracing.
    EmailGrammar() : EmailGrammar::base_type(m_start)
    {
        // Layout matched, in order:
        //   NUL, bytes up to the next NUL (captured as a range),
        //   NUL, bytes up to the end of line (captured as a range),
        //   end of line, end of input.
        m_start = '\0'
               >> qi::raw[*(qi::char_ - '\0')]
               >> '\0'
               >> qi::raw[*(qi::char_ - qi::eol)]
               >> qi::eol
               >> qi::eoi;

        BOOST_SPIRIT_DEBUG_NODES((m_start))
    }

  private:
    /// Start rule; same iterator and attribute signature as the grammar.
    qi::rule<It, Attribute()> m_start;
};

EmailGrammar const EmailGrammar::instance{};
/// Copies a character array (typically a string literal) into a std::string,
/// keeping any embedded NUL characters.
///
/// @param p                  Reference to the array; its length N is deduced.
/// @param include_terminator When true, all N elements are copied; when false
///                           (the default), the last element is left out.
/// @return A string holding the selected elements of `p`.
template <typename Char, size_t N>
std::string bake(Char const (&p)[N], bool include_terminator = false) {
    Char const* const last = include_terminator ? p + N : p + N - 1;
    return std::string(p, last);
}
// Test driver: parses a fixed buffer with EmailGrammar and reports success.
int main() {
    // The buffer must start with '\0' (the grammar's first element); bake()
    // keeps the embedded NUL bytes and drops only the literal's terminator.
    std::string const testStr = bake("\0help@masonlive.gmu.edu\0test\r\n");
    // qi::parse advances `f`, so it has to be a named, non-const iterator.
    It f = reinterpret_cast<It>(testStr.data()),
       l = f + testStr.length();
    Attribute exposedAttribute;
    if (boost::spirit::qi::parse(f, l, EmailGrammar::instance, exposedAttribute)) {
        std::cout << "success" << std::endl;
    }
}
Prints
<m_start>
<try></try>
<success></success>
<attributes>[[[h, e, l, p, @, m, a, s, o, n, l, i, v, e, ., g, m, u, ., e, d, u], [t, e, s, t]]]</attributes>
</m_start>
success
Upvotes: 1