Pablo
Pablo

Reputation: 611

Parsing blank lines and error detection in boost::spirit::qi parser

I need to update a parser to provide it with extra features.

The parser will be used to read all the lines in a script file and copy every one of them (including blank lines and #comments) into a std::vector, so the number of lines in the original file must equal the size of the vector.

Also, in case the script contains an error, the parser shall stop parsing, and store the number of the line where the error has arisen (not implemented in the example below).

My problems are:

1/ I am not able to write a rule that parses blank lines (a blank line may be an empty line or one containing only white spaces and/or tabs). I have tried qi::eps and some other attempts, but I have not managed to achieve it. Maybe it is related to the fact that qi::blank_type is used as the skipper in the rules, but I am not sure.

2/ Detecting the error line, if any, can be done in different ways: counting the number of lines in the file after parsing and comparing it with the size of the vector (this one is really naive, discard it), using an on_error clause and changing all sequence rules (a >> b) into expectation rules (a > b)... I wonder whether there is a better method to track the number of the line where the error occurs?

The code is next:

#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>

namespace qi = boost::spirit::qi;

// Kind of script line a Command represents. The enumerator order must stay in step with `names`.
enum class TYPE { NONE, LOG, END, JUMP_TO, WAIT_TIME, SEND, COMMENT, LABEL };

// Printable name of each TYPE, indexed by the enumerator's integer value.
const char* names[]{ "NONE", "LOG", "END", "JUMP_TO", "WAIT_TIME", "SEND", "COMMENT", "LABEL" };

// Guard against the enum and the name table drifting apart.
static_assert(sizeof(names) / sizeof(names[0]) == static_cast<int>(TYPE::LABEL) + 1,
              "names must hold exactly one entry per TYPE enumerator");

// One script line: its kind plus up to two textual arguments (unused arguments stay empty).
struct Command
{
    TYPE type = TYPE::NONE; // defaulted so a default-constructed Command never holds an indeterminate type
    std::string arg1;
    std::string arg2;
};

// The parsed script: one Command per line, in file order.
using Commands = std::vector<Command>;

// Adapt Command for Boost.Fusion so Qi can fill type, arg1 and arg2 (in that order) from the
// sequence of attributes a rule synthesizes.
BOOST_FUSION_ADAPT_STRUCT(Command, type, arg1, arg2)

// Qi grammar that turns a script into Commands: one entry per matched line, lines separated by
// `eol`, with blanks (spaces/tabs) skipped between tokens.
//
// It: iterator type over the input characters.
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
private:
    // Meant to match blank lines. Unlike the other line rules it has neither a Command attribute
    // nor a skipper, and it is never assigned (its definition in the constructor is commented out).
    qi::rule<It> none;
    // One rule per command kind; each synthesizes a Command and skips blanks between tokens.
    qi::rule<It, Command(), qi::blank_type> log;
    qi::rule<It, Command(), qi::blank_type> end;
    qi::rule<It, Command(), qi::blank_type> jump_to;
    qi::rule<It, Command(), qi::blank_type> wait_time;
    qi::rule<It, Command(), qi::blank_type> send;
    qi::rule<It, Command(), qi::blank_type> comment;
    qi::rule<It, Command(), qi::blank_type> label;//By its very nature, "label" must be the last command to be checked
    // Entry rule: the whole script. It has no skipper of its own; skipping is enabled inside via skip(blank).
    qi::rule<It, Commands()> start;

public:
    // Defines every rule and registers the error handler on `start`.
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        //none = *~char_("\r\n"); // 'none' rule should parse for blank lines, but I can not figure out how.

        // In the rules below, attr(x) consumes no input and only injects x into the Command being
        // built: first the TYPE tag, and (where the syntax has a single argument) an empty arg2.

        // LOG(<text>): <text> is one or more characters other than ')' or a line break, taken
        // verbatim because lexeme[] turns the blank skipper off.
        log = lit("LOG") >> '('
            >> attr(TYPE::LOG)
            >> lexeme[+~char_(")\r\n")] >> ')'
            >> attr(std::string{});//ignore arg2

        // END(<number>): raw[] stores the matched text in arg1 rather than the parsed double.
        end = lit("END") >> '('
            >> attr(TYPE::END)
            >> raw[double_] >> ')'//NOTE: "as_string[raw[double_]]" is also valid
            >> attr(std::string{});//ignore arg2

        // JUMP_TO(<label>): same argument shape as LOG.
        jump_to = lit("JUMP_TO") >> '('
            >> attr(TYPE::JUMP_TO)
            >> lexeme[+~char_(")\r\n")] >> ')'
            >> attr(std::string{});//ignore arg2

        // WAIT_TIME(<number>): same argument shape as END.
        wait_time = lit("WAIT_TIME") >> '('
            >> attr(TYPE::WAIT_TIME)
            >> raw[double_] >> ')'//NOTE: "as_string[raw[double_]]" is also valid
            >> attr(std::string{});//ignore arg2

        // SEND(<id>, <hex>): arg1 is the text up to the comma, arg2 the hexadecimal digits.
        send = lit("SEND") >> '('
            >> attr(TYPE::SEND)
            >> lexeme[+~char_(",)\r\n")] >> ','
            >> +xdigit >> ')';

        // '#' followed by at least one character up to the line break; that text becomes arg1.
        comment = lit("#")
            >> attr(TYPE::COMMENT)
            >> lexeme[+~char_("\r\n")]
            >> attr(std::string{});//ignore arg2

        // <name>: where <name> is one or more characters other than ':', space or a line break.
        label = attr(TYPE::LABEL)
            >> lexeme[+~char_(": \r\n")] >> ':'
            >> attr(std::string{});//ignore arg2

        // A script is a list of lines separated by eol; the alternatives are tried left to right.
        // NOTE(review): `none` is still undefined here, so presumably it never matches and blank
        // lines end the list early — confirm against the Qi rule documentation.
        start = skip(blank)[(log | end | jump_to | wait_time | send | comment | label | none) % eol];

        // Prints "Error detected" when the handler is invoked for `start`.
        // NOTE(review): Qi error handlers run on expectation (`>`) failures, and this grammar only
        // uses `>>`, so presumably the handler never fires — confirm.
        on_error<fail>
        (
            start,
            boost::phoenix::ref(std::cout) << "Error detected" << std::endl
        );
    }
};

// Parses the script read from `in` and returns the commands that were recognized.
//
// The stream is switched to std::noskipws so that spaces, tabs and line breaks reach the grammar.
// Throws std::runtime_error("command parse error") when qi::parse reports failure; input left
// unconsumed after a successful match is not treated as an error.
Commands parse(std::istream& in)
{
    using Iterator = boost::spirit::istream_iterator;

    // Built once and shared by every call.
    static const Parser<Iterator> grammar;

    Commands result;

    in >> std::noskipws; // keep whitespace: the grammar handles it itself
    Iterator cursor(in);
    Iterator const finish;

    bool const matched = qi::parse(cursor, finish, grammar, result);
    if (!matched)
    {
        throw std::runtime_error("command parse error");
    }

    return result;
}

int main()
{
    std::stringstream test1;
    test1 << "JUMP_TO(etiqueta)" << '\n'
        << "LOG(this is to be writen in the log)" << '\n'
        << "SEND(id_8, AF9E02CA7EFF)" << '\n'
        << "LOG(write me in a log 1)" << '\n'
        << "LOG(write me in a log 2" << '\n'  //On purpose error!!! Missed second parenthesis ')'
        << "END(5.75)" << '\n'
        << "LABEL1:" << '\n'
        << '\n'
        << "#On the fly comment" << '\n'
        << "WAIT_TIME(25)" << '\n'
        << "SEND(id_3, AF9E02CA7EFF)" << '\n';

    std::stringstream test2;
    test2 << "JUMP_TO(etiqueta)" << '\n'
        << "LOG(this is to write in the log)" << '\n'
        << "SEND(id_8, AF9E02CA7EFF)" << '\n'
        << "LOG(write me in a log 1)" << '\n'
        << "LOG(write me in a log 2)" << '\n'
        << "END(5.75)" << '\n'
        << "xxxxx non sense xxxxxx" << '\n'   //On purpose error!!! Nonsense sentence
        << "LABEL1:" << '\n'
        << '\n'
        << "#On the fly comment" << '\n'
        << "WAIT_TIME(25)" << '\n'
        << "SEND(id_3, AF9E02CA7EFF)" << '\n';

    std::stringstream test3;
    test3 << "JUMP_TO(etiqueta)" << '\n'
        << "LOG(this is to write in the log)" << '\n'
        << "SEND(id_8, AF9E02CA7EFF)" << '\n'
        << "LOG(write me in a log 1)" << '\n'
        << "LOG(write me in a log 2)" << '\n'
        << "END(5.75)" << '\n'
        << '\n'   //This is not an error, but a permitted blank line!!! It should be parses as NONE type
        << "LABEL1:" << '\n'
        << '\n'
        << "#On the fly comment" << '\n'
        << "WAIT_TIME(25)" << '\n'
        << "SEND(id_8, AF9E02CA7EFF)" << '\n';

    try
    {
        auto commands1 = parse(test1);
        std::cout << "Test1:\n";
        for (auto& cmd : commands1) std::cout << names[static_cast<int>(cmd.type)] << '\t' << cmd.arg1 << '\t' << cmd.arg2 << std::endl;//vector size must be 4 (4 lines in console) - Ok

        std::cout << "\nTest2:\n";
        auto commands2 = parse(test2);
        for (auto& cmd : commands2) std::cout << names[static_cast<int>(cmd.type)] << '\t' << cmd.arg1 << '\t' << cmd.arg2 << std::endl;//vector size must be 6 (6 lines in console) - Ok

        std::cout << "\nTest3:\n";
        auto commands3 = parse(test3);
        for (auto& cmd : commands3) std::cout << names[static_cast<int>(cmd.type)] << '\t' << cmd.arg1 << '\t' << cmd.arg2 << std::endl;//vector size must be 12 (12 lines in console) - Wrong
    }
    catch (std::exception const& e)
    {
        std::cout << e.what() << "\n";
    }
}

and its coliru link: http://coliru.stacked-crooked.com/a/c4830afc26371712

Any help will be greatly appreciated.

Upvotes: 1

Views: 201

Answers (1)

sehe
sehe

Reputation: 393134

First off, to keep lines in synch you should make none build a command attribute as well.

Second, to match all blanks:

none = *blank

But that would try to assign vector to a TYPE field, so we tell it to omit the attribute:

none = omit[*blank]

Now that might read zero or more blanks, but doesn't prove we're at the end of the line, let's assert that:

none = omit[*blank] >> &eol

But what if the last line is an empty one? Well with the % eol that's not an issue, but in the interest of clean/defensive code:

none = omit[*blank] >> &(eol | eoi)

Now, let's make sure the result is a NONE command with no args:

none = omit[*blank] >> &(eol | eoi) //
        >> attr(TYPE::NONE) >> stub_arg >> stub_arg;

Here I hid the attr(string{}) thing under a prettier name:

auto stub_arg = copy(attr(std::string{}));

By now we have a reasonably working demo:

//#define BOOST_SPIRIT_DEBUG
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>

namespace qi = boost::spirit::qi;

// Kind of script line; the enumerator order matches the label table in operator<< below.
enum class TYPE { NONE, LOG, END, JUMP_TO, WAIT_TIME, SEND, COMMENT, LABEL };

// Streams the enumerator's name. A value outside the table makes std::array::at throw
// std::out_of_range.
static inline std::ostream& operator<<(std::ostream& os, TYPE t) {
    static constexpr std::array<char const*, 8> labels{{
        "NONE", "LOG", "END", "JUMP_TO", "WAIT_TIME", "SEND", "COMMENT", "LABEL",
    }};

    auto const index = static_cast<int>(t);
    os << labels.at(index);
    return os;
}

// One script line: its kind (NONE unless set) plus up to two textual arguments.
struct Command {
    TYPE        type = TYPE::NONE;
    std::string arg1;
    std::string arg2;
};

// The parsed script, one Command per line.
using Commands = std::vector<Command>;

// Adapt Command for Boost.Fusion so Qi can fill type, arg1 and arg2 (in that order) from the
// sequence of attributes a rule synthesizes.
BOOST_FUSION_ADAPT_STRUCT(Command, type, arg1, arg2)

// Qi grammar that turns a script into Commands: one entry per line, lines separated by `eol`,
// with blanks (spaces/tabs) skipped between tokens. Blank lines yield a TYPE::NONE entry.
//
// It: iterator type over the input characters.
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
  private:
    // One rule per line kind; each synthesizes a Command and uses the blank skipper.
    qi::rule<It, Command(), qi::blank_type> none, //
        log, end, jump_to, wait_time, send, comment, label;
    // Entry rule: the whole script. Skipping is enabled inside it via skip(blank).
    qi::rule<It, Commands()> start;

  public:
    // Defines every rule; attr(x) consumes no input and only injects x into the Command being built.
    Parser() : Parser::base_type(start)
    {
        using namespace qi;
        // Placeholder for an unused argument: injects an empty string. copy() stores the
        // expression by value so the local variable can be reused in the rule definitions.
        auto stub_arg = copy(attr(std::string{}));

        // LOG(<text>) / JUMP_TO(<label>): the argument is one or more characters other than ')'
        // or a line break, taken verbatim because lexeme[] turns the skipper off.
        // END(<number>) / WAIT_TIME(<number>): raw[] stores the matched text, not the parsed double.
        // SEND(<id>, <hex>): arg1 is the text up to the comma, arg2 the hexadecimal digits.
        // comment: '#' followed by at least one character up to the line break.
        // label: one or more characters other than ':', space or a line break, then ':'.
        log = lit("LOG") >> '('                 //
            >> attr(TYPE::LOG)                  //
            >> lexeme[+~char_(")\r\n")] >> ')'  //
            >> stub_arg;                        //
        end = lit("END") >> '('                 //
            >> attr(TYPE::END)                  //
            >> raw[double_] >> ')'              //
            >> stub_arg;                        //
        jump_to = lit("JUMP_TO") >> '('         //
            >> attr(TYPE::JUMP_TO)              //
            >> lexeme[+~char_(")\r\n")] >> ')'  //
            >> stub_arg;                        //
        wait_time = lit("WAIT_TIME") >> '('     //
            >> attr(TYPE::WAIT_TIME)            //
            >> raw[double_] >> ')'              //
            >> stub_arg;                        //
        send = lit("SEND") >> '('               //
            >> attr(TYPE::SEND)                 //
            >> lexeme[+~char_(",)\r\n")] >> ',' //
            >> +xdigit >> ')';
        comment = lit("#")                      //
            >> attr(TYPE::COMMENT)              //
            >> lexeme[+~char_("\r\n")]          //
            >> stub_arg;                        //
        label = attr(TYPE::LABEL)               //
            >> lexeme[+~char_(": \r\n")] >> ':' //
            >> stub_arg;                        //

        // Blank line: optional blanks (attribute discarded by omit[]), then a lookahead asserting
        // that the line or the input ends here; the result is a NONE command with empty arguments.
        none = omit[*blank] >> &(eol | eoi) //
            >> attr(TYPE::NONE) >> stub_arg >> stub_arg;

        // A script is a list of lines separated by eol; the alternatives are tried left to right,
        // so `none` is only attempted when no command, comment or label matched.
        start =
            skip(blank)[ //
                (log | end | jump_to | wait_time | send | comment | label | none
                 ) %
                eol];

        // Names the rules for Spirit's debug output (see the commented-out BOOST_SPIRIT_DEBUG define).
        BOOST_SPIRIT_DEBUG_NODES(
            (none)(log)(end)(jump_to)(wait_time)(send)(comment)(label)(start))
    }
};

// Parses `text` and returns the commands recognized before parsing stopped.
//
// The outcome of qi::parse is deliberately ignored: a failed or partial parse simply yields
// fewer commands than the script has lines, and no exception is thrown.
Commands parse(std::string text)
{
    using Iterator = boost::spirit::istream_iterator;

    // Built once and shared by every call.
    static const Parser<Iterator> grammar;

    std::istringstream in(std::move(text));
    in >> std::noskipws; // keep whitespace: the grammar handles it itself

    Commands result;
    Iterator cursor(in);
    Iterator const finish;

    // Previously: throw std::runtime_error("command parse error") on failure.
    static_cast<void>(qi::parse(cursor, finish, grammar, result));

    return result;
}

int main()
{
    for (std::string const test : {
             R"(JUMP_TO(etiqueta)
                LOG(this is to be writen in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2
                END(5.75)
                LABEL1:

                #On the fly comment
                WAIT_TIME(25)
                SEND(id_3, AF9E02CA7EFF))",
             R"(JUMP_TO(etiqueta)
                LOG(this is to write in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2)
                END(5.75)
                xxxxx non sense xxxxxx
                LABEL1:

                #On the fly comment
                WAIT_TIME(25)
                SEND(id_3, AF9E02CA7EFF))",
             R"(JUMP_TO(etiqueta)
                LOG(this is to write in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2)
                END(5.75)
                
                LABEL1:

                #On the fly comment
                WAIT_TIME(25)
                SEND(id_8, AF9E02CA7EFF))"})
        try {
            std::cout << "================================\n";
            std::vector<std::string_view> lines;
            boost::algorithm::split(
                lines, test, boost::algorithm::is_any_of("\n"));
            auto commands = parse(test);

            for (size_t i = 0;
                 i < std::min(std::size(commands), std::size(lines)); ++i) //
            {
                std::cout << "#" << std::left << std::setw(4) << i
                          << " " << std::quoted(lines[i]) << "\n";

                auto& cmd = commands[i];
                std::cout << std::setw(6) << " -> " << cmd.type;

                std::cout << "(" //
                          << std::quoted(cmd.arg1) << ", "
                          << std::quoted(cmd.arg2) << ")" << std::endl;
            }

            for (size_t i = std::size(commands); i < std::size(lines); ++i) //
            {
                std::cout << "#" << std::left << std::setw(4) << i
                          << " " << std::quoted(lines[i]) << "\n";
            }
        } catch (std::exception const& e) {
            std::cout << e.what() << "\n";
        }
}

Prints (live on Wandbox and Compiler Explorer):

================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to be writen in the log)"
 ->   LOG("this is to be writen in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2"
#5    "                END(5.75)"
#6    "                LABEL1:"
#7    ""
#8    "                #On the fly comment"
#9    "                WAIT_TIME(25)"
#10   "                SEND(id_3, AF9E02CA7EFF)"
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to write in the log)"
 ->   LOG("this is to write in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2)"
 ->   LOG("write me in a log 2", "")
#5    "                END(5.75)"
 ->   END("5.75", "")
#6    "                xxxxx non sense xxxxxx"
#7    "                LABEL1:"
#8    ""
#9    "                #On the fly comment"
#10   "                WAIT_TIME(25)"
#11   "                SEND(id_3, AF9E02CA7EFF)"
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to write in the log)"
 ->   LOG("this is to write in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2)"
 ->   LOG("write me in a log 2", "")
#5    "                END(5.75)"
 ->   END("5.75", "")
#6    "                "
 ->   NONE("", "")
#7    "                LABEL1:"
 ->   LABEL("LABEL1", "")
#8    ""
 ->   NONE("", "")
#9    "                #On the fly comment"
 ->   COMMENT("On the fly comment", "")
#10   "                WAIT_TIME(25)"
 ->   WAIT_TIME("25", "")
#11   "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")

OTHER STUFF

I'd simplify a lot. Also, it might be nice to add a "catch-all" handler to mark erroneous lines, so like

// Blank line: optional blanks (discarded), then a lookahead for end-of-line or end-of-input;
// yields a NONE command with two empty arguments.
none = omit[*blank] >> &(eol | eoi) //
    >> attr(TYPE::NONE) >> stub_arg >> stub_arg;
// Catch-all: discards everything up to the line break and yields a syntax-error entry with two
// empty arguments.
// NOTE(review): `_SYN_ERR` starts with an underscore followed by an uppercase letter, a form of
// identifier that C++ reserves for the implementation — consider renaming it where the enum is
// declared (not shown in this snippet).
fail = omit[*~char_("\r\n")] //
    >> attr(TYPE::_SYN_ERR) >> stub_arg >> stub_arg;

Then you could get: Live On Compiler Explorer/Wandbox

================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to be writen in the log)"
 ->   LOG("this is to be writen in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2"
 ->   #SYNTAX
#5    "                END(5.75)"
 ->   END("5.75", "")
#6    "                LABEL1:"
 ->   LABEL("LABEL1", "")
#7    ""
 ->   NONE("", "")
#8    "                #On the fly comment"
 ->   COMMENT("On the fly comment", "")
#9    "                WAIT_TIME(25)"
 ->   WAIT_TIME("25", "")
#10   "                SEND(id_3, AF9E02CA7EFF)"
 ->   SEND("id_3", "AF9E02CA7EFF")
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to write in the log)"
 ->   LOG("this is to write in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2)"
 ->   LOG("write me in a log 2", "")
#5    "                END(5.75)"
 ->   END("5.75", "")
#6    "                xxxxx non sense xxxxxx"
 ->   #SYNTAX
#7    "                LABEL1:"
 ->   LABEL("LABEL1", "")
#8    ""
 ->   NONE("", "")
#9    "                #On the fly comment"
 ->   COMMENT("On the fly comment", "")
#10   "                WAIT_TIME(25)"
 ->   WAIT_TIME("25", "")
#11   "                SEND(id_3, AF9E02CA7EFF)"
 ->   SEND("id_3", "AF9E02CA7EFF")
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO("etiqueta", "")
#1    "                LOG(this is to write in the log)"
 ->   LOG("this is to write in the log", "")
#2    "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")
#3    "                LOG(write me in a log 1)"
 ->   LOG("write me in a log 1", "")
#4    "                LOG(write me in a log 2)"
 ->   LOG("write me in a log 2", "")
#5    "                END(5.75)"
 ->   END("5.75", "")
#6    "                "
 ->   NONE("", "")
#7    "                LABEL1:"
 ->   LABEL("LABEL1", "")
#8    "                \"\\n\""
 ->   #SYNTAX
#9    "                #On the fly comment"
 ->   COMMENT("On the fly comment", "")
#10   "                WAIT_TIME(25)"
 ->   WAIT_TIME("25", "")
#11   "                SEND(id_8, AF9E02CA7EFF)"
 ->   SEND("id_8", "AF9E02CA7EFF")

BONUS TAKE

Here's what I had in mind with "simplify" above:

    // Keyword -> call type lookup; every keyword maps to its own enumerator.
    calltype_.add                           //
        ("LOG", Ast::Call::LOG)             //
        ("END", Ast::Call::END)             // was Ast::Call::LOG, which made END(...) parse as a LOG call
        ("WAIT_TIME", Ast::Call::WAIT_TIME) //
        ("SEND", Ast::Call::SEND)           //
        ("JUMP_TO", Ast::Call::JUMP_TO)     //
        ;

    // KEYWORD(arg, arg, ...): a known keyword with a comma-separated argument list in parentheses.
    call    = calltype_ >> '(' >> arg % ',' >> ')';
    // One argument: one or more characters other than parentheses, comma or a line break.
    arg     = +~char_("(),\r\n");
    // '#' followed by at least one character up to the line break.
    comment = "#" >> +~char_("\r\n");
    // Empty line: lookahead only, matches at end-of-line or end-of-input.
    empty   = &(eol | eoi);
    // Catch-all: discards the rest of the line and yields Ast::Invalid.
    invalid = omit[*~char_("\r\n")] >> attr(Ast::Invalid{});
    // <name>: where <name> is one or more characters other than ':', space or a line break.
    label   = +~char_(": \r\n") >> ':';

    // Lines separated by eol, alternatives tried left to right (invalid last); the whole input
    // must be consumed.
    start = skip(blank)[(call | comment | label | empty | invalid) % eol] >>
        eoi;

That's the whole grammar. And the output is now

// Split the raw text on '\n' so each parsed entry can be shown next to its source line.
split(lines, test, boost::algorithm::is_any_of("\n"));
auto script = parse(test);

for (size_t i = 0; i < std::size(lines); ++i) {
    // The stray `trim_copy(lines[i]);` statement that stood here discarded its result and had no
    // effect; the line is trimmed once, where it is printed.
    std::cout << "#" << std::left << std::setw(4) << i << " "
              << std::quoted(trim_copy(lines[i])) << "\n";

    // Lines without a parsed entry are printed on their own.
    if (i < script.size()) {
        std::cout << std::setw(6) << " -> " << script[i] << "\n";
    }
}

Full Live Demo: Live On Wandbox/Compiler Explorer

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>

using boost::algorithm::split;
using boost::algorithm::trim_copy;
namespace qi = boost::spirit::qi;

// Syntax tree for a script: every source line becomes one Ast::Line.
namespace Ast {
    using Arg  = std::string;
    using Args = std::vector<Arg>;

    // Line kinds that carry no data.
    struct Empty { };
    struct Invalid { };

    // Line kinds that carry a single piece of text.
    struct Label { std::string name; };
    struct Comment { std::string text; };

    // A command invocation: the keyword's type plus its arguments.
    struct Call {
        enum Type { LOG, END, JUMP_TO, WAIT_TIME, SEND };
        Type type;
        Args args;

        // True when the argument count fits the call type: two for SEND, one for every other
        // enumerator. A value that is not one of the enumerators is never valid.
        bool is_valid() const
        {
            if (type == Type::SEND) {
                return args.size() == 2;
            }

            bool const takes_one = type == Type::LOG || type == Type::END ||
                type == Type::JUMP_TO || type == Type::WAIT_TIME;
            return takes_one && args.size() == 1;
        }

        // Streams the enumerator's name; a value outside the table makes std::array::at throw.
        friend inline std::ostream& operator<<(std::ostream& os, Type t) {
            static constexpr std::array<char const*, 5> labels{{
                "LOG", "END", "JUMP_TO", "WAIT_TIME", "SEND",
            }};
            auto const index = static_cast<int>(t);
            os << labels.at(index);
            return os;
        }

        // Streams the call as TYPE(arg1,arg2,...), with no space after the commas.
        friend inline std::ostream& operator<<(std::ostream& os, Call const& c) {
            os << c.type << "(";
            char const* separator = "";
            for (auto const& arg : c.args) {
                os << separator << arg;
                separator = ",";
            }
            os << ")";
            return os;
        }
    };

    // Any line of the script. Empty comes first in the list of alternatives.
    using Line   = boost::variant<Empty, Call, Label, Comment, Invalid>;
    using Script = std::vector<Line>;

    // Printers for the remaining line kinds.
    static inline std::ostream& operator<<(std::ostream& os, Invalid)
    {
        os << "#INVALID COMMAND LINE";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Empty)
    {
        os << "(empty line)";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Label const& l)
    {
        os << l.name << ":";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Comment const& c)
    {
        os << "#" << c.text;
        return os;
    }
} // namespace Ast

// Adapt the data-carrying AST nodes for Boost.Fusion so Qi can fill their members from the
// attributes the rules synthesize.
BOOST_FUSION_ADAPT_STRUCT(Ast::Call, type, args)
BOOST_FUSION_ADAPT_STRUCT(Ast::Label, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::Comment, text)

// Qi grammar that turns a script into an Ast::Script with one Ast::Line per source line.
// Lines that match nothing else become Ast::Invalid, and the whole input must be consumed.
//
// It: iterator type over the input characters.
template <typename It>
class Parser : public qi::grammar<It, Ast::Script()>
{
  public:
    // Fills the keyword table and defines every rule.
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        // Keyword -> call type lookup; every keyword maps to its own enumerator.
        calltype_.add                           //
            ("LOG", Ast::Call::LOG)             //
            ("END", Ast::Call::END)             // was Ast::Call::LOG, which made END(...) parse as a LOG call
            ("WAIT_TIME", Ast::Call::WAIT_TIME) //
            ("SEND", Ast::Call::SEND)           //
            ("JUMP_TO", Ast::Call::JUMP_TO)     //
            ;

        // KEYWORD(arg, arg, ...): a known keyword with a comma-separated argument list in parentheses.
        call    = calltype_ >> '(' >> arg % ',' >> ')';
        // One argument: one or more characters other than parentheses, comma or a line break.
        arg     = +~char_("(),\r\n");
        // '#' followed by at least one character up to the line break.
        comment = "#" >> +~char_("\r\n");
        // Empty line: lookahead only, matches at end-of-line or end-of-input.
        empty   = &(eol | eoi);
        // Catch-all: discards the rest of the line and yields Ast::Invalid.
        invalid = omit[*~char_("\r\n")] >> attr(Ast::Invalid{});
        // <name>: where <name> is one or more characters other than ':', space or a line break.
        label   = +~char_(": \r\n") >> ':';

        // Lines separated by eol, alternatives tried left to right (invalid last, so it only
        // catches lines nothing else matched); the trailing eoi requires the whole input to be consumed.
        start = skip(blank)[(call | comment | label | empty | invalid) % eol] >>
            eoi;

        // Names the rules for Spirit's debug output.
        BOOST_SPIRIT_DEBUG_NODES((empty)(call)(arg)(comment)(label)(start)(invalid))
    }

  private:
    qi::symbols<char, Ast::Call::Type>        calltype_; // keyword table used by `call`
    qi::rule<It, Ast::Call(), qi::blank_type> call;      // the only rule declared with the blank skipper
    qi::rule<It, Ast::Script()>               start;     // entry rule: the whole script

    // Declared without a skipper.
    qi::rule<It, Ast::Arg()>     arg;
    qi::rule<It, Ast::Comment()> comment;
    qi::rule<It, Ast::Empty()>   empty;
    qi::rule<It, Ast::Invalid()> invalid;
    qi::rule<It, Ast::Label()>   label;
};

// Parses `text` into an Ast::Script.
//
// Throws std::runtime_error("command parse error") when qi::parse reports failure.
Ast::Script parse(std::string text)
{
    using Iterator = boost::spirit::istream_iterator;

    // Built once and shared by every call.
    static const Parser<Iterator> grammar;

    std::istringstream in(std::move(text));
    in >> std::noskipws; // keep whitespace: the grammar handles it itself

    Ast::Script result;
    Iterator cursor(in);
    Iterator const finish;

    bool const matched = qi::parse(cursor, finish, grammar, result);
    if (!matched) {
        throw std::runtime_error("command parse error");
    }

    return result;
}

int main()
{
    for (std::string const& test : {
             R"(JUMP_TO(etiqueta)
                LOG(this is to be writen in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2
                END(5.75)
                LABEL1:

                #On the fly comment
                WAIT_TIME(25)
                SEND(id_3, AF9E02CA7EFF))",
             R"(JUMP_TO(etiqueta)
                LOG(this is to write in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2)
                END(5.75)
                xxxxx non sense xxxxxx
                LABEL1:

                #On the fly comment
                WAIT_TIME(25)
                SEND(id_3, AF9E02CA7EFF))",
             R"(JUMP_TO(etiqueta)
                LOG(this is to write in the log)
                SEND(id_8, AF9E02CA7EFF)
                LOG(write me in a log 1)
                LOG(write me in a log 2)
                END(5.75)
                
                LABEL1:
                "\n"
                #On the fly comment
                WAIT_TIME(25)
                SEND(id_8, AF9E02CA7EFF))"})
        try {
            std::cout << "================================\n";
            std::vector<std::string_view> lines;
            split(lines, test, boost::algorithm::is_any_of("\n"));
            auto script = parse(test);

            for (size_t i = 0; i < std::size(lines); ++i) {
                std::cout << "#" << std::left << std::setw(4) << i << " "
                          << std::quoted(trim_copy(lines[i])) << "\n";

                if (i < script.size()) {
                    std::cout << std::setw(6) << " -> " << script[i] << "\n";
                }
            }
        } catch (std::exception const& e) {
            std::cout << e.what() << "\n";
        }
}

Printing

================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO(etiqueta)
#1    "LOG(this is to be writen in the log)"
 ->   LOG(this is to be writen in the log)
#2    "SEND(id_8, AF9E02CA7EFF)"
 ->   SEND(id_8,AF9E02CA7EFF)
#3    "LOG(write me in a log 1)"
 ->   LOG(write me in a log 1)
#4    "LOG(write me in a log 2"
 ->   #INVALID COMMAND LINE
#5    "END(5.75)"
 ->   LOG(5.75)
#6    "LABEL1:"
 ->   LABEL1:
#7    ""
 ->   (empty line)
#8    "#On the fly comment"
 ->   #On the fly comment
#9    "WAIT_TIME(25)"
 ->   WAIT_TIME(25)
#10   "SEND(id_3, AF9E02CA7EFF)"
 ->   SEND(id_3,AF9E02CA7EFF)
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO(etiqueta)
#1    "LOG(this is to write in the log)"
 ->   LOG(this is to write in the log)
#2    "SEND(id_8, AF9E02CA7EFF)"
 ->   SEND(id_8,AF9E02CA7EFF)
#3    "LOG(write me in a log 1)"
 ->   LOG(write me in a log 1)
#4    "LOG(write me in a log 2)"
 ->   LOG(write me in a log 2)
#5    "END(5.75)"
 ->   LOG(5.75)
#6    "xxxxx non sense xxxxxx"
 ->   #INVALID COMMAND LINE
#7    "LABEL1:"
 ->   LABEL1:
#8    ""
 ->   (empty line)
#9    "#On the fly comment"
 ->   #On the fly comment
#10   "WAIT_TIME(25)"
 ->   WAIT_TIME(25)
#11   "SEND(id_3, AF9E02CA7EFF)"
 ->   SEND(id_3,AF9E02CA7EFF)
================================
#0    "JUMP_TO(etiqueta)"
 ->   JUMP_TO(etiqueta)
#1    "LOG(this is to write in the log)"
 ->   LOG(this is to write in the log)
#2    "SEND(id_8, AF9E02CA7EFF)"
 ->   SEND(id_8,AF9E02CA7EFF)
#3    "LOG(write me in a log 1)"
 ->   LOG(write me in a log 1)
#4    "LOG(write me in a log 2)"
 ->   LOG(write me in a log 2)
#5    "END(5.75)"
 ->   LOG(5.75)
#6    ""
 ->   (empty line)
#7    "LABEL1:"
 ->   LABEL1:
#8    "\"\\n\""
 ->   #INVALID COMMAND LINE
#9    "#On the fly comment"
 ->   #On the fly comment
#10   "WAIT_TIME(25)"
 ->   WAIT_TIME(25)
#11   "SEND(id_8, AF9E02CA7EFF)"
 ->   SEND(id_8,AF9E02CA7EFF)

Upvotes: 2

Related Questions