Reputation: 35
I have a list of key value pairs, separated by EOL.
I got Boost Spirit to do what I want for properly formatted lines (e.g. "MyKey : MyValue \r\n MyKey2 : MyValue2").
Now my problem is that I want to skip lines that do not comply. For example:
This is some title line!
Key1:Value1
Some more gibberish to skip
Key2:Value2
I came up with the following code that I thought would work, but instead, the resulting map is empty and parsing fails.
In `KeyRule`, I added `- qi::eol` to avoid eating up the invalid line until the first key/value separator is encountered. In `ItemRule`, both `PairRule`s are made optional and the `eol` is 1 or more to address multiple line breaks.
I read the following thread:
Why does parsing a blank line with Spirit produce an empty key value pair in map?
It skips the comment line (starting with #) via a custom skipper, but in my case I want to skip ANY line that does not contain the key/value separator `:`.
There has to be something elegant.
#include <iostream>
#include <string>
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
namespace qi = boost::spirit::qi;
// Qi grammar that reads EOL-separated "key : value" lines into a std::map.
//
// NOTE(review): this grammar has no alternative for a line that lacks a ':'.
// On such a line PairRule fails, the optional matches nothing, and the
// following +qi::eol cannot match the line's text, so ItemRule stops there and
// the remaining input is left unparsed.
template <typename Iterator, typename Skipper = qi::blank_type>
struct KeyValueParser
    : qi::grammar<Iterator, std::map<std::string, std::string>(), Skipper>
{
    KeyValueParser()
        : KeyValueParser::base_type(ItemRule)
    {
        // Leaf rules first. raw[] exposes the matched input range as the
        // string attribute; neither rule is allowed to run past the line end.
        ValueRule = qi::raw[+(qi::char_ - qi::eol)];
        KeyRule = qi::raw[+(qi::char_ - ':' - qi::eol)];

        // One "key : value" entry.
        PairRule = KeyRule >> ':' >> ValueRule;

        // Optional pairs separated by one or more line breaks, with an
        // optional trailing line break.
        ItemRule = -PairRule >> *(+qi::eol >> -PairRule) >> -qi::eol;
    }

    // Start rule: the whole input, synthesizing the map.
    qi::rule<Iterator, std::map<std::string, std::string>(), Skipper> ItemRule;
    // A single entry, synthesizing a (key, value) pair.
    qi::rule<Iterator, std::pair<std::string, std::string>(), Skipper> PairRule;
    // Text before the ':' on a line.
    qi::rule<Iterator, std::string(), Skipper> KeyRule;
    // Text after the ':' up to the line end.
    qi::rule<Iterator, std::string(), Skipper> ValueRule;
};
int main() {
const std::string input = " Line To Skip! \r\n My Key : Value \r\n My2ndKey : Long Value \r\n";
std::string::const_iterator iter = input.begin(), end = input.end();
KeyValueParser<std::string::const_iterator> parser;
typedef std::map<std::string, std::string> MyMap;
MyMap parsed_map;
bool result = qi::phrase_parse(iter, end, parser, qi::blank, parsed_map);
if (result && (iter == end)) {
std::cout << "Success." << std::endl;
for (MyMap::const_iterator pIter = parsed_map.begin(); pIter != parsed_map.end(); ++pIter) {
std::cout << "\"" << pIter->first << "\" : \"" << pIter->second << "\"" << std::endl;
}
} else {
std::cout << "Something failed. Unparsed: ->|" << std::string(iter, end) << "|<-" << std::endl;
}
getchar();
return 0;
}
Upvotes: 2
Views: 686
Reputation: 392979
The most elegant approach I can think of is to parse a key/value pair *optionally*, followed by any gibberish up to the end of the line.
You could write:
ItemRule = -PairRule % (*~char_("\r\n") >> eol);
The only caveat is that on gibberish lines, the "default" pair (empty key & value) will be inserted, so you'd have to remove that post-parse.
An equivalent way to write it (but less elegant) would be:
ItemRule = (hold[PairRule] | omit[ *~char_("\r\n") ]) % eol;
Here's a full demo. Note I also moved the skipper knowledge inside the grammar (it's essential to the correct operation of the grammar).
Finally, I used BOOST_SPIRIT_DEBUG to print debug output.
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
/// Qi grammar that reads EOL-separated `key : value` lines into a std::map and
/// tolerates lines that are not key/value pairs.
///
/// The blank skipper is applied inside the grammar through qi::skip, so the
/// grammar itself is declared without a skipper and is used with qi::parse.
/// A line that is not a pair still contributes a default (empty key, empty
/// value) entry to the map; callers remove it after parsing.
template <typename Iterator>
struct KeyValueParser : qi::grammar<Iterator, std::map<std::string, std::string>()> {
    KeyValueParser() : KeyValueParser::base_type(ItemRule) {
        using namespace qi;

        // Every line is either a key/value pair or arbitrary text that is
        // matched and discarded. hold[] keeps a PairRule that fails part-way
        // (key matched, no ':') from leaving partial data in the attribute.
        // The `% eol` separator sits outside skip[], so eol is matched without
        // a skipper; both alternatives consume everything up to the line end.
        //
        // Alternative formulation of the same rule (only one may be active,
        // a second assignment would simply overwrite the first):
        //   ItemRule = skip(blank) [ -PairRule % (*~char_("\r\n") >> eol) ];
        ItemRule = skip(blank) [ hold[PairRule] | omit[ *~char_("\r\n") ] ] % eol;

        PairRule = KeyRule >> ':' >> ValueRule;

        // Declared without a skipper, so blanks inside and after the text are
        // part of the key and the value.
        KeyRule = +~char_("\r\n:");
        ValueRule = +~char_("\r\n");

        BOOST_SPIRIT_DEBUG_NODES((ItemRule)(PairRule)(KeyRule)(ValueRule))
    }

private:
    // Start rule: the whole input, synthesizing the map.
    qi::rule<Iterator, std::map<std::string, std::string>()> ItemRule;
    // A single `key : value` entry, parsed with the blank skipper.
    qi::rule<Iterator, std::pair<std::string, std::string>(), qi::blank_type> PairRule;
    // lexemes
    qi::rule<Iterator, std::string()> KeyRule, ValueRule;
};
int main() {
const std::string input = R"(
Line To Skip!
My Key : Value
Some more gibberish to skip
My2ndKey : Long Value
)";
std::string::const_iterator iter = input.begin(), end = input.end();
KeyValueParser<std::string::const_iterator> parser;
std::map<std::string, std::string> parsed_map;
bool result = qi::parse(iter, end, parser, parsed_map);
if (result && (iter == end)) {
std::cout << "Success.\n";
// drop empty lines:
parsed_map.erase("");
for (auto& p : parsed_map)
std::cout << "\"" << p.first << "\" : \"" << p.second << "\"\n";
} else {
std::cout << "Something failed. Unparsed: ->|" << std::string(iter, end) << "|<-\n";
}
}
Prints
Success.
"My Key " : "Value "
"My2ndKey " : "Long Value "
With debug information
<ItemRule>
<try>\n Line To Skip! \n M</try>
<PairRule>
<try>\n Line To Skip! \n M</try>
<KeyRule>
<try>\n Line To Skip! \n M</try>
<fail/>
</KeyRule>
<fail/>
</PairRule>
<PairRule>
<try> Line To Skip! \n My</try>
<KeyRule>
<try>Line To Skip! \n My </try>
<success>\n My Key : Value \nS</success>
<attributes>[[L, i, n, e, , T, o, , S, k, i, p, !, ]]</attributes>
</KeyRule>
<fail/>
</PairRule>
<PairRule>
<try> My Key : Value \nSo</try>
<KeyRule>
<try>My Key : Value \nSome</try>
<success>: Value \nSome more g</success>
<attributes>[[M, y, , K, e, y, ]]</attributes>
</KeyRule>
<ValueRule>
<try>Value \nSome more gib</try>
<success>\nSome more gibberish</success>
<attributes>[[V, a, l, u, e, ]]</attributes>
</ValueRule>
<success>\nSome more gibberish</success>
<attributes>[[[M, y, , K, e, y, ], [V, a, l, u, e, ]]]</attributes>
</PairRule>
<PairRule>
<try>Some more gibberish </try>
<KeyRule>
<try>Some more gibberish </try>
<success>\n My2ndKey : Long </success>
<attributes>[[S, o, m, e, , m, o, r, e, , g, i, b, b, e, r, i, s, h, , t, o, , s, k, i, p]]</attributes>
</KeyRule>
<fail/>
</PairRule>
<PairRule>
<try> My2ndKey : Long </try>
<KeyRule>
<try>My2ndKey : Long V</try>
<success>: Long Value \n</success>
<attributes>[[M, y, 2, n, d, K, e, y, ]]</attributes>
</KeyRule>
<ValueRule>
<try>Long Value \n</try>
<success>\n</success>
<attributes>[[L, o, n, g, , , , , V, a, l, u, e, ]]</attributes>
</ValueRule>
<success>\n</success>
<attributes>[[[M, y, 2, n, d, K, e, y, ], [L, o, n, g, , , , , V, a, l, u, e, ]]]</attributes>
</PairRule>
<PairRule>
<try></try>
<KeyRule>
<try></try>
<fail/>
</KeyRule>
<fail/>
</PairRule>
<success></success>
<attributes>[[[[], []], [[M, y, , K, e, y, ], [V, a, l, u, e, ]], [[M, y, 2, n, d, K, e, y, ], [L, o, n, g, , , , , V, a, l, u, e, ]]]]</attributes>
</ItemRule>
Upvotes: 2