Reputation: 91
I'm currently trying to get some work done using boost::spirit::qi::phrase_parse
but I'm not able to figure this out by myself.
Worth mentioning: I'm totally new to Boost, and therefore to boost::spirit as well.
I'm getting an input of the form "{A [B C] -> F [D E], C ->E,B->Z}"
I'd like to parse this type of input into a std::map<std::string, std::string>
. The key should hold every std::string
before the "->"
and the value every std::string
after the "->"
until the ','
occurs.
Furthermore the '['
and ']'
shouldn't be stored.
So the content of the std::map
should be something like this after the parsing succeeded:
{
("A", "F"),
("A", "D E"),
("B C", "F"),
("B C", "D E"),
("C", "E"),
("B", "Z")
}
My first approach was to store all the keys/values in a std::vector<std::string>
.
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>
// Parses the sample input into a flat list of tokens (every key and every
// value becomes one std::string entry) and prints each token with its index.
int main()
{
    using boost::spirit::qi::phrase_parse;
    using boost::spirit::qi::char_;
    using boost::spirit::qi::lexeme;

    std::string input = "{A [B C] -> F [D E], C ->E,B->Z}";
    std::string::const_iterator beg = input.begin(), end = input.end();
    std::vector<std::string> sdvec;

    // Grammar: '{' entry % ',' '}' where an entry is
    //   tokens '-' '>' tokens
    // and a token is either a run of alphanumerics or a bracketed group that
    // may contain spaces (kept together through lexeme[]).
    bool r = phrase_parse(
        beg,
        end,
        '{' >> (+(+char_("a-zA-Z0-9") | lexeme[('[' >> +char_("a-zA-Z0-9 ") >> ']')]) >> '-' >> '>' >> +(+char_("a-zA-Z0-9") | lexeme[('[' >> +char_("a-zA-Z0-9 ") >> ']')])) % ',' >> '}',
        boost::spirit::ascii::space,
        sdvec
    );

    // The return value says whether the grammar matched; the iterator check
    // says whether all input was consumed. Both are required: for an empty
    // input beg == end holds even though nothing matched.
    if (!r || beg != end) {
        std::cout << "Parsing failed!" << std::endl;
    } else {
        std::cout << "Parsing succeeded!" << std::endl;
    }

    // Use the container's own size_type to avoid a signed/unsigned comparison.
    for (std::vector<std::string>::size_type i = 0; i < sdvec.size(); ++i) {
        std::cout << i << ": " << sdvec[i] << std::endl;
    }
    return 0;
}
Executing this I'm getting each found std::string
as an entry of the std::vector
:
Parsing succeeded!
0: A
1: B C
2: F
3: D E
4: C
5: E
6: B
7: Z
But I've no idea how to parse these values into a std::map<std::string, std::string>
using boost::spirit::qi::phrase_parse
as simply replacing the std::vector with a std::map produces compile errors.
EDIT:
Actually I found something that's quite what I need: http://boost-spirit.com/home/articles/qi-example/parsing-a-list-of-key-value-pairs-using-spirit-qi/
I adapted the code from this article to my problem:
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace qi = boost::spirit::qi;
// Qi grammar that parses "{key -> value, key -> value, ...}" into a
// std::map<std::string, std::string>.
//
// The grammar and its rules are declared with an iterator and an attribute
// signature only (no skipper type), so blanks between elements are matched
// explicitly with *qi::lit(' ') instead of being skipped.
template <typename Iterator>
struct keys_and_values
: qi::grammar<Iterator, std::map<std::string, std::string>()>
{
keys_and_values()
: keys_and_values::base_type(query)
{
// query: '{', one pair, then any number of ",pair", then '}'; optional
// blanks are accepted after '{', after each ',' and before '}'.
query = '{' >> *qi::lit(' ') >> pair >> *(qi::lit(',') >> *qi::lit(' ') >> pair) >> *qi::lit(' ') >> '}';
// pair: a key, optionally followed by "->" and a value (blanks allowed
// around the arrow). The leading '-' makes the whole "-> value" part optional.
pair = key >> -(*qi::lit(' ') >> "->" >> *qi::lit(' ') >> value);
// key / value: either a run of alphanumeric characters, or a bracketed
// group "[...]" whose content may also contain spaces.
key = +qi::char_("a-zA-Z0-9") | qi::lexeme[('[' >> +qi::char_("a-zA-Z0-9 ") >> ']')];
value = +qi::char_("a-zA-Z0-9") | qi::lexeme[('[' >> +qi::char_("a-zA-Z0-9 ") >> ']')];
}
// Start rule: synthesizes the whole map.
qi::rule<Iterator, std::map<std::string, std::string>()> query;
// One map entry; std::pair is usable as an attribute through the
// boost/fusion/include/std_pair.hpp adapter included by this snippet.
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
// Text of a single key or value.
qi::rule<Iterator, std::string()> key, value;
};
// Parses a sample key/value list with the keys_and_values grammar and prints
// four of the resulting map entries.
int main()
{
    std::string input = "{AB -> CD, E -> F, G -> HI, [J K L] -> [M N O] }";
    std::string::iterator begin = input.begin();
    std::string::iterator end = input.end();

    keys_and_values<std::string::iterator> p;  // create instance of parser
    std::map<std::string, std::string> m;      // map to receive results

    // Returns true if the grammar matched.
    bool result = qi::phrase_parse(begin, end, p, boost::spirit::ascii::space, m);

    // A match alone is not enough (trailing garbage may remain) and reaching
    // the end alone is not enough either (for an empty input begin == end
    // holds without any match), so both conditions are checked.
    if (!result || begin != end) {
        std::cout << "Parsing failed!" << std::endl;
    } else {
        std::cout << "Parsing succeeded!" << std::endl;
    }

    // operator[] yields an empty string for a key that was not parsed.
    std::cout << m["AB"] << "\t" << m["E"] << "\t" << m["G"] << "\t" << m["J K L"] << std::endl;
    return 0;
}
The result of this is more or less what I need:
Parsing succeeded!
CD F HI M N O
My last problem to solve is a case like A [B C] -> F [D E]
.
Is there any way to get those as four separate key-value pairs ("A", "F"), ("A", "D E"), ("B C", "F"), ("B C", "D E")
into my std::map<std::string, std::string> m
?
Or maybe it's easier to parse it into a std::map<std::vector<std::string>, std::vector<std::string> >
where each std::vector<std::string>
holds all keys/values?
For example:
in: "{A [B C] -> F [D E], C ->E,B->Z}"
out: { ({"A", "B C"}, {"F", "D E"}), ({"C"}, {"E"}), ({"B"}, {"Z"}) }
Thanks for any help!
Upvotes: 2
Views: 1854
Reputation: 155
Edit: This is an alternative way to do it, but I think it is a lot less clear than G. Civardi's solution.
As you observed, parsing into a map<vector<string>,vector<string>>
would be the easiest way, and you can later manipulate it to get the map you really want. The solution below uses an intermediate struct (basically equivalent to map<vector,vector>
) and then uses the customization point transform_attribute
in order to fill the multimap (since there are keys that repeat).
PS: Please forgive the use of range-based for loops; replace them if you can't use C++11.
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace qi=boost::spirit::qi;
namespace ascii=boost::spirit::ascii;
// One "keys -> values" entry of the input: every token found on the left of
// the arrow and every token found on the right of it.
struct key_value
{
// Tokens parsed before "->".
std::vector<std::string> keys;
// Tokens parsed after "->".
std::vector<std::string> values;
};
// Attribute the parser fills first: the list of all "keys -> values" entries.
// It is converted into the final std::multimap afterwards.
struct intermediate_struct
{
// Entries in the order in which they were parsed.
std::vector<key_value> data;
};
// Adapt key_value as a Fusion sequence (keys, values) so that Qi can use it
// as the attribute of a two-element sequence parser.
BOOST_FUSION_ADAPT_STRUCT(
key_value,
(std::vector<std::string>, keys)
(std::vector<std::string>, values)
)
// Adapt intermediate_struct as a Fusion sequence with the single member data.
BOOST_FUSION_ADAPT_STRUCT(
intermediate_struct,
(std::vector<key_value>, data)
)
namespace boost{ namespace spirit{ namespace traits
{
template <>
struct transform_attribute<std::multimap<std::string,std::string>,intermediate_struct,qi::domain>
{
typedef intermediate_struct type;
static type pre(std::multimap<std::string,std::string>& )
{
return intermediate_struct();
}
static void post(std::multimap<std::string,std::string>& map, intermediate_struct const& intermediate)
{
for(const auto& key_val : intermediate.data)
{
for(const auto& key : key_val.keys)
{
for(const auto& val : key_val.values)
{
map.insert(typename std::multimap<std::string,std::string>::value_type(key,val));
}
}
}
}
static void fail(std::multimap<std::string,std::string>&){}
};
}}}
// Parses the sample input into a multimap (via intermediate_struct and the
// transform_attribute specialization) and prints every key/value pair.
int main()
{
    typedef std::string::const_iterator It;
    typedef std::vector<std::string> Strings;

    std::string input = "{A [B C] -> F [D E], C ->E,B->Z}";
    It iter = input.begin();
    It end = input.end();
    std::multimap<std::string,std::string> sdmap;

    // A single token: an alphanumeric word or a bracketed group with spaces.
    qi::rule<It,std::string(),ascii::space_type> text_rule =
        +qi::char_("a-zA-Z0-9") | qi::lexeme[('[' >> +qi::char_("a-zA-Z0-9 ") >> ']')];

    // One or more tokens on either side of the arrow.
    qi::rule<It,Strings(),ascii::space_type> keys_rule = +text_rule;
    qi::rule<It,Strings(),ascii::space_type> values_rule = +text_rule;

    // Whole input: '{' followed by comma-separated "keys -> values" entries
    // and a closing '}'.
    qi::rule<It,intermediate_struct(),ascii::space_type> map_rule =
        qi::eps >> ('{' >> (keys_rule >> "->" >> values_rule)%',' >> '}');

    // The rule names are used by the debug output, so they are kept as they are.
    BOOST_SPIRIT_DEBUG_NODES( (map_rule)(keys_rule)(values_rule) );

    const bool matched = qi::phrase_parse(iter, end, map_rule, ascii::space, sdmap);

    // Failure path first: no match, or input left over.
    if (!matched || iter != end) {
        std::cout << "Parsing Failed!" << std::endl;
        std::cout << "Unparsed: " << std::string(iter,end) << std::endl;
        return 0;
    }

    std::cout << "Parsing succeeded!" << std::endl;
    for (const auto& entry : sdmap) {
        std::cout << entry.first << ": " << entry.second << std::endl;
    }
    return 0;
}
Upvotes: 3
Reputation: 667
I think you are quite close to your goal so I will skip the combinatorial part :-)
The parser does what it is supposed to do: it checks the syntax and tokenizes the data. It then passes the keys, the values and the output map (or multimap) as arguments to the phoenix function inserter,
where you can insert whatever
you need into your map (or multimap).
#if __cplusplus >= 201103L
#define BOOST_RESULT_OF_USE_DECLTYPE
#endif
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <iomanip>
#include <vector>
#include <map>
namespace qi = boost::spirit::qi;
namespace ascii=boost::spirit::ascii;
typedef std::map< std::string,std::string > TMap;
//typedef std::multimap< std::string,std::string > TMap;
// Function object (wrapped in a boost::phoenix::function below in the
// snippet) that receives the output container plus the key tokens and value
// tokens of one "keys -> values" entry, prints them, and stores every
// key/value combination in the container.
struct SMapInsert
{
    // Result-type protocol: the call returns nothing, whatever the argument
    // types are.
    template <typename Arg1,typename Arg2,typename Arg3>
    struct result
    {
        typedef void type;
    };

    // out  - associative container receiving the pairs; needs a value_type
    //        constructible from (key, value) and an insert() member, e.g.
    //        std::map or std::multimap of strings.
    // keys - range of key tokens of the entry.
    // vals - range of value tokens of the entry.
    template <typename Arg1,typename Arg2,typename Arg3>
    void operator()( Arg1&out, Arg2&keys, Arg3&vals ) const
    {
        std::cout << "Keys:" << std::endl;
        for( const auto &key : keys )
            std::cout << std::left << "`" << key << "`" << std::endl;
        std::cout << "Vals:" << std::endl;
        for( const auto &val : vals )
            std::cout << std::left << "`" << val << "`" << std::endl;

        // Store the cross product keys x vals. With a std::map a key that
        // already exists keeps its first value (insert() does not overwrite);
        // with a std::multimap every combination is kept.
        for( const auto &key : keys )
            for( const auto &val : vals )
                out.insert( typename Arg1::value_type( key, val ) );
    }
};
// Lazy Phoenix wrapper around SMapInsert, used as a semantic action in the
// grammar in main().
boost::phoenix::function< SMapInsert > inserter;
int main()
{
std::string input = "{A [B C] -> F [D E], C ->E,B->Z}";
TMap data;
std::string::const_iterator iter = input.begin();
std::string::const_iterator last = input.end();
qi::rule< std::string::const_iterator,std::string() > token=+qi::alnum;
qi::rule< std::string::const_iterator,ascii::space_type,std::vector< std::string >() >
keyOrvalue = +( token | ( '[' >> qi::lexeme[ +qi::char_("a-zA-Z0-9 ") ] >> ']' ) );
qi::rule< std::string::const_iterator,ascii::space_type, TMap() >
root = '{' >> ( ( keyOrvalue >> "->" >> keyOrvalue )[ inserter( qi::_val, qi::_1, qi::_2 ) ] ) % ',' >> '}';
std::cout << "input: `" << input << "`" << std::endl;
if( qi::phrase_parse( iter, last, root, ascii::space, data ) && iter==last )
{
for( const auto &keyValue : data )
std::cout << std::left << std::setw(10) << keyValue.first << std::setw(10) << keyValue.second << std::endl;
}
else
std::cout << "parsing failed:" << std::string( iter,last ) << std::endl;
return 0;
}
Upvotes: 3