Reputation: 1435
I want to use a regular expression to replace every word in the text that is not in the dictionary with a unique identifier. How can I do that? Maybe by using a callback function?
/// Maps one regex match to its replacement token.
///
/// @param m  match whose whole-match text (m[0]) is the word to classify.
/// @return   the word itself when it is one of the keywords "foo" or "bar";
///           otherwise a fresh identifier "i<n>", where n is a function-local
///           counter that starts at 0 and grows by one per generated identifier.
std::string getToken(const std::smatch &m) {
    static int x = 0;
    // Built once instead of on every call.
    static const std::set<std::string> keywords = {"foo", "bar"};

    std::string keyword = m[0].str();
    if (keywords.find(keyword) != keywords.end()) {
        return keyword;
    }
    // std::to_string is required here: `"i" + x++` would be pointer arithmetic
    // on the string literal, not string concatenation.
    return "i" + std::to_string(x++);
}
/// Rewrites `text` by passing every word through getToken().
///
/// A word is a maximal run of `\w` characters. Each word is replaced by the
/// string getToken() returns for it; the text between words is copied through
/// unchanged.
///
/// @param text  input text.
/// @return      the rewritten text.
std::string replacer(std::string text) {
    // std::regex_replace only accepts a format string, not a callback, so the
    // matches are walked by hand and the result is assembled piece by piece.
    // `\w+` matches one word at a time; the greedy `\b.*\b` would swallow
    // everything from the first to the last word boundary.
    static const std::regex word_re("\\w+");

    std::string ret;
    auto tail = text.cbegin();  // end of the previously handled word
    for (std::sregex_iterator it(text.cbegin(), text.cend(), word_re), last; it != last; ++it) {
        const std::smatch &m = *it;
        ret.append(tail, m[0].first);  // separator text before this word
        ret += getToken(m);
        tail = m[0].second;
    }
    ret.append(tail, text.cend());  // whatever follows the last word
    return ret;
}
Upvotes: 2
Views: 1446
Reputation: 1435
#include <regex>
#include <string>
#include <sstream>
#include <set>
#include <map>
/// Replaces every word of `text` that is not a keyword with a stable identifier.
///
/// A word is a maximal run of `\w` characters. The keywords "foo" and "bar"
/// are kept as they are. Any other word becomes "ID<n>", where n numbers the
/// distinct non-keyword words in order of first appearance (starting at 0), so
/// a repeated word always maps to the same identifier within one call.
/// Everything between words (whitespace, punctuation) is copied through
/// unchanged.
///
/// @param text  input text.
/// @return      the rewritten text.
std::string replacer(std::string text) {
    static const std::set<std::string> keywords = { "foo", "bar" };
    // `\w+` rather than `\b\w*\b`: the latter also yields empty matches.
    static const std::regex word_re("\\w+");

    std::map<std::string, int> ids;
    std::string output_text;
    output_text.reserve(text.size());

    auto tail = text.cbegin();  // end of the previously handled word
    for (std::sregex_iterator it(text.cbegin(), text.cend(), word_re), last; it != last; ++it) {
        const std::ssub_match& word = (*it)[0];
        // Copy the separator verbatim so punctuation and spacing survive.
        output_text.append(tail, word.first);
        tail = word.second;

        const std::string token = word.str();
        if (keywords.count(token) != 0) {
            output_text += token;
        } else {
            // emplace leaves an existing entry untouched, so a word seen
            // before keeps the identifier it was first given.
            const auto entry = ids.emplace(token, static_cast<int>(ids.size()));
            output_text += "ID";
            output_text += std::to_string(entry.first->second);
        }
    }
    output_text.append(tail, text.cend());  // whatever follows the last word
    return output_text;
}
Upvotes: 3