Blacksheep
Blacksheep

Reputation: 119

Splitting a string with different start delimiters (not a duplicate)

I need to split a big string that contains substrings introduced by either !=" or !" (and terminated by a closing "), but I am stuck.

The code I have

#include <iostream>
#include <string>
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/regex.hpp>
#include <fstream>
#include <cstring>
#include <vector>
#include <algorithm>

    // Bring both namespaces into scope so that `string`, `vector` and `regex`
    // can be written unqualified in the code below.
    using namespace std;
    using namespace boost;

    // File-scope string. The code shown here never reads it: add2vec_ele has a
    // parameter named `line` and main declares its own local `line`, both of
    // which hide this one.
    std::string line;

    // Matrix of tokens: add2vec_ele appends one row (vector of strings) per call.
    std::vector< std::vector<string> > vec_line;
    // Scratch row shared across calls: add2vec_ele fills it, copies it into
    // vec_line, then clears it again.
    vector<string>vec_string_temp;


// Splits `line` at every `!="` or `!"` delimiter and appends the resulting
// tokens as one new row of the global `vec_line`.
//
// firste: keyword pushed into the scratch row before splitting.
// line:   text to split.
// Returns an empty string in all cases.
//
// Observed behaviour (this is the version the question is about):
//  - the matched delimiters are consumed by the split, so they are missing from
//    the stored tokens;
//  - split_regex overwrites whatever its result container held before the call,
//    so the `firste` entry pushed beforehand is not part of the stored row.
string add2vec_ele(string firste, string line)
{
    // Start the row with the keyword.
    vec_string_temp.push_back(firste);

    // Either opening delimiter: `!="` or `!"`.
    const boost::regex delimiter( "(!=\"|!\")" );
    boost::algorithm::split_regex( vec_string_temp, line, delimiter );

    // Copy the finished row into the matrix and reset the scratch row for the next call.
    vec_line.push_back(vec_string_temp);
    vec_string_temp.clear();

    return std::string();
}


int main()
{
    string firste = "KeyWord";
    string line = "!=\"abcd!#efg\" !\"ABCDEFGHAG!/8765438\" !\"This !/[isanotherstring]?but nobody cares78\" !=\"again a string with equal sign and exclamation mark\"";
    add2vec_ele(firste,line);

    // print all elements
    for (unsigned int i = 0; i < vec_line.size(); i++)
    {
        std::cout << "Vector line: " << i << " ";
        for (unsigned int j = 0; j < vec_line[i].size(); j++)
        {
            std::cout << " Col: " << j << " " << vec_line[i][j];
        }
        std::cout << endl;
    }
}

This basically does what I want, except that the leading delimiters ( !=" or !" ) are lost from each substring.

Input is stored in the string 'line'

string line = "!=\"abcd!#efg\" !\"ABCDEFGHAG!/8765438\" !\"This !/[isanotherstring]?but nobody cares78\" !=\"again a string with equal sign and exclamation mark\"";

The output from above code is

Vector line: 0  Col: 0  Col: 1 abcd!#efg"  Col: 2 ABCDEFGHAG!/8765438"  Col: 3 This !/[isanotherstring]?but nobody cares78"  Col: 4 again a string with equal sign and exclamation mark"

The expected output would be

Vector line: 0  Col: 0  Col: 1 !="abcd!#efg"  Col: 2 !"ABCDEFGHAG!/8765438"  Col: 3 !"This !/[isanotherstring]?but nobody cares78"  Col: 4 !="again a string with equal sign and exclamation mark"

How can I achieve this?

Upvotes: 2

Views: 76

Answers (1)

Blacksheep
Blacksheep

Reputation: 119

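Using a Perl-syntax regex with zero-width look-ahead assertions solved my issue: the split now happens in front of each delimiter instead of consuming it.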

// Splits `line` in front of every `!="` or `!"` delimiter and appends the
// pieces, preceded by `firste`, as one new row of the global `vec_line`.
//
// firste: keyword stored as the first column of the new row.
// line:   text to split; each delimiter stays attached to the token it opens.
// Returns an empty string in all cases.
//
// Uses the global `vec_string_temp` as a scratch row and leaves it empty.
string add2vec_ele(string firste, string line)
{
    // Zero-width look-aheads make the split point sit *before* each delimiter, so
    // nothing is consumed and the delimiter remains part of the following token.
    // `(?<!^)` rejects a split point at the very start of the searched text, so a
    // delimiter at the beginning of the input does not yield an empty first token.
    const boost::regex ex( "(?<!^)(?:(?=!=\")|(?=!\"))", boost::regex::perl );

    // split_regex overwrites the prior content of its result container, so a
    // keyword pushed beforehand would be discarded. Split first, then put the
    // keyword in front of the tokens.
    boost::algorithm::split_regex( vec_string_temp, line, ex ) ;
    vec_string_temp.insert( vec_string_temp.begin(), firste );

    // store row in vec_line
    vec_line.push_back(vec_string_temp);
    vec_string_temp.clear();
    return string();
}

Upvotes: 2

Related Questions