user3674296
user3674296

Reputation: 69

C++: read dataset and check if vector<Class> is subset of vector<Class>

I have the following piece of code. The code creates a vector Dataset, each element of which is a vector. It also creates a vector S.

I want to check which vectors in Dataset contain a vector of S as a subset. Apparently I am doing something wrong, because for the following example, Dataset is:
a b c
a d
a b d

and S:
a b

it should print: 0 2

and for me it prints: 0 1 2

#include <iostream>
#include <fstream>
#include <sstream>
#include <string.h>
#include <string>
#include <time.h>
#include <vector>
#include <algorithm>

using namespace std;


/**
 * Non-owning view of a run of characters inside some other buffer.
 *
 * A StringRef stores only a pointer and a length; it never copies or owns the
 * characters. The viewed range is generally NOT null-terminated, so it must
 * never be handed to C string functions such as strcmp/strlen. The buffer the
 * view points into must stay alive and unmodified for as long as the view is
 * used.
 */
class StringRef
{
private:
    char const*     begin_;   // first character of the viewed range
    int             size_;    // number of characters in the range

public:
    /** Number of characters in the view. */
    int size() const { return size_; }
    /** Pointer to the first character of the view. */
    char const* begin() const { return begin_; }
    /** Pointer one past the last character of the view. */
    char const* end() const { return begin_ + size_; }

    /**
     * Creates a view of `size` characters starting at `begin`.
     * No copy is made; the caller keeps ownership of the characters.
     */
    StringRef( char const* const begin, int const size )
        : begin_( begin )
        , size_( size )
    {}

    /**
     * Lexicographic "less than" over exactly the viewed characters.
     *
     * Only the first min(size(), obj.size()) characters are compared (as
     * unsigned char values); if they are equal, the shorter view orders
     * first. Two views with the same contents are therefore equivalent even
     * when they point into different buffers, which is what std::sort and
     * std::includes rely on.
     */
    bool operator<(const StringRef& obj) const
    {
        int const common = std::min( size_, obj.size_ );
        int const cmp = std::char_traits<char>::compare(
            begin_, obj.begin_, static_cast<std::size_t>( common ) );
        if( cmp != 0 )
        {
            return cmp < 0;
        }
        // Common prefix is identical: a proper prefix sorts before the longer view.
        return size_ < obj.size_;
    }

};


/************************************************
 * Checks if vector B is subset of vector A     *
 ************************************************/

bool isSubset(std::vector<StringRef> A, std::vector<StringRef> B)
{
    std::sort(A.begin(), A.end());
    std::sort(B.begin(), B.end());
    return std::includes(A.begin(), A.end(), B.begin(), B.end());
}


vector<StringRef> split3( string const& str, char delimiter = ' ' )
{
    vector<StringRef>   result;

    enum State { inSpace, inToken };

    State state = inSpace;
    char const*     pTokenBegin = 0;    // Init to satisfy compiler.
    for(auto it = str.begin(); it != str.end(); ++it )
    {
        State const newState = (*it == delimiter? inSpace : inToken);
        if( newState != state )
        {
            switch( newState )
            {
            case inSpace:
                result.push_back( StringRef( pTokenBegin, &*it - pTokenBegin ) );
                break;
            case inToken:
                pTokenBegin = &*it;
            }
        }
        state = newState;
    }
    if( state == inToken )
    {
        result.push_back( StringRef( pTokenBegin, &str.back() - pTokenBegin ) );
    }
    return result;
}

int main() {

    vector<vector<StringRef> > Dataset;
    vector<vector<StringRef> > S;

    ifstream input("test.dat");
    long count = 0;
    int sec, lps;
    time_t start = time(NULL);

    cin.sync_with_stdio(false); //disable synchronous IO

    for( string line; getline( input, line ); )
    {
        Dataset.push_back(split3( line ));
        count++;
    };
    input.close();
    input.clear();

    input.open("subs.dat");
    for( string line; getline( input, line ); )
    {
        S.push_back(split3( line ));
    };



    for ( std::vector<std::vector<StringRef> >::size_type i = 0; i < S.size(); i++ )
    {
        for(std::vector<std::vector<StringRef> >::size_type j=0; j<Dataset.size();j++)
        {

            if (isSubset(Dataset[j], S[i]))
            {
                cout << j << " ";
            }

        }
    }

    sec = (int) time(NULL) - start;
    cerr << "C++   : Saw " << count << " lines in " << sec << " seconds." ;
    if (sec > 0) {
        lps = count / sec;
        cerr << "  Crunch speed: " << lps << endl;
    } else
        cerr << endl;

    return 0;
}

Upvotes: 0

Views: 81

Answers (1)

interjay
interjay

Reputation: 110203

Your StringRef type is dangerous because it contains a const char * pointer, but no concept of ownership. So the pointer could be invalidated at some point after the object is constructed.

And indeed this is what happens here: You have a single string (line) and create StringRefs with pointers to its internal data. When the string is later modified, these pointers are invalidated.

You should create a vector<std::string> instead to prevent this problem.

Upvotes: 2

Related Questions