wjs
wjs

Reputation: 41

Removing or ignoring duplicate lines from a txt file in C++

Currently I am trying to read a text file and ignore the duplicate lines inside it. Here's a code sample of what I have done so far:

// Prompt for a file name, then read the file line by line, recording each
// distinct line in a map keyed by the line's text.
string filename;
            cout<<"Please enter filename: "<<endl;
            cin>>filename;
            ifstream inFile(filename.data());

            // Maps a line's text to the sequence number it was given when first seen.
            typedef std::map<std::string, int> line_record;
            line_record lines;
            int line_number = 1;

            if(inFile.is_open())
            {
                // NOTE: `line` and `noOfLine` are not declared anywhere in this excerpt.
                while(std::getline(inFile, line))
                {   
                    line_record::iterator existing = lines.find(line);
                    if(existing != lines.end())
                    {
                        // Text already seen: overwrite its stored number with -1.
                        existing->second = (-1);
                    }
                    else
                    {
                        lines.insert(std::make_pair(line,line_number));
                        ++line_number;
                        // This second getline reads the NEXT line of the file into `line`
                        // (when one is available), so the line printed below is normally
                        // not the one just inserted, and that next line is never looked
                        // up in the map.
                        getline(inFile,line);
                        cout<<line<<endl;
                        noOfLine++;    
                    }

                }    
            }else
            {
                cout<<"Error opening file! Please try again!"<<endl;
            }
            // Printed whether or not the file could be opened.
            cout<<"\n"<<noOfLine<<" record(s) read in successfully!\n"<<endl;

The text file(below):

Point2D, [3, 2]
Line3D, [7, 12, 3], [-9, 13, 68]
Point3D, [1, 3, 8]
Line2D, [5, 7], [3, 8]
Point2D, [3, 2]
Line3D, [7, -12, 3], [9, 13, 68]
Point3D, [6, 9, 5]
Point2D, [3, 2]
Line3D, [70, -120, -3], [-29, 1, 268]
Line3D, [25, -69, -33], [-2, -41, 58]
Point3D, [6, 9, -50]

But the result I am getting is:

Line3D, [7, 12, 3], [-9, 13, 68]
Line2D, [5, 7], [3, 8]
Point3D, [6, 9, 5]
Line3D, [25, -69, -33], [-2, -41, 58]
Point3D, [6, 9, -50]

Any help?? Thanks!!

Upvotes: 2

Views: 5460

Answers (3)

at-hex
at-hex

Reputation: 3130

Here is the complete solution for this problem:

#include <string>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
#include <algorithm>
#include <iterator>
using namespace std;

// We read each line into a map<string, int>,
// which is ordered by its string keys by default, not by line number,
// then copy the map into a vector and sort it by line number, as needed for output.txt

typedef pair<string, int> vecpair;

/// Copies the file named by argv[1] to "output.txt" with duplicate lines
/// removed, keeping the first occurrence of each line in its original order.
/// If the input contains no lines, "Empty records" is written instead.
///
/// @param argc  Number of command-line arguments; must be at least 2.
/// @param argv  argv[1] is the path of the input file.
/// @return 0 on success, 1 if the argument is missing or a file cannot be opened.
int main(int argc, char* argv[])
{
    if(argc < 2)
    {
        std::cerr << "Missing input filename" << '\n';
        return 1;
    }

    std::ifstream infile(argv[1]);
    if(!infile)
    {
        std::cerr << "File " << argv[1] << " could not be opened" << '\n';
        return 1;
    }

    // Key: line text; value: number of the line on which that text first appeared.
    // insert() leaves an existing entry untouched, so later duplicates are dropped
    // and the stored number always belongs to the first occurrence.
    std::map<std::string, int> records;
    int line_number{0};
    for(std::string line; std::getline(infile, line); ++line_number)
    {
        records.insert(std::make_pair(line, line_number));
    }
    infile.close();

    // The map iterates in key (text) order; copy it into a vector and sort by
    // line number to restore the order of the input file. Line numbers are
    // unique per entry, so no tie-break is needed.
    std::vector<std::pair<std::string, int>> ordered(records.begin(), records.end());
    std::sort(ordered.begin(), ordered.end(),
              [](const std::pair<std::string, int>& lhs, const std::pair<std::string, int>& rhs)
              {
                  return lhs.second < rhs.second;
              });

    // Open the output only after the input has been read completely: a bad input
    // path then leaves no empty output.txt behind, and using output.txt itself as
    // the input does not truncate it before it is read.
    std::ofstream outfile("output.txt");
    if(!outfile)
    {
        std::cerr << "File output.txt could not be opened" << '\n';
        return 1;
    }

    if(ordered.empty())
    {
        outfile << "Empty records" << '\n';
    }
    else
    {
        for(const auto& entry : ordered)
        {
            outfile << entry.first << '\n';
        }
    }

    outfile.close();
    return 0;
}

Upvotes: 0

Thoritie Gretels
Thoritie Gretels

Reputation: 1

I just edited a few things and it works: `if (myfile.is_open())

{

            // Read the file one line at a time; print each line the first time its text is seen.
            while(getline(myfile, line)){


                line_record::iterator existing = lines.find(line);

                if(existing != lines.end())
                {
                    // Duplicate text: overwrite its stored number with -1 and print nothing.
                    existing->second = (-1);




                }
                else
                {

                    // First occurrence: record it, print it, and count it.
                    lines.insert(make_pair(line,line_number));
                    ++line_number;
                    cout<<line<<endl;

                    count++;
                }



            }

            cout << "its -- > "<<count <<" record"<< endl;




    myfile.close();

}

`

Upvotes: 0

Sergey Kalinichenko
Sergey Kalinichenko

Reputation: 726849

Your code reads and discards the next line inside the loop:

lines.insert(std::make_pair(line,line_number));
++line_number;
// HERE: this extra getline replaces `line` with the following line of the file,
// so the line that was just inserted is never the one printed below.
getline(inFile,line);
cout<<line<<endl;
noOfLine++;

Essentially, the output of your program consists of the lines that your program discards.

You should go through the "reading" loop without producing any output, and then go through the map, and print out the content of lines with the line numbers where they were found.

Upvotes: 2

Related Questions