Reputation: 23
Before I detected std::upper_bound I implemented my own version of binarySearch to determine the index of a wanted element. The implementation works but in comparision with a linear search my binarySearch is only a little bit faster. The factor between my implementation and the std lib increases as the search area grows.
For a quick selftest I inserted the complete code at the end of this post. For a quick glance here my searchBinary implementation:
template<typename T> T searchBinary(const std::vector<std::vector<T> > vectorList, const std::vector<T> compareVector) {
long iteration = 0;
size_t leftIndex = 0;
size_t rightIndex = vectorList.size()-1;
size_t pos;
while (leftIndex <= rightIndex) {
iteration++;
pos = (leftIndex + rightIndex) / 2;
if (compareVector < vectorList[pos]) {
rightIndex = pos - 1;
} else if (compareVector > vectorList[pos]) {
leftIndex = pos + 1;
} else {
cout << "Match at binary search after " << iteration << " iterations.\n";
return pos;
}
}
cout << "No match at binary search after " << iteration << " iterations.\n";
return -1;
}
And this is how I messure the runtime:
void searchBinaryOwn_messure(std::vector<std::vector<u_char> > vectorList, std::vector<u_char> compareVector) {
struct timeval begin, end;
long seconds, useconds;
if (gettimeofday(&begin,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
searchBinary(vectorList, compareVector);
if (gettimeofday(&end,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
seconds = end.tv_sec - begin.tv_sec;
useconds = end.tv_usec - begin.tv_usec;
if(useconds < 0) {
useconds += 1000000;
seconds--;
}
printf("searchBinaryOwn(): %ld sec %ld usec\n\n", seconds, useconds);
return;
}
Don't see any problems here. If I run ths program with 8 000 000 elements:
So why is there such a huge difference between the both binary searches? (compiled with gcc 4.8.2) Note: because of "cout..." takes about 30usec, std::binarySearch is in fact way faster than displayed
Here the full code:
#include <iostream>
#include <vector>
#include <sys/time.h>
#include <algorithm>
#include <string>
#include <stdio.h>
using namespace std;
template<typename T> T searchBinary(const std::vector<std::vector<T> > vectorList, const std::vector<T> compareVector) {
long iteration = 0;
size_t leftIndex = 0;
size_t rightIndex = vectorList.size()-1;
size_t pos;
while (leftIndex <= rightIndex) {
iteration++;
pos = (leftIndex + rightIndex) / 2;
if (compareVector < vectorList[pos]) {
rightIndex = pos - 1;
} else if (compareVector > vectorList[pos]) {
leftIndex = pos + 1;
} else {
cout << "Match at binary search after " << iteration << " iterations.\n";
return pos;
}
}
cout << "No match at binary search after " << iteration << " iterations.\n";
return -1;
}
size_t searchLinear(std::vector<std::vector<u_char> > vectorList, std::vector<u_char> compareVector) {
size_t vectorListSize = vectorList.size();
for (size_t i = 0; i < vectorListSize; i++) {
if (vectorList[i] == compareVector) {
return i;
}
}
return (size_t)-1;
}
void searchLinear_messure(std::vector<std::vector<u_char> > vectorList, std::vector<u_char> compareVector) {
struct timeval begin, end;
long seconds, useconds;
if (gettimeofday(&begin,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
//search
cout << "\nPos: " << searchLinear(vectorList, compareVector) << endl;
if (gettimeofday(&end,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
seconds = end.tv_sec - begin.tv_sec;
useconds = end.tv_usec - begin.tv_usec;
if(useconds < 0) {
useconds += 1000000;
seconds--;
}
printf("searchLinear(): %ld sec %ld usec\n\n", seconds, useconds);
return;
}
void searchBinaryStd_messure(std::vector<std::vector<u_char> > vectorList, std::vector<u_char> compareVector) {
struct timeval begin, end;
long seconds, useconds;
if (gettimeofday(&begin,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
//search
cout << "found: " << std::binary_search(vectorList.begin(), vectorList.end(), compareVector) << endl;
if (gettimeofday(&end,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
seconds = end.tv_sec - begin.tv_sec;
useconds = end.tv_usec - begin.tv_usec;
if(useconds < 0) {
useconds += 1000000;
seconds--;
}
printf("searchBinaryStd(): %ld sec %ld usec\n\n", seconds, useconds);
return;
}
void searchBinaryOwn_messure(std::vector<std::vector<u_char> > vectorList, std::vector<u_char> compareVector) {
struct timeval begin, end;
long seconds, useconds;
if (gettimeofday(&begin,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
searchBinary(vectorList, compareVector);
if (gettimeofday(&end,(struct timezone *)0)) {
fprintf(stderr, "can not get time\n");
exit(1);
}
seconds = end.tv_sec - begin.tv_sec;
useconds = end.tv_usec - begin.tv_usec;
if(useconds < 0) {
useconds += 1000000;
seconds--;
}
printf("searchBinaryOwn(): %ld sec %ld usec\n\n", seconds, useconds);
return;
}
int main() {
std::vector<u_char> compareVector;
compareVector.clear();
compareVector.push_back(0xF8);
compareVector.push_back(0xD1);
compareVector.push_back(0x11);
compareVector.push_back(0xFF);
std::vector<std::vector<u_char> > vectorList;
vectorList.clear();
std::vector<u_char> temp;
for (unsigned int i = 0; i < ((unsigned int)-1); i++) {
if (i == 8000000) {
// if (i == 15000000) {
break;
}
temp.clear();
temp.push_back(0x11);
temp.push_back(0x22);
temp.push_back(0x33);
temp.push_back(0x44);
vectorList.push_back(temp);
}
vectorList[7999999] = compareVector;
cout << "Elements in vectorList: " << vectorList.size() << endl;
searchLinear_messure(vectorList, compareVector);
searchBinaryStd_messure(vectorList, compareVector);
searchBinaryOwn_messure(vectorList, compareVector);
return 0;
}
Upvotes: 2
Views: 199
Reputation: 234635
template<typename T> T searchBinary(const std::vector<std::vector<T> >& vectorList, const std::vector<T>& compareVector) {
i.e. pass by constant reference not by value. This will avoid two vector copies.
You can refactor using a single conditional test <
per iteration. (You'll also need to change the while
conditional).
Does iteration
need to be a long
? Can it not be shorter? What's the worst case for convergence?
Point 1 is the important one. 2 is quite important, 3 is a micro-optimisation that might not make a difference at all on some systems.
Upvotes: 1
Reputation: 16324
The vectors are passed to searchBinary
by value, hence copies will be created which takes time.
If you change the signature to
template<typename T> T searchBinary(const std::vector<std::vector<T> >& vectorList, const std::vector<T>& compareVector)
it is as fast as the std implementation: http://melpon.org/wandbox/permlink/qozapTfn3MrGv5JA
Upvotes: 1
Reputation: 16070
Well this template<typename T> T searchBinary(const std::vector<std::vector<T> > vectorList, const std::vector<T> compareVector)
makes a copy of input vector (as you pass it by value), which is linear in time. So the result you get is actually expected.
Btw. a tounge-in-cheek answer could be that standard library is written by quite good developers, it is expected to be hardly outperformed.
Upvotes: 0