Reputation: 58
I am trying to time the std::find()
function for std::unordered_set
, std::set
, and std::vector
. But the results are strange to me.
It takes more time to find an element in an unordered_set
than a vector
.
In theory, the time complexity for searching should be O(1)
for unordered_set
and O(n)
for vector
.
So, should it be faster to find an element in an unordered_set
than vector
? Did I do something wrong?
Here is my code:
#include <iostream>
#include <vector>
#include <set>
#include <unordered_set>
#include <algorithm>
#include <random>
#include <numeric>
#include <chrono>
using namespace std;
int main(int argc, char** argv)
{
int size = 10000;
vector<int> items(size);
iota(items.begin(), items.end(), 0);
auto rng = default_random_engine {};
shuffle(items.begin(), items.end(), rng);
chrono::steady_clock::time_point begin, end;
unordered_set<int> unsetInt;
set<int> setInt;
vector<int> vecInt;
// add element
for(auto it = items.begin(); it != items.end(); it++)
{
unsetInt.insert(*it);
setInt.insert(*it);
vecInt.push_back(*it);
}
// lookup time for unordered_set<int>
begin = chrono::steady_clock::now();
for(int i = 0; i < size; i++)
{
find(unsetInt.begin(), unsetInt.end(), i);
}
end = chrono::steady_clock::now();
cout << "lookup time for unordered_set<int>: " <<
chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
// lookup time for set<int>
begin = chrono::steady_clock::now();
for(int i = 0; i < size; i++)
{
find(setInt.begin(), setInt.end(), i);
}
end = chrono::steady_clock::now();
cout << "lookup time for set<int>: " <<
chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
// lookup time for vector<int>
begin = chrono::steady_clock::now();
for(int i = 0; i < size; i++)
{
find(vecInt.begin(), vecInt.end(), i);
}
end = chrono::steady_clock::now();
cout << "lookup time for vector<int>: " <<
chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
return 0;
}
Here is my result:
lookup time for unordered_set<int>: 2293ms
lookup time for set<int>: 3080ms
lookup time for vector<int>: 1311ms
Upvotes: 1
Views: 91
Reputation: 490148
As a follow-up to HolyBlackCat's (correct) answer, here's some corrected code to show the differences:
#include <iostream>
#include <vector>
#include <set>
#include <unordered_set>
#include <algorithm>
#include <random>
#include <numeric>
#include <chrono>
using namespace std;
int main(int argc, char** argv)
{
int size = 10000;
vector<int> items(size);
iota(items.begin(), items.end(), 0);
auto rng = default_random_engine {};
shuffle(items.begin(), items.end(), rng);
using clock = std::chrono::high_resolution_clock;
clock::time_point begin, end;
unordered_set<int> unsetInt;
set<int> setInt;
vector<int> vecInt;
// add element
for(auto it = items.begin(); it != items.end(); it++)
{
unsetInt.insert(*it);
setInt.insert(*it);
vecInt.push_back(*it);
}
int suma = 0, sumb = 0, sumc = 0;
// lookup time for unordered_set<int>
begin = clock::now();
for(int i = 0; i < size; i++)
{
suma += *unsetInt.find(i);
// find(unsetInt.begin(), unsetInt.end(), i);
}
end = clock::now();
cout << "lookup time for unordered_set<int>: " <<
chrono::duration_cast<chrono::microseconds>(end-begin).count() << "us" << endl;
// lookup time for set<int>
begin = clock::now();
for(int i = 0; i < size; i++)
{
// find(setInt.begin(), setInt.end(), i);
sumb += *setInt.find(i);
}
end = clock::now();
cout << "lookup time for set<int>: " <<
chrono::duration_cast<chrono::microseconds>(end-begin).count() << "us" << endl;
// lookup time for vector<int>
begin = clock::now();
for(int i = 0; i < size; i++)
{
sumc += *find(vecInt.begin(), vecInt.end(), i);
}
end = clock::now();
cout << "lookup time for vector<int>: " <<
chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
cout << "\nIgnore:" << suma << "," << sumb << "," << sumc << '\n';
return 0;
}
I changed it to show the time in us instead of ms, because the results came out like this:
lookup time for unordered_set<int>: 216us
lookup time for set<int>: 889us
lookup time for vector<int>: 21916us
Ignore:49995000,49995000,49995000
I added code to sum the results of the searches because without that, the compiler "noticed" that the loops had no observable behavior, and simply optimized them away completely (they all ran in 0ms). I'd guess you were testing with optimization turned off, but measuring code speed with optimization turned off isn't usually a useful thing to do.
Upvotes: 0
Reputation: 96286
Did I do something wrong?
You did.
std::find
performs the search by iterating over all elements. It doesn't have any special knowledge of std::[unordered_]set
and other containers.
To properly search in a set, you need to use its own .find()
member function.
Upvotes: 8