Ielixmar
Ielixmar

Reputation: 58

Timing of std::find() in standard containers

I am trying to time the std::find() function for std::unordered_set, std::set, and std::vector. But the results are strange to me.

It takes more time to find an element in an unordered_set than a vector.

In theory, the time complexity for searching should be O(1) for unordered_set and O(n) for vector.

So, should it be faster to find an element in an unordered_set than vector? Did I do something wrong?

Here is my code:

#include <iostream>
#include <vector>
#include <set>
#include <unordered_set>
#include <algorithm>
#include <random>
#include <numeric>
#include <chrono>

using namespace std;

int main(int argc, char** argv)
{
  int size = 10000;
  vector<int> items(size);
  iota(items.begin(), items.end(), 0);
  auto rng = default_random_engine {};
  shuffle(items.begin(), items.end(), rng);
  
  chrono::steady_clock::time_point begin, end;  
  unordered_set<int> unsetInt;
  set<int> setInt;
  vector<int> vecInt;

  // add element
  for(auto it = items.begin(); it != items.end(); it++)
  {
    unsetInt.insert(*it);
    setInt.insert(*it);
    vecInt.push_back(*it);
  }
  
  // lookup time for unordered_set<int>
  begin = chrono::steady_clock::now();
  for(int i = 0; i < size; i++)
  {
    find(unsetInt.begin(), unsetInt.end(), i);
  }
  end = chrono::steady_clock::now();
  cout << "lookup time for unordered_set<int>: " << 
  chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
  
  // lookup time for set<int>
  begin = chrono::steady_clock::now();
  for(int i = 0; i < size; i++)
  {
    find(setInt.begin(), setInt.end(), i);
  }
  end = chrono::steady_clock::now();
  cout << "lookup time for set<int>: " << 
  chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;
  
  // lookup time for vector<int>
  begin = chrono::steady_clock::now();
  for(int i = 0; i < size; i++)
  {
    find(vecInt.begin(), vecInt.end(), i);
  }
  end = chrono::steady_clock::now();
  cout << "lookup time for vector<int>: " << 
  chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;

  return 0;
}

Here is my result:

lookup time for unordered_set<int>: 2293ms
lookup time for set<int>: 3080ms
lookup time for vector<int>: 1311ms

Upvotes: 1

Views: 91

Answers (2)

Jerry Coffin
Jerry Coffin

Reputation: 490148

As a follow-up to HolyBlackCat's (correct) answer, here's some corrected code to show the differences:

#include <iostream>
#include <vector>
#include <set>
#include <unordered_set>
#include <algorithm>
#include <random>
#include <numeric>
#include <chrono>

using namespace std;

int main(int argc, char** argv)
{
  int size = 10000;
  vector<int> items(size);
  iota(items.begin(), items.end(), 0);
  auto rng = default_random_engine {};
  shuffle(items.begin(), items.end(), rng);

  using clock = std::chrono::high_resolution_clock;

  clock::time_point begin, end;
  unordered_set<int> unsetInt;
  set<int> setInt;
  vector<int> vecInt;

  // add element
  for(auto it = items.begin(); it != items.end(); it++)
  {
    unsetInt.insert(*it);
    setInt.insert(*it);
    vecInt.push_back(*it);
  }

  int suma = 0, sumb = 0, sumc = 0;
  // lookup time for unordered_set<int>
  begin = clock::now();
  for(int i = 0; i < size; i++)
  {
        suma += *unsetInt.find(i);
    // find(unsetInt.begin(), unsetInt.end(), i);
  }
  end = clock::now();
  cout << "lookup time for unordered_set<int>: " <<
  chrono::duration_cast<chrono::microseconds>(end-begin).count() << "us" << endl;

  // lookup time for set<int>
  begin = clock::now();
  for(int i = 0; i < size; i++)
  {
    // find(setInt.begin(), setInt.end(), i);
    sumb += *setInt.find(i);
  }
  end = clock::now();
  cout << "lookup time for set<int>: " <<
  chrono::duration_cast<chrono::microseconds>(end-begin).count() << "us" << endl;

  // lookup time for vector<int>
  begin = clock::now();
  for(int i = 0; i < size; i++)
  {
    sumc += *find(vecInt.begin(), vecInt.end(), i);
  }
  end = clock::now();
  cout << "lookup time for vector<int>: " <<
      chrono::duration_cast<chrono::milliseconds>(end-begin).count() << "ms" << endl;

  cout << "\nIgnore:" << suma << "," << sumb << "," << sumc << '\n';

  return 0;
}

I changed it to show the time in us instead of ms, because the results came out like this:

lookup time for unordered_set<int>: 216us
lookup time for set<int>: 889us
lookup time for vector<int>: 21916us

Ignore:49995000,49995000,49995000

I added code to sum the results of the searches because without that, the compiler "noticed" that the loops had no observable behavior, and simply optimized them away completely (they all ran in 0ms). I'd guess you were testing with optimization turned off, but measuring code speed with optimization turned off isn't usually a useful thing to do.

Upvotes: 0

HolyBlackCat
HolyBlackCat

Reputation: 96286

Did I do something wrong?

You did.

std::find performs the search by iterating over all elements. It doesn't have any special knowledge of std::[unordered_]set and other containers.

To properly search in a set, you need to use its own .find() member function.

Upvotes: 8

Related Questions