Reputation: 377
Is there a faster way to the following in c++, so that i can outperform the python's implementation?
For example:
map <int,set<int>> hash_DICT1;
hash_DICT1[1] = {1,2,3};
hash_DICT1[2] = {4,5,6};
map <int,set<int>> hash_DICT2;
hash_DICT2[1] = {11,12,13};
hash_DICT2[3] = {4,5,6};
vector<int> output_vector
= GetPairDiff(hash_DICT1, hash_DICT2)
= [11-1,12-1,13-1,
11-2,12-2,13-2,
11-3,12-3,13-3] // only hashkey=1 is intersect, so only compute pairwise difference of the respective set elements.
= [10, 11, 12,
9, 10, 11,
8, 9, 10] // Note that i do want to keep duplicates, if any. Order does not matter.
GetPairDiff
function.
vector<int> GetPairDiff(
unordered_map <int, set<int>> &hash_DICT1,
unordered_map <int, set<int>> &hash_DICT2) {
// Init
vector<int> output_vector;
int curr_key;
set<int> curr_set1, curr_set2;
// Get intersection
for (const auto &KEY_SET:hash_DICT2) {
curr_key = KEY_SET.first;
// Find pairwise difference
if (hash_DICT1.count(curr_key) > 0){
curr_set1 = hash_DICT1[curr_key];
curr_set2 = hash_DICT2[curr_key];
for (auto it1=curr_set1.begin(); it1 != curr_set1.end(); ++it1) {
for (auto it2=curr_set2.begin(); it2 != curr_set2.end(); ++it2) {
output_vector.push_back(*it2 - *it1);
}
}
}
}
}
main run
int main (int argc, char ** argv) {
// Using unordered_map
unordered_map <int,set<int>> hash_DICT_1;
hash_DICT_1[1] = {1,2,3};
hash_DICT_1[2] = {4,5,6};
unordered <int,set<int>> hash_DICT_2;
hash_DICT_2[1] = {11,12,13};
hash_DICT_2[3] = {4,5,6};
GetPairDiff(hash_DICT_1, hash_DICT_1);
}
Compiled like this
g++ -o ./CompareRunTime.out -Ofast -Wall -Wextra -std=c++11
Other data structures are welcomed, such as map
or unordered_set
.
However i did try all 4 permutations, and found the one given by GetPairDiff
runs the fastest, but nowhere near as fast as the python's implementation:
hash_DICT1 = { 1 : {1,2,3}, 2 : {4,5,6} }
hash_DICT2 = { 1 : {11,12,13}, 3 : {4,5,6} }
def GetPairDiff(hash_DICT1, hash_DICT2):
vector = []
for element in hash_DICT1.keys() & hash_DICT2.keys():
vector.extend(
[db_t-qry_t
for qry_t in hash_DICT2[element]
for db_t in hash_DICT1[element] ])
return vector
output_vector = GetPairDiff(hash_DICT1, hash_DICT2)
Performance comparison:
python : 0.00824 s
c++ : 0.04286 s
The implementation by c++ takes about 5 times the time taken !!!
Upvotes: 0
Views: 310
Reputation: 117298
const&
.find
instead of count
and then use the result.push_back
to a vector
may be made faster by reserve()
ing the number of elements you need to store if you know the number in advance.Fixing these issues could result in something like this (requires C++17):
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& [curr_key2, curr_set2] : hash_DICT2) {
// use find() instead of count()
if(auto it1 = hash_DICT1.find(curr_key2); it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}
This is more than 8 times as fast as the python version for these containers on my computer compiled with g++ -std=c++17 -O3
.
Here's a C++11 version of the same program:
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& curr_pair2 : hash_DICT2) {
auto& curr_key2 = curr_pair2.first;
auto& curr_set2 = curr_pair2.second;
// use find() instead of count()
auto it1 = hash_DICT1.find(curr_key2);
if(it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}
Upvotes: 2