Reputation: 1673
How to measure computation time correctly?
Variant 1:
std::chrono::time_point<std::chrono::system_clock> start, end;
float elapsed = 0;
int N = 100;
// Variant 1: time every call separately and accumulate the per-call durations.
// Each iteration pays the cost of two clock reads, so the overhead of
// system_clock::now() is included N times in the measured total.
for(int i=0; i<N; ++i)
{
start = std::chrono::system_clock::now();
func();//target function
end = std::chrono::system_clock::now();
// NOTE(review): float has ~24 bits of mantissa; for large accumulated
// microsecond totals this can lose precision — double would be safer.
elapsed += std::chrono::duration_cast<std::chrono::microseconds>(end-start).count();
}
Variant 2:
start = std::chrono::system_clock::now();
for(int i=0; i<N; ++i)
func();
end = std::chrono::system_clock::now();
elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end-start).count();
These variants show very different results. For context, I tried to replace virtual functions with std::function:
// Abstract interface used for the virtual-dispatch side of the benchmark.
struct Base
{
virtual void f() = 0;
// Instances are deleted through Base* in main(), so the destructor must be
// virtual — without it that delete is undefined behavior.
virtual ~Base() = default;
};
struct Der1 : public Base
{
virtual void f() override
{
int i=0;
for(int j=0; j<100; ++j)
i += 2*j;
}
};
struct Der2 : public Base
{
virtual void f() override
{
int i=0;
for(int j=0; j<100; ++j)
i += 3*j;
}
};
// Type-erased alternative to virtual dispatch: the behavior is stored in a
// std::function member instead of being selected through a vtable.
struct Base_
{
// Take the callable by value and move it into the member to avoid a copy.
Base_(std::function<void()> f_) : f(std::move(f_)) {}
// Derived objects are deleted through Base_* in main(), so the destructor
// must be virtual to avoid undefined behavior. (This adds a vptr to the
// object; keep in mind when comparing object sizes in the benchmark.)
virtual ~Base_() = default;
std::function<void()> f;
};
// Same workload as Der1, but supplied to the std::function base as a lambda.
struct Der1_ : public Base_
{
Der1_()
: Base_([] {
int acc = 0;
for (int k = 0; k != 100; ++k)
acc += 2 * k;
})
{}
};
// Same workload as Der2, but supplied to the std::function base as a lambda.
struct Der2_ : public Base_
{
Der2_()
: Base_([] {
int acc = 0;
for (int k = 0; k != 100; ++k)
acc += 3 * k;
})
{}
};
// Invoke the stored std::function on every element of the container.
void process1(std::vector<Base_*>& v)
{
const std::size_t count = v.size();
for (std::size_t idx = 0; idx != count; ++idx)
v[idx]->f();
}
// Call the virtual f() on every element; dispatch goes through the vtable.
void process2(std::vector<Base*>& v)
{
for (auto it = v.begin(); it != v.end(); ++it)
(*it)->f();
}
int main()
{
std::vector<Base_*> vec1;
vec1.push_back(new Der1_);
vec1.push_back(new Der2_);
vec1.push_back(new Der1_);
vec1.push_back(new Der2_);
std::vector<Base*> vec2;
vec2.push_back(new Der1);
vec2.push_back(new Der2);
vec2.push_back(new Der1);
vec2.push_back(new Der2);
std::chrono::time_point<std::chrono::system_clock> start1, end1, start2, end2;
float elapsed1 = 0;
float elapsed2 = 0;
int N = 6000;
//Variant 2
start1 = std::chrono::system_clock::now();
for(int i=0; i<N; ++i)
process1(vec1);
end1 = std::chrono::system_clock::now();
elapsed1 = std::chrono::duration_cast<std::chrono::microseconds>(end1-start1).count();
start2 = std::chrono::system_clock::now();
for(int i=0; i<N; ++i)
process2(vec2);
end2 = std::chrono::system_clock::now();
elapsed2 = std::chrono::duration_cast<std::chrono::microseconds>(end2-start2).count();
std::cout<<"virtual: "<<elapsed2<<"\npointer: "<<elapsed1;
for(int i=0; i<vec1.size(); ++i)
delete vec1[i];
for(int i=0; i<vec2.size(); ++i)
delete vec2[i];
return 0;
}
and I want to understand whether there is a performance gain from replacing virtual functions with std::function. The second variant indicates a 2.5–3× gain, while the first method shows a drop in performance.
Upvotes: 2
Views: 345
Reputation: 14715
Here is code I recently used for timing std::sort vs. qsort (this is the std::sort version):
#include <algorithm>
#include <array>
#include <chrono>
#include <climits>
#include <iostream>
#include <random>
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::chrono::high_resolution_clock;
// Deliberately default-seeded, so every run sorts the same pseudo-random data.
std::default_random_engine generator;
std::uniform_int_distribution<int> distribution{INT_MIN, INT_MAX};
// 100 million ints (~400 MB); kept at namespace scope, presumably because an
// array this large would overflow the stack if declared inside main().
constexpr auto size = 100000000;
std::array<int, size> data;
int main() {
// Fill the array with uniform random ints, taking a timestamp between the
// generation and the sort so the printed figure covers only std::sort.
auto start = high_resolution_clock::now();
std::generate(std::begin(data), std::end(data), std::bind(distribution, generator));
auto gen = high_resolution_clock::now();
std::sort(std::begin(data), std::end(data));
auto finish = high_resolution_clock::now();
// Convert the integral millisecond count to seconds for display.
// NOTE(review): `start` is only used to bound the (unreported) generation
// phase; only gen..finish is printed.
std::cout <<
static_cast<double>(duration_cast<milliseconds>(finish - gen).count())/1000 <<
"s for std::sort" << std::endl;
}
By the way, std::sort is almost 2 times faster than qsort on my computer.
Upvotes: 0
Reputation: 1808
In the first variant you measure:
N*(t_func + t_now)
In the second variant you measure:
N*t_func + t_now + t_loop_overhead
If t_func is small and t_now is comparable to it, the two results will differ noticeably.
Read about micro-benchmarking.
Upvotes: 1
Reputation: 1862
It really depends on why you are measuring. The first variant is a bit better; just 100 iterations isn't that much, and of course it depends heavily on your "func". But don't assume that each call will take the same amount of time: today's processors, pipelines and other components are very complex (and smart), so if you need a really accurate value it is probably better to find an existing benchmarking framework, or you will need to deal with caching, branch prediction, etc. by yourself.
Upvotes: 0
Reputation: 36882
The most likely reason for the difference in your times is the time spent doing the assignment to end
, which adds extra time to your counters. The second way avoids this, at the cost of counting the time it takes to increment i
in the loop, which is likely significantly less.
Upvotes: 1