gorill
gorill

Reputation: 1673

Measure computation time

How to measure computation time correctly?

Variant 1:

std::chrono::time_point<std::chrono::system_clock> start, end;  
    float elapsed = 0; 
    int N = 100;

    for(int i=0; i<N; ++i)
    {
        start = std::chrono::system_clock::now();
        func();//target function
        end = std::chrono::system_clock::now();
        elapsed += std::chrono::duration_cast<std::chrono::microseconds>(end-start).count();
    }

Variant 2:

start = std::chrono::system_clock::now();
for(int i=0; i<N; ++i)
    func();
end = std::chrono::system_clock::now();
elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end-start).count();

These variants show very different results. I tried to replace virtual functions with std::function:

/// Abstract interface for the virtual-dispatch side of the comparison.
struct Base
{
    /// Virtual so that deleting a derived object through a Base* is well defined;
    /// without it, such a delete is undefined behaviour.
    virtual ~Base() = default;

    /// Workload entry point; every concrete type supplies its own body.
    virtual void f() = 0;
};

struct Der1 : public Base
{
    virtual void f() override 
    {
        int i=0; 
        for(int j=0; j<100; ++j)
            i += 2*j; 
    }
};

struct Der2 : public Base
{
    virtual void f() override 
    {
        int i=0; 
        for(int j=0; j<100; ++j)
            i += 3*j; 
    }
};

/// Base for the std::function side of the comparison: instead of a virtual
/// member function, each object carries its behaviour in the callable `f`.
struct Base_
{
    /// Stores a copy of the callable that `f()` will invoke.
    Base_(std::function<void()> f_) : f(f_) {}

    /// Virtual so that deleting a derived object through a Base_* is well
    /// defined; without it, such a delete is undefined behaviour.
    virtual ~Base_() = default;

    // Declaring the destructor would otherwise suppress the implicit move
    // operations, so all copy/move operations are defaulted explicitly.
    Base_(const Base_&) = default;
    Base_(Base_&&) = default;
    Base_& operator=(const Base_&) = default;
    Base_& operator=(Base_&&) = default;

    /// The per-object workload; called as `obj.f()` / `ptr->f()`.
    std::function<void()> f;
};

/// Base_ specialisation that installs a lambda running a 100-step loop
/// accumulating 2*j.
struct Der1_ : public Base_
{
    Der1_()
        : Base_([] {
              // The running total is local and is never read after the loop.
              int acc = 0;
              for (int step = 0; step < 100; ++step)
              {
                  acc += 2 * step;
              }
          })
    {
    }
};

/// Base_ specialisation that installs a lambda running a 100-step loop
/// accumulating 3*j.
struct Der2_ : public Base_
{
    Der2_()
        : Base_([] {
              // The running total is local and is never read after the loop.
              int acc = 0;
              for (int step = 0; step < 100; ++step)
              {
                  acc += 3 * step;
              }
          })
    {
    }
};


/// Calls the stored `f` callable of every element of `v`, front to back.
void process1(std::vector<Base_*>& v)
{
    for (auto it = v.begin(); it != v.end(); ++it)
    {
        (*it)->f();
    }
}

/// Calls the virtual `f()` of every element of `v`, front to back.
void process2(std::vector<Base*>& v)
{
    for (auto it = v.begin(); it != v.end(); ++it)
    {
        (*it)->f();
    }
}

int main()
{

    std::vector<Base_*> vec1;
    vec1.push_back(new Der1_);
    vec1.push_back(new Der2_);
    vec1.push_back(new Der1_);
    vec1.push_back(new Der2_);

    std::vector<Base*> vec2;
    vec2.push_back(new Der1);
    vec2.push_back(new Der2);
    vec2.push_back(new Der1);
    vec2.push_back(new Der2);
    std::chrono::time_point<std::chrono::system_clock> start1, end1, start2, end2;   
    float elapsed1 = 0; 
    float elapsed2 = 0;

    int N = 6000;
    //Variant 2
    start1 = std::chrono::system_clock::now();
    for(int i=0; i<N; ++i)
        process1(vec1);
    end1 = std::chrono::system_clock::now();
    elapsed1 = std::chrono::duration_cast<std::chrono::microseconds>(end1-start1).count();

    start2 = std::chrono::system_clock::now();
    for(int i=0; i<N; ++i)
        process2(vec2);
    end2 = std::chrono::system_clock::now();
    elapsed2 = std::chrono::duration_cast<std::chrono::microseconds>(end2-start2).count();

    std::cout<<"virtual: "<<elapsed2<<"\npointer: "<<elapsed1;

    for(int i=0; i<vec1.size(); ++i)
        delete vec1[i];

    for(int i=0; i<vec2.size(); ++i)
        delete vec2[i];

    return 0;
}

and I want to understand whether there is a performance gain from replacing virtual functions with std::function. The second variant indicates a gain of 2.5-3 times, while the first variant shows a drop in performance.

Upvotes: 2

Views: 345

Answers (4)

sasha.sochka
sasha.sochka

Reputation: 14715

Code I recently used for timing std::sort vs qsort (here is one for std::sort)

#include <algorithm>
#include <array>
#include <chrono>
#include <climits>
#include <iostream>
#include <random>

// Bring the timing names used by main() into scope.
using std::chrono::high_resolution_clock;
using std::chrono::milliseconds;
using std::chrono::duration_cast;

// Default-constructed engine and a distribution spanning the whole int range.
std::uniform_int_distribution<int> distribution{INT_MIN, INT_MAX};
std::default_random_engine generator;

// Number of elements to sort. The array is declared at namespace scope, so it
// has static storage duration rather than living on main()'s stack.
constexpr auto size = 100000000;
std::array<int, size> data;

/// Fills the global `data` array with random ints, sorts it with std::sort and
/// prints how long the sort alone took, in seconds.
int main() {
    // A lambda replaces std::bind: std::bind is declared in <functional>,
    // which this file does not include.
    std::generate(std::begin(data), std::end(data), [] { return distribution(generator); });

    // The first timestamp is taken only after generation, so filling the array
    // is excluded from the measurement. steady_clock is used because it is
    // guaranteed monotonic; high_resolution_clock is not.
    const auto gen = std::chrono::steady_clock::now();

    std::sort(std::begin(data), std::end(data));
    const auto finish = std::chrono::steady_clock::now();

    // Interval is truncated to whole milliseconds, then shown as seconds.
    std::cout <<
        static_cast<double>(duration_cast<milliseconds>(finish - gen).count())/1000 <<
        "s for std::sort" << std::endl;
}

By the way, std::sort is almost 2 times faster on my computer.

Upvotes: 0

Zoltán Haindrich
Zoltán Haindrich

Reputation: 1808

in the first one you measure:

N*(t_func+t_now)

in the second you measure:

N*t_func+t_now+t_loop_overhead

If t_func is small and t_now is comparable to it, the first measurement is dominated by the cost of the now() calls rather than by func() itself.

read about micro benchmarking

Upvotes: 1

Dainius
Dainius

Reputation: 1862

It really depends on why you are measuring. The first variant is a bit better, although just 100 iterations isn't that many, and of course it depends heavily on your "func". But don't assume that each call will take the same amount of time: today's processors, pipelines and other components are very complex (and smart), so if you need a really accurate value it is probably better to find an existing benchmarking framework; otherwise you will need to deal with caching, prediction, etc. by yourself.

Upvotes: 0

Ryan Haining
Ryan Haining

Reputation: 36882

The most likely reason for your difference in times is the time spent doing the assignment to end, which will add extra time to your counters. The second way avoids this at the cost of counting the time it takes to increment i in the loop, which is likely significantly less.

Upvotes: 1

Related Questions