Reputation: 4062
I'm trying to compare GPU to CPU performance. For the NVIDIA GPU I've been using the cudaEvent_t
types to get a very precise timing.
For the CPU I've been using the following code:
// Timers: raw clock() readings taken before and after the measured work,
// plus the resulting elapsed time in milliseconds.
clock_t start, stop;
float elapsedTime = 0;
// Capture the start time.
// clock() reports processor time used by the program, in units of
// 1/CLOCKS_PER_SEC seconds.
start = clock();
// Do something here (placeholder for the code being measured)
.......
// Capture the stop time
stop = clock();
// Convert the tick difference to seconds (divide by CLOCKS_PER_SEC),
// then scale to milliseconds.
elapsedTime = (float)(stop - start) / (float)CLOCKS_PER_SEC * 1000.0f;
Apparently, that piece of code is only good if you're counting in seconds. Also, the results sometimes come out quite strange.
Does anyone know of some way to create a high resolution timer in Linux?
Upvotes: 50
Views: 110867
Reputation: 930
After reading this thread I started testing the code for clock_gettime against C++11's chrono, and the two don't seem to match.
There is a huge gap between them!
A sleep of std::chrono::seconds(1) seems to be equivalent to only ~70,000 units as measured by clock_gettime.
#include <ctime>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <thread>
#include <chrono>
#include <iomanip>
#include <vector>
#include <mutex>
// Forward declarations for the helpers defined after main().
// Returns end - start as a timespec.
timespec diff(timespec start, timespec end);
// Reads CLOCK_PROCESS_CPUTIME_ID via clock_gettime.
timespec get_cpu_now_time();
// Sequential variant: 20 one-second sleeps, one after another.
// Declared and defined, but main() calls get_start_end_pairs2() instead.
std::vector<timespec> get_start_end_pairs();
// Threaded variant: 20 worker threads, each sleeping for one second.
std::vector<timespec> get_start_end_pairs2();
// Prints the delta for each consecutive (start, end) pair in the vector.
void output_deltas(const std::vector<timespec> &start_end_pairs);
//=============================================================
int main()
{
std::cout << "Hello waiter" << std::endl; // flush is intentional
std::vector<timespec> start_end_pairs = get_start_end_pairs2();
output_deltas(start_end_pairs);
return EXIT_SUCCESS;
}
//=============================================================
// Sequential sampler: performs 20 one-second sleeps back to back and records
// a get_cpu_now_time() reading immediately before and after each sleep.
// The returned vector is laid out as [start0, end0, start1, end1, ...].
std::vector<timespec> get_start_end_pairs()
{
    const int sample_count = 20;
    const std::chrono::seconds nap(1);

    std::vector<timespec> readings;
    int taken = 0;
    while (taken < sample_count)
    {
        readings.push_back(get_cpu_now_time());
        std::this_thread::sleep_for(nap);
        readings.push_back(get_cpu_now_time());
        ++taken;
    }
    return readings;
}
//=============================================================
// Threaded sampler: launches 20 threads that each take a get_cpu_now_time()
// reading, sleep for one second and take a second reading. Every thread
// appends its (start, end) pair to the shared vector under a mutex, so the two
// readings of a pair are always adjacent; the order of pairs follows the order
// in which threads acquire the lock.
std::vector<timespec> get_start_end_pairs2()
{
    std::mutex results_guard;
    std::vector<std::thread> sleepers;
    std::vector<timespec> readings;

    // Both readings are taken outside the lock; the lock only protects the
    // two appends to the shared vector.
    const auto measure_one_sleep = [&results_guard, &readings]() {
        const timespec before = get_cpu_now_time();
        std::this_thread::sleep_for(std::chrono::seconds(1));
        const timespec after = get_cpu_now_time();

        std::lock_guard<std::mutex> hold(results_guard);
        readings.emplace_back(before);
        readings.emplace_back(after);
    };

    for (int launched = 0; launched != 20; ++launched) {
        sleepers.emplace_back(measure_one_sleep);
    }

    // Every thread must finish before `readings` is returned, because the
    // lambda refers to it (and to the mutex) by reference.
    for (std::thread &sleeper : sleepers) {
        if (!sleeper.joinable()) {
            continue;
        }
        sleeper.join();
    }
    return readings;
}
//=============================================================
// Prints the element count of `start_end_pairs`, then one line per complete
// (start, end) pair showing the elapsed seconds and zero-padded nanoseconds.
//
// start_end_pairs: readings laid out as [start0, end0, start1, end1, ...].
//
// A trailing element without a partner (odd-sized input) is ignored instead of
// being paired with end(), which would be undefined behaviour. The stream's
// fill character is restored before returning so the '0' padding used for the
// nanosecond column does not leak into later output on std::cout.
void output_deltas(const std::vector<timespec> &start_end_pairs)
{
    std::cout << "size: " << start_end_pairs.size() << std::endl;

    const char saved_fill = std::cout.fill();
    const std::size_t pair_count = start_end_pairs.size() / 2;

    for (std::size_t pair = 0; pair < pair_count; ++pair)
    {
        const timespec &started = start_end_pairs[2 * pair];
        const timespec &ended = start_end_pairs[2 * pair + 1];
        const timespec delta = diff(started, ended);

        std::cout
            << std::setw(2)
            << std::setfill(' ')
            << pair
            << " Waited ("
            << delta.tv_sec
            << "\ts\t"
            << std::setw(9)       // tv_nsec is at most 9 digits; pad so it reads as a fraction
            << std::setfill('0')
            << delta.tv_nsec
            << "\tns)"
            << std::endl;
    }

    std::cout.fill(saved_fill);
}
//=============================================================
// Returns end - start as a timespec.
// When end's nanosecond field is smaller than start's, one second is borrowed
// so that (for normalised inputs) the result's tv_nsec is non-negative.
timespec diff(timespec start, timespec end)
{
    const bool borrow = end.tv_nsec < start.tv_nsec;

    timespec elapsed;
    elapsed.tv_sec = end.tv_sec - start.tv_sec - (borrow ? 1 : 0);
    elapsed.tv_nsec = end.tv_nsec - start.tv_nsec + (borrow ? 1000000000 : 0);
    return elapsed;
}
//=============================================================
// Returns the current reading of CLOCK_PROCESS_CPUTIME_ID.
// Note: this clock counts CPU time consumed by the process, not wall-clock
// time, so time spent blocked in a sleep does not show up in the difference
// between two readings.
timespec get_cpu_now_time()
{
    timespec cpu_reading = {};  // starts zeroed; stays zero if clock_gettime fails
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_reading);
    return cpu_reading;
}
output:
Hello waiter
0 Waited (0 s 000843254 ns)
1 Waited (0 s 000681141 ns)
2 Waited (0 s 000685119 ns)
3 Waited (0 s 000674252 ns)
4 Waited (0 s 000714877 ns)
5 Waited (0 s 000624202 ns)
6 Waited (0 s 000746091 ns)
7 Waited (0 s 000575267 ns)
8 Waited (0 s 000860157 ns)
9 Waited (0 s 000827479 ns)
10 Waited (0 s 000612959 ns)
11 Waited (0 s 000534818 ns)
12 Waited (0 s 000553728 ns)
13 Waited (0 s 000586501 ns)
14 Waited (0 s 000627116 ns)
15 Waited (0 s 000616725 ns)
16 Waited (0 s 000616507 ns)
17 Waited (0 s 000641251 ns)
18 Waited (0 s 000683380 ns)
19 Waited (0 s 000850205 ns)
Upvotes: 1
Reputation: 500167
Check out clock_gettime, which is a POSIX interface to high-resolution timers.
If, having read the manpage, you're left wondering about the difference between CLOCK_REALTIME and CLOCK_MONOTONIC, see "Difference between CLOCK_REALTIME and CLOCK_MONOTONIC?"
See the following page for a complete example: http://www.guyrutenberg.com/2007/09/22/profiling-code-using-clock_gettime/
#include <iostream>
#include <time.h>
using namespace std;
// Forward declaration: diff() is defined after main() and returns end - start.
timespec diff(timespec start, timespec end);
// Times a busy loop with the process CPU-time clock and prints the elapsed
// time as "<seconds>:<nanoseconds>".
int main()
{
    timespec time1, time2;
    // Initialised: the original read an indeterminate value, which is
    // undefined behaviour. volatile keeps the compiler from deleting the
    // loop, which would otherwise leave nothing to measure.
    volatile int temp = 0;

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
    for (int i = 0; i < 242000000; i++)
        temp = temp + temp;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);

    // Compute the difference once and print both fields from the same result.
    const timespec elapsed = diff(time1, time2);
    std::cout << elapsed.tv_sec << ":" << elapsed.tv_nsec << std::endl;
    return 0;
}
// Returns end - start as a timespec.
// The fields are subtracted independently; a negative nanosecond difference is
// fixed up by borrowing one second.
timespec diff(timespec start, timespec end)
{
    const long nanos_per_second = 1000000000L;

    timespec delta;
    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (delta.tv_nsec < 0) {
        delta.tv_sec -= 1;
        delta.tv_nsec += nanos_per_second;
    }
    return delta;
}
Upvotes: 70
Reputation: 41
An epoll-based implementation: https://github.com/ielife/simple-timer-for-c-language
Use it like this:
// Usage example for the linked timer library: create a timer server, register
// two timers with callbacks, let them run for 10 seconds, then tear down.
// NOTE(review): the meaning of the 1024 argument is not shown here
// (presumably a capacity hint) - confirm against the library.
timer_server_handle_t *timer_handle = timer_server_init(1024);
if (NULL == timer_handle) {
fprintf(stderr, "timer_server_init failed\n");
return -1;
}
// First timer: count_ = 3 with callback timer_cb1.
// NOTE(review): count_ presumably is the number of times the timer fires and
// timer_internal_ the interval (0.5, presumably seconds) - confirm.
ctimer timer1;
timer1.count_ = 3;
timer1.timer_internal_ = 0.5;
timer1.timer_cb_ = timer_cb1;
// Heap-allocated int (value 100) attached to the timer as user data.
// NOTE(review): the malloc result is not checked and this snippet never frees it.
int *user_data1 = (int *)malloc(sizeof(int));
*user_data1 = 100;
timer1.user_data_ = user_data1;
timer_server_addtimer(timer_handle, &timer1);
// Second timer: count_ = -1 with callback timer_cb2.
// NOTE(review): -1 presumably means "repeat until deleted" - confirm.
ctimer timer2;
timer2.count_ = -1;
timer2.timer_internal_ = 0.5;
timer2.timer_cb_ = timer_cb2;
// Heap-allocated int (value 10) attached as user data; also never freed here.
int *user_data2 = (int *)malloc(sizeof(int));
*user_data2 = 10;
timer2.user_data_ = user_data2;
timer_server_addtimer(timer_handle, &timer2);
// Give the timers 10 seconds before removing them.
sleep(10);
// Remove both timers (identified by their fd member), then shut the server down.
timer_server_deltimer(timer_handle, timer1.fd);
timer_server_deltimer(timer_handle, timer2.fd);
timer_server_uninit(timer_handle);
Upvotes: 0
Reputation: 18522
To summarise information presented so far, these are the two functions required for typical applications.
#include <time.h>
// call this function to start a nanosecond-resolution timer
struct timespec timer_start(){
    /* Snapshot of the process CPU-time clock; pass it to timer_end() later. */
    struct timespec began;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &began);
    return began;
}
// call this function to end a timer, returning nanoseconds elapsed as a long
long timer_end(struct timespec start_time){
    /* Take a second reading of the same clock timer_start() uses. */
    struct timespec finished;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &finished);

    /* Whole seconds scaled to nanoseconds, plus the (possibly negative)
       difference of the nanosecond fields. */
    const time_t whole_seconds = finished.tv_sec - start_time.tv_sec;
    const long leftover_nanos = finished.tv_nsec - start_time.tv_nsec;
    return whole_seconds * (long)1e9 + leftover_nanos;
}
Here is an example of how to use them to time how long it takes to calculate the variance of a list of inputs.
// Usage example: time a single call to var() with timer_start()/timer_end().
// var, input and MAXLEN are not defined in this snippet.
struct timespec vartime = timer_start(); // begin a timer called 'vartime'
double variance = var(input, MAXLEN); // perform the task we want to time
long time_elapsed_nanos = timer_end(vartime); // nanoseconds elapsed since timer_start()
printf("Variance = %f, Time taken (nanoseconds): %ld\n", variance, time_elapsed_nanos);
Upvotes: 22
Reputation: 26251
Are you interested in wall time (how much time actually elapses) or cycle count (how many cycles)? In the first case, you should use something like gettimeofday.
The highest resolution timer uses the RDTSC x86 assembly instruction. However, this measures clock ticks, so you should be sure that power saving mode is disabled.
The wiki page for TSC gives a few examples: http://en.wikipedia.org/wiki/Time_Stamp_Counter
Upvotes: 1
Reputation: 96258
// Read the current value of CLOCK_REALTIME into t
// (seconds in t.tv_sec, nanoseconds in t.tv_nsec).
struct timespec t;
clock_gettime(CLOCK_REALTIME, &t);
There is also CLOCK_REALTIME_HR, but I'm not sure whether it makes any difference.
Upvotes: 1