Reputation: 11
I'm trying to make a device running Linux. My system was built with Buildroot 2022.02;
Kernel is 4.19.252 (+PREEMPT_RT patch) assembled with flags
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_PREEMPT_RT=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
CONFIG_PREEMPT_RCU=y
CONFIG_DEBUG_PREEMPT=y
But I've faced a problem I can't resolve on my own...
I run my app binding it to the core (it is a very complex one, let's assume it can't be done other way)
taskset -c 0 ./myapp
the app has many threads but I have a problem with specific pair of them.
The first one is running as 55:SCHED_FIFO, it is subscribed to timerfd triggered every 5 ms (it raises the timer by itself)
ssize_t s;
uint64_t value = 0;
while (true) {
s = read(m_fd, &value, sizeof(value));
if (s != sizeof(value)) {
//wtf ?
continue;
}
timer.rearm();
measureTime()
}
The second one is a cycling task thread, it runs every N ms (configurable) and takes about 150 ms to complete. It is running as 54:SCHED_FIFO
The problem is, when the second task is running (calculation state), it preempts the first thread's ability to receive timer irq... When I measure the time between two timer events I get +-150 ms difference. When my app is bound to the core, the situation appears every N ms (period of the second task). When the app has no binding the situation is random (but still present).
I've ran cyclictest with --prio=55 -a 0 (the same core) along with my program and it detects no latency gaps...
I've created a test app that behaves exactly the same when bound to a single core
/*
* scheduler test
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/timerfd.h>
volatile bool g_quit = false;
int g_timer = -1;
//----------------------------------------------------------------------------------------
void signalHandler(int t_param)
{
if (!g_quit) {
printf("signaled %d\n", t_param);
}
g_quit = true;
}
//----------------------------------------------------------------------------------------
struct timespec diff_timespec(
const struct timespec *time1,
const struct timespec *time0)
{
struct timespec diff;
diff.tv_sec = time1->tv_sec - time0->tv_sec;
diff.tv_nsec = time1->tv_nsec - time0->tv_nsec;
if (diff.tv_nsec < 0) {
diff.tv_nsec += 1000000000;
diff.tv_sec--;
}
return diff;
}
//----------------------------------------------------------------------------------------
void rearmTimer(int t_ms)
{
struct timespec ts;
ts.tv_sec = t_ms / 1000ULL;
ts.tv_nsec = (t_ms % 1000ULL) * 1000000ULL;
struct itimerspec itime;
itime.it_value.tv_sec = ts.tv_sec;
itime.it_value.tv_nsec = ts.tv_nsec;
itime.it_interval.tv_sec = 0;
itime.it_interval.tv_nsec = 0;
if (timerfd_settime(g_timer, 0, &itime, NULL) == -1) {
fprintf(stderr, "timerfd_settime: %s\n", strerror(errno));
}
}
//----------------------------------------------------------------------------------------
void* timerEventHandler(void *t_arg)
{
printf("timer thread entered\n");
pthread_setname_np(pthread_self(), "Timer");
struct timespec sp_prev;
struct timespec sp_curr;
struct timespec diff;
ssize_t s;
uint64_t value = 0;
while (g_timer == -1) {
usleep(100);
}
clock_gettime(CLOCK_REALTIME, &sp_prev);
while (!g_quit) {
s = read(g_timer, &value, sizeof(value));
if (s != sizeof(value)) {
// wtf?
continue;
}
rearmTimer(1000);
clock_gettime(CLOCK_REALTIME, &sp_curr);
diff = diff_timespec(&sp_curr, &sp_prev);
fprintf(stdout, "%ld.%09ld\n", diff.tv_sec, diff.tv_nsec);
sp_prev = sp_curr;
}
printf("timer thread exited\n");
return NULL;
}
//----------------------------------------------------------------------------------------
void* mainTaskHandler(void *t_arg)
{
printf("main task thread entered\n");
pthread_setname_np(pthread_self(), "MainTask");
struct timespec sp_prev;
struct timespec sp_curr;
struct timespec diff;
int a = 0;
while (!g_quit) {
usleep(5000 * 1000);
int i = 0;
clock_gettime(CLOCK_REALTIME, &sp_prev);
while(i++ < 0x7FFFFFE) {
a += i * i / (i + 1);
if (g_quit) {
break;
}
}
clock_gettime(CLOCK_REALTIME, &sp_curr);
diff = diff_timespec(&sp_curr, &sp_prev);
fprintf(stdout, "calculation %d took %ld.%09ld\n", a, diff.tv_sec, diff.tv_nsec);
}
printf("main task thread exited\n");
return NULL;
}
//----------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
signal(SIGTERM, signalHandler);
pthread_setname_np(pthread_self(), "main");
pthread_t eventThread;
pthread_t mainTaskThread;
pthread_attr_t attr;
struct sched_param schedparam;
schedparam.sched_priority = 55;
pthread_attr_init(&attr);
pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
pthread_attr_setschedparam(&attr, &schedparam);
printf("evt create res : %d\n", pthread_create(&eventThread, &attr, timerEventHandler, NULL));
schedparam.sched_priority = 54;
pthread_attr_setschedparam(&attr, &schedparam);
printf("rdd create res : %d\n", pthread_create(&mainTaskThread, &attr, mainTaskHandler, NULL));
pthread_attr_destroy(&attr);
struct sigevent event;
g_timer = timerfd_create(CLOCK_REALTIME, 0);
rearmTimer(1000);
while (!g_quit) {
sleep(1);
}
pthread_join(mainTaskThread, NULL);
pthread_detach(mainTaskThread);
pthread_join(eventThread, NULL);
pthread_detach(eventThread);
return EXIT_SUCCESS;
}
The test app prints time difference between current timer event and a previous one. It also prints (for debug purposes) the time MainTask thread was busy for. The time diff must be around 1 sec (without any spikes) But with my kernel I have timer spikes up to 6 seconds (for my hardware)
Is it normal behavior of RT kernel? What am I doing wrong or what am I understanding wrong about how PREEMPT_RT patch should work?
I've dug into the kernel a bit, and it seems that ktimersoftd (thread added by PREEMPT_RT patch running 1:SCHED_FIFO) is delayed by higher priority task and it does not send timer irqs to subscribers until the kernel gives him enough time. I'm not a kernel developer so I can't say is it correct behavior or not... Also, I've build kernel 5.10 + PREEMPT_RT and the kernel activates throttle mode (O_o) when running my test app and all timers go crazy
Upvotes: 1
Views: 714