Reputation: 41
I am trying to make a standalone (libc-free) multithreaded executable on Linux that can be controlled with signals and a Unix-domain socket, but I am having trouble setting up the signal handler.
This is the part of the code that I am having trouble with.
#include <linux/futex.h>
#include <linux/signal.h>
#include <linux/time.h>
#include <kernel.h>
#include <mm.h>
#include <fs.h>
#include <bit.h>
#include <text-format.h>
/* Size in bytes of the stack mapped for each worker thread. */
#define STACKSIZE 16384
/* Number of worker threads created by threads_test(). */
#define CORES_COUNT 1
/* Core ID given to the first worker; later workers get consecutive IDs. */
#define STARTING_CORE 1
/* Per-worker bookkeeping shared between the creating thread and the worker. */
struct thread_props {
/* Worker's thread ID, stored by the worker itself; reset to 0 when the
 * worker finishes. Also the futex word wait_thread() sleeps on. */
pid_t tid;
/* Copy of the worker's thread ID; used as the expected futex value in
 * wait_thread(). */
pid_t tret;
/* CPU the worker pins itself to. */
int32_t coreID;
/* Futex word the worker parks on; set to 1 before the worker is created. */
int32_t stopped;
/* CLOCK_THREAD_CPUTIME_ID reading taken just before the work section. */
struct timespec start;
/* CLOCK_THREAD_CPUTIME_ID reading taken just after the work section. */
struct timespec end;
/* Argument block handed to clone3(). */
struct clone_args clargs;
};
/* One word of the CPU affinity bitmask passed to sched_setaffinity(). */
typedef uint32_t cpuset_part_t;
/* Number of CPUs the bitmask can describe. */
#define MAX_CPUSET_BITS 32
/* Bits held by one cpuset_part_t word. */
#define CPUSET_PART_BITS (sizeof(cpuset_part_t) * 8)
/* Words needed for MAX_CPUSET_BITS, rounded up so that a width that is not a
 * multiple of the word size still gets a (partially used) last word. */
#define CPUSET_PARTS ((MAX_CPUSET_BITS + CPUSET_PART_BITS - 1) / CPUSET_PART_BITS)
/*
 * All macro arguments are parenthesized so that expressions such as
 * `set + 1` or `base + i` expand correctly. `cpu` is still evaluated more
 * than once: do not pass expressions with side effects.
 *
 * NOTE(review): the `+ 1` suggests BIT_SET/BIT_CLR/BIT_TOGGLE take 1-based
 * bit positions -- confirm against bit.h.
 */
/* Clear every CPU in the set. */
#define CPUSET_ZERO(cpuset) memset((uint8_t *) (cpuset), 0, CPUSET_PARTS * sizeof(cpuset_part_t))
/* Add `cpu` to the set. */
#define CPUSET_ADD(cpuset, cpu) ((cpuset)[(cpu) / CPUSET_PART_BITS] = BIT_SET((cpuset)[(cpu) / CPUSET_PART_BITS], ((cpu) % CPUSET_PART_BITS) + 1))
/* Remove `cpu` from the set. */
#define CPUSET_RM(cpuset, cpu) ((cpuset)[(cpu) / CPUSET_PART_BITS] = BIT_CLR((cpuset)[(cpu) / CPUSET_PART_BITS], ((cpu) % CPUSET_PART_BITS) + 1))
/* Flip the membership of `cpu` in the set. */
#define CPUSET_TOGGLE(cpuset, cpu) ((cpuset)[(cpu) / CPUSET_PART_BITS] = BIT_TOGGLE((cpuset)[(cpu) / CPUSET_PART_BITS], ((cpu) % CPUSET_PART_BITS) + 1))
/* Put every CPU into the set. */
#define CPUSET_ALLON(cpuset) memset((uint8_t *) (cpuset), (uint64_t) -1, CPUSET_PARTS * sizeof(cpuset_part_t))
/*
 * Body of a worker thread.
 *
 * Pins the calling thread to tp->coreID, parks on the tp->stopped futex,
 * records CLOCK_THREAD_CPUTIME_ID timestamps around the work section, then
 * clears tp->tid and wakes one waiter on it so that wait_thread() returns.
 *
 * tp: this worker's record; coreID, stopped and tid are read here.
 * Returns 0.
 */
static int32_t thread_function(struct thread_props *tp)
{
    cpuset_part_t cpuset[CPUSET_PARTS];

    CPUSET_ZERO(cpuset);
    CPUSET_ADD(cpuset, tp->coreID);
    sched_setaffinity(0, sizeof(cpuset), (uint64_t) cpuset);

    printf("before stop: tid %sl:d\n", tp->tid);
    /* Sleeps only if tp->stopped still holds 1 at the time of the call. */
    futex((uint64_t) &tp->stopped, FUTEX_WAIT, 1, (uint64_t) NULL, (uint64_t) NULL, 0);

    clock_gettime(CLOCK_THREAD_CPUTIME_ID, (uint64_t) &tp->start);
    printf("doing some works here\n");
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, (uint64_t) &tp->end);

    /* Change the futex word first, then wake: the waiter compares tid
     * against tret, so it must see the new value when it is released. */
    tp->tid = 0;
    futex((uint64_t) &tp->tid, FUTEX_WAKE, 1, (uint64_t) NULL, (uint64_t) NULL, 0);
    return 0;
}
void create_thread(struct thread_props *tp) {
printf("Started\n");
tp->stopped = 1;
tp->clargs.flags = (CLONE_SYSVSEM | CLONE_IO | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD);
tp->clargs.exit_signal = 0;
tp->clargs.parent_tid = 0;
tp->clargs.pidfd = 0;
tp->clargs.cgroup = 0;
tp->clargs.set_tid = 0;
tp->clargs.set_tid_size = 0;
tp->clargs.tls = 0;
tp->clargs.child_tid = 0;
tp->clargs.stack = (uint64_t) mmap((uint64_t) NULL, STACKSIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if(tp->clargs.stack == (uint64_t) -1) {
printf("mmap failed");
return;
}
printf("Initialized: coreID = %sl:d\n", tp->coreID);
tp->clargs.stack_size = STACKSIZE;
int64_t clret = 0;
clret = clone3((uint64_t) &tp->clargs, (uint64_t) sizeof(struct clone_args));
if(clret == 0) {
tp->tid = gettid();
tp->tret = tp->tid;
printf("entered tid: %sl:d\n", tp->tret);
thread_function(tp);
printf("exiting tid: %sl:d\n", gettid());
exit(0);
}
}
/*
 * Block until the worker described by tp has finished.
 *
 * The worker stores 0 in tp->tid and issues FUTEX_WAKE when it is done.
 * FUTEX_WAIT may return without that having happened (spurious wake-up,
 * interruption), so the futex word is re-checked and the wait repeated
 * for as long as tp->tid still equals tp->tret.
 */
void wait_thread(struct thread_props *tp) {
    while (__atomic_load_n(&tp->tid, __ATOMIC_ACQUIRE) == tp->tret) {
        futex((uint64_t) &tp->tid, FUTEX_WAIT, tp->tret, (uint64_t) NULL, (uint64_t) NULL, 0);
    }
}
/* Per-worker state, one entry per worker thread; kept at file scope so the
 * signal handler wake_threads() can reach it. */
static struct thread_props tprops[CORES_COUNT];
/*
 * Signal handler: release every worker parked on its `stopped` futex.
 *
 * The futex word is cleared before FUTEX_WAKE is issued. A worker that has
 * not reached its FUTEX_WAIT yet will then find the word different from
 * the expected value 1 and continue immediately instead of sleeping with
 * nobody left to wake it.
 *
 * signo: number of the delivered signal (unused).
 */
void wake_threads(int32_t signo) {
    (void) signo;
    printf("signaled\n");
    for (int32_t core = 0; core < CORES_COUNT; ++core) {
        __atomic_store_n(&tprops[core].stopped, 0, __ATOMIC_RELEASE);
        futex((uint64_t) &tprops[core].stopped, FUTEX_WAKE, CORES_COUNT, (uint64_t) NULL, (uint64_t) NULL, 0);
    }
}
int32_t threads_test() {
cpuset_part_t cpuset[CPUSET_PARTS];
CPUSET_ZERO(cpuset);
CPUSET_ADD(cpuset, 0); // core 0 here
sched_setaffinity(0, sizeof(cpuset), (uint64_t) cpuset);
printf("entered tgid: %sl:d\n", gettid());
struct sigaction siganew = {};
struct sigaction sigaold = {};
siganew.sa_handler = wake_threads;
siganew.sa_restorer = NULL;
siganew.sa_mask = 0;
siganew.sa_flags = SA_RESTART; // for the futexes to restart after signal
rt_sigaction(SIGUSR1, (uint64_t) &siganew, (uint64_t) &sigaold, sizeof(sigset_t));
for(int32_t core = 0; core < CORES_COUNT; ++core) {
tprops[core].coreID = STARTING_CORE + core;
create_thread(&tprops[core]);
}
struct timespec waittime;
waittime.tv_sec = 1;
waittime.tv_nsec = 0;
// this will effectively give enough time for the threads to hook up on their stop futex
clock_nanosleep(CLOCK_MONOTONIC, 0, (uint64_t) &waittime, (uint64_t) NULL);
for(int32_t core = 0; core < CORES_COUNT; ++core)
wait_thread(&tprops[core]);
printf("exiting tgid: %sl:d\n", gettid());
return 0;
}
/*
 * Program entry point: announce the test, run it, announce completion.
 * The result of threads_test() is deliberately ignored; always returns 0.
 */
int32_t main() {
    printf("entering threads test\n");

    (void) threads_test();

    printf("finished threads test\n");
    return 0;
}
I used `gdb` to find out what is happening, but after a close look at the `strace` output it turns out that the SIGSEGV is sent by the kernel.
This is the `strace` output before I send the signal; the process simply waits for a signal, as expected:
$ strace -f output64/test.threads64
execve("output64/test.threads64", ["output64/test.threads64"], 0x7ffd61325aa8 /* 44 vars */) = 0
write(1, "entering threads test\n", 22entering threads test
) = 22
sched_setaffinity(0, 4, [0]) = 0
gettid() = 23568
write(1, "entered tgid: 23568\n", 20entered tgid: 23568
) = 20
rt_sigaction(SIGUSR1, {sa_handler=0x7fb47a12f720, sa_mask=[], sa_flags=SA_RESTART}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
write(1, "Started\n", 8Started
) = 8
mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fb47a121000
write(1, "Initialized: coreID = 1\n", 24Initialized: coreID = 1
) = 24
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_IO, exit_signal=0, stack=0x7fb47a121000, stack_size=0x4000}, 88strace: Process 23569 attached
) = 23569
[pid 23569] gettid( <unfinished ...>
[pid 23568] clock_nanosleep(CLOCK_MONOTONIC, 0, {tv_sec=1, tv_nsec=0}, <unfinished ...>
[pid 23569] <... gettid resumed>) = 23569
[pid 23569] write(1, "entered tid: 23569\n", 19entered tid: 23569
) = 19
[pid 23569] sched_setaffinity(0, 4, [1]) = 0
[pid 23569] write(1, "before stop: tid 23569\n", 23before stop: tid 23569
) = 23
[pid 23569] futex(0x7fb47a132014, FUTEX_WAIT, 1, NULL <unfinished ...>
[pid 23568] <... clock_nanosleep resumed>NULL) = 0
[pid 23568] futex(0x7fb47a132000, FUTEX_WAIT, 23569, NULL
Then, from another console, I send the signal with `kill`:
$ kill -SIGUSR1 23568
And the `strace` output ends with:
[pid 23568] --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=23649, si_uid=1000} ---
[pid 23568] --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} ---
[pid 23569] <... futex resumed>) = ?
[pid 23569] +++ killed by SIGSEGV +++
+++ killed by SIGSEGV +++
I am compiling the code with
gcc -c -nostdinc -m64 -march=raptorlake -mtune=raptorlake -fno-stack-protector -ffreestanding -fno-builtin -pipe -O0 -g3 -ggdb -I /usr/lib/modules/6.13.1/build/headers/include -I include -Wall -Wextra -o output64/threads.c.o test/threads.c
And finally link with
ld output64/libc.c.o output64/arch/x86/start64.S.o output64/threads.c.o -x -pie -nostdlib --no-dynamic-linker -m elf_x86_64 -o output64/test.threads64
The program exits normally if I take out the stop `futex` of the child thread. The SIGSEGV happens only when the process receives the signal.
I searched the internet for similar problems and found nothing relevant. I have also tried blocking the signal (with both SIG_BLOCK and SIG_SETMASK):
/* Build a mask with only SIGUSR1's bit set (signal N is bit N-1) and add it
 * to the calling thread's blocked set. */
/* NOTE(review): the literal 1 is an int, so this shift is only safe for
 * bit positions below 31. */
sigset_t sigset = (sigset_t)(1 << (((SIGUSR1) - 1) % (sizeof(sigset_t) * 8)));
rt_sigprocmask(SIG_BLOCK, (uint64_t) &sigset, (uint64_t) NULL, sizeof(sigset_t));
right after `clone3`, when `clret == 0` (i.e. in the child thread), and I got the same result.
Update: @CraigEstey mentioned the use of the `sigreturn()` function, which indeed might be the problem, but I have no idea how the signal trampoline concept works (as it is called in the manual), so I am going to look into it.
Update: After some research in the `musl` libc I found out that the default restorer just calls `__restore_rt` or `__restore`, an assembly function that only invokes the `rt_sigreturn` or `sigreturn` syscall respectively. So I placed a restorer function, which only calls the `rt_sigreturn` syscall, in the `sa_restorer` member of `struct sigaction`, added the `SA_RESTORER` flag to `sa_flags`, and BOOM!!! It worked as expected!
Why this is needed is still not entirely clear to me. In a few words, the manual says that it is the way for the kernel to restore the process's context (signal mask, stack frame, etc.) during the transition from kernel space back to user space after the process has handled a signal.
Thank you @CraigEstey!
Upvotes: 4
Views: 87