Reputation: 48662
Consider this simple program that just infinitely loops:
/* Target program: spins forever and never exits on its own. */
int main(void) {
    while (1) {
        /* busy-wait; nothing to do */
    }
}
It's easy enough to use ptrace
to inject a system call into it, like this:
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
/*
 * Inject an _exit(42) system call into a running process.
 *
 * argv[1] is the decimal PID of the target. The tracer attaches, waits for
 * the target to stop, writes a `syscall` instruction at the target's current
 * instruction pointer, loads rax/rdi with the syscall number and argument,
 * and detaches so the target resumes straight into the injected syscall.
 *
 * Returns 0 on success, 1 on bad usage or when any ptrace/wait step fails
 * (the failing step is reported on stderr).
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s PID\n", argc > 0 ? argv[0] : "inject");
        return 1;
    }

    /* Reject empty, non-numeric, or non-positive PIDs up front. */
    char *end;
    long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0) {
        fprintf(stderr, "invalid PID: %s\n", argv[1]);
        return 1;
    }
    pid_t pid = (pid_t)parsed;

    if (ptrace(PTRACE_ATTACH, pid, 0, 0) == -1) {
        perror("PTRACE_ATTACH");
        return 1;
    }
    /* The attach only takes effect once the tracee has actually stopped. */
    if (waitid(P_PID, pid, NULL, WSTOPPED) == -1) {
        perror("waitid");
        return 1;
    }

    struct user_regs_struct regs;
    if (ptrace(PTRACE_GETREGS, pid, 0, &regs) == -1) {
        perror("PTRACE_GETREGS");
        return 1;
    }

    /*
     * POKETEXT writes a whole word at rip; its two low-order bytes are
     * 0x0f 0x05, the "syscall" instruction, in little-endian order.
     */
    if (ptrace(PTRACE_POKETEXT, pid, (void *)regs.rip, (void *)0x050f) == -1) {
        perror("PTRACE_POKETEXT");
        return 1;
    }

    regs.rax = SYS_exit; /* syscall number */
    regs.rdi = 42;       /* first argument: exit status */
    if (ptrace(PTRACE_SETREGS, pid, 0, &regs) == -1) {
        perror("PTRACE_SETREGS");
        return 1;
    }

    if (ptrace(PTRACE_DETACH, pid, 0, 0) == -1) {
        perror("PTRACE_DETACH");
        return 1;
    }
    return 0;
}
That will inject the syscall _exit(42);
over the infinite loop. It's also possible to do this by looking for an existing syscall
instruction instead of just overwriting wherever the instruction pointer happens to be.
Now consider this program, that also (after some setup) just infinitely loops:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
/*
 * Singly linked list of memory regions to be unmapped.
 *
 * The unmap_all assembly routine reads these fields at fixed byte offsets
 * (start at 0, len at 8, next at 16), so the field order and sizes must not
 * change without updating that routine.
 */
struct mapping_list {
/* first address of the region (passed to munmap as addr) */
void *start;
/* size of the region in bytes (passed to munmap as length) */
size_t len;
/* next element, or NULL at the end of the list */
struct mapping_list *next;
};
/*
 * Signature of the unmapping routine: unmap every region in `list`, then
 * unmap the `len` bytes starting at `start`.
 */
typedef void unmap_all_t(struct mapping_list *list, void *start, size_t len);
/* Entry label of the routine, defined in the top-level asm block. */
extern unmap_all_t unmap_all;
/* Label just past the routine's last instruction; used to compute its size. */
extern const char unmap_all_end[];
/*
 * unmap_all(list, start, len) -- hand-written so that it needs no stack and
 * its exact byte layout is known.
 *
 * On entry: %rdi = list, %rsi = start, %rdx = len.
 *
 * 1. start and list are saved in %r8 and %r9, because %rdi/%rsi are reused
 *    as syscall arguments.
 * 2. For each list element, munmap(list->start, list->len) is issued
 *    (syscall number 11); the result is not checked.
 *    The first element is dereferenced before any NULL test, so `list`
 *    must be non-NULL.
 * 3. Registers are loaded for munmap(start, len) and control jumps to
 *    .final_syscall, which `.org` places at byte offset 4094 from unmap_all.
 *    The `syscall` there therefore occupies bytes 4094-4095, and
 *    .loop_forever starts at offset 4096.
 * 4. After the final munmap the routine spins in .loop_forever; it never
 *    returns.
 *
 * The routine is meant to be copied to the start of a page-aligned buffer
 * (unmap_all_end marks the end of the bytes to copy) and run from there.
 */
__asm__("\n"
"unmap_all:\n"
" movq %rsi, %r8 # save start\n"
" movq %rdi, %r9 # save list\n"
".unmap_list_element:\n"
" movq (%r9), %rdi # pass list->start as addr\n"
" movq 8(%r9), %rsi # pass list->len as length\n"
" movl $11, %eax # SYS_munmap\n"
" syscall\n"
" movq 16(%r9), %r9 # advance to the next list element\n"
" testq %r9, %r9\n"
" jne .unmap_list_element\n"
" movl $11, %eax # SYS_munmap\n"
" movq %r8, %rdi # pass start as addr\n"
" movq %rdx, %rsi # pass len as length\n"
" jmp .final_syscall\n"
" .org unmap_all+4094 # make sure the upcoming syscall instruction is at the very end of the page,\n"
".final_syscall: # given that unmap_all started at the very beginning of it\n"
" syscall\n"
".loop_forever:\n"
" jmp .loop_forever\n"
"unmap_all_end:\n"
);
/*
 * Leave the process spinning in a single read-only executable page that
 * contains no `syscall` instruction.
 *
 * Steps:
 *   1. Collect every executable mapping listed in /proc/self/maps.
 *   2. Copy the unmap_all routine into a two-page memfd, seal the memfd
 *      against writes and resizing, and map it read+execute through a
 *      separate read-only descriptor.
 *   3. Call the copy, asking it to unmap all collected mappings and then its
 *      own first page (the one holding the `syscall` instructions).
 *
 * Returns 1 after printing a diagnostic if any setup step fails; on success
 * control never comes back from the copied routine.
 */
int main(void) {
    /* The routine is laid out for 4096-byte pages and spans two of them. */
    enum { PAGE_LEN = 4096, REGION_LEN = 2 * PAGE_LEN };

    FILE *maps = fopen("/proc/self/maps", "r");
    if (!maps) {
        perror("fopen");
        return 1;
    }

    struct mapping_list *list = NULL;
    unsigned long start, end;
    char x;
    /* Only the execute flag matters; the r and w flags are parsed and dropped. */
    while (fscanf(maps, "%lx-%lx %*c%*c%c", &start, &end, &x) == 3) {
        /* Skip the rest of the line; stop at EOF too so a truncated final
         * line cannot spin forever. */
        int c;
        do {
            c = fgetc(maps);
        } while (c != '\n' && c != EOF);

        if (x != 'x') {
            continue;
        }

        struct mapping_list *node = malloc(sizeof *node);
        if (!node) {
            perror("malloc");
            return 1;
        }
        node->start = (void *)start;
        node->len = end - start;
        node->next = list;
        list = node;
    }
    if (fclose(maps)) {
        perror("fclose");
        return 1;
    }
    /* unmap_all dereferences the first element unconditionally. */
    if (!list) {
        fprintf(stderr, "no executable mappings found\n");
        return 1;
    }

    int memfd = syscall(SYS_memfd_create, "unmap_all", 2 /* MFD_ALLOW_SEALING */);
    if (memfd == -1) {
        perror("memfd_create");
        return 1;
    }
    if (ftruncate(memfd, REGION_LEN)) {
        perror("ftruncate");
        return 1;
    }

    /* Temporary writable view, used only to copy the routine in. */
    char *pages = mmap(NULL, REGION_LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd, 0);
    if (pages == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    memcpy(pages, unmap_all, unmap_all_end - (const char*)unmap_all);
    if (munmap(pages, REGION_LEN)) {
        perror("munmap");
        return 1;
    }

    /* Reopen the memfd read-only through /proc so the executable mapping is
     * backed by a descriptor that was never writable. */
    char *path;
    if (asprintf(&path, "/proc/self/fd/%d", memfd) == -1) {
        perror("asprintf");
        return 1;
    }
    int memfd_ro = open(path, O_RDONLY);
    if (memfd_ro == -1) {
        perror("open");
        return 1;
    }
    free(path);

    if (fcntl(memfd, 1033 /* F_ADD_SEALS */, 15 /* F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE */)) {
        perror("fcntl");
        return 1;
    }
    if (close(memfd)) {
        perror("close");
        return 1;
    }

    pages = mmap(NULL, REGION_LEN, PROT_READ|PROT_EXEC, MAP_SHARED, memfd_ro, 0);
    if (pages == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    if (close(memfd_ro)) {
        perror("close");
        return 1;
    }

    /* Unmap everything executable, then the first page of the copy itself;
     * only the second page, holding the endless jump, stays mapped. */
    ((unmap_all_t*)pages)(list, pages, PAGE_LEN);
    __builtin_unreachable();
}
When I try to use my ptrace program on it, the PTRACE_POKETEXT step to write the syscall instruction fails with error EIO, since the containing page is a shared mapping of a read-only file. I also don't have the option to find an existing syscall instruction, as all executable pages except one have been unmapped, and the sole remaining one doesn't contain that instruction anywhere.
Is there any other way to use ptrace
to cause that program to execute a syscall, or have I made that completely impossible? (If it matters, assume Linux 4.19 on x86_64.)
Upvotes: 3
Views: 1398
Reputation: 39426
Can ptrace cause the traced process to perform a syscall without access to an executable syscall instruction?
Only if the tracer can generate one using POKETEXT
, using current mainline kernels and kernel modules.
Perhaps it is time to reread the first paragraph in man 2 ptrace
:
The ptrace() system call provides a means by which one process (the "tracer") may observe and control the execution of another process (the "tracee"), and examine and change the tracee's memory and registers. It is primarily used to implement breakpoint debugging and system call tracing.
It is a tool for observing and controlling a tracee, not some kind of jail, nor an attacker that a process should protect itself from.
There are probably other ways a tracee can set up a non-writable executable mapping, unmap all other pages, and ensure that nothing in the remaining executable pages contains a sequence that can be used to construct a system call.
So what? Such cases have not yet popped up in practice, or we'd have modified ptrace facilities to cover that case as well.
If this is a real problem, it seems to me that the best approach would be to add an explicit syscall facility to ptrace. There are a number of options how it could be implemented.
So, any "No" answer to the stated question must be amended with "we can add that feature if needed, though". We don't even need to modify any existing kernels, I think; just write a helper kernel module providing the needed facilities.
Upvotes: 0
Reputation: 33631
The point of the seal is to "prove" that ptrace shouldn't automatically allow writing read-only pages
Seals are related to normal shared memory access between processes.
As I mentioned on your other question, regarding kernel source:
ptrace
access via PTRACE_POKETEXT
is different. It completely bypasses the protections on a given page. (i.e.) It does not reference anything related to seals.
The poketext operation is handled by completely different code within the kernel and [sort of] just does it via access calls to the VM.
I wouldn't worry about it too much.
You might have a look at CONFIG_HAVE_IOREMAP_PROT
Upvotes: 1