Syahmi Azhar
Syahmi Azhar

Reputation: 49

Is it possible to transfer thread execution to another thread?

I'm currently experimenting with the possibility of transferring a thread's execution from the current thread to another, newly created thread (I hope that's the correct term). Here's an illustration:

EDIT: Updated the example.

#include "stdafx.h"
#include <memory>
#include <windows.h>
#include <cassert>

// Snapshot of the original thread's general-purpose registers. Written by the
// inline asm at the top of _tmain() and loaded back by TransferThread().
int _eax, _ebx, _ecx, _edx;
int _ebp, _esp, _esi, _edi;
// Address inside _tmain() that TransferThread() jumps to once the snapshot is loaded.
int _eip;
// EFLAGS of the original thread, captured with pushfd in _tmain().
int _flags;
// Used by _tmain() as the address of the spin loop ("jmp_self" / "jmp_self2")
// that the other thread overwrites with NOPs to release the spinning thread.
int _jmp_addr;
// Set to 1 by the original thread just before it creates the new thread and
// cleared again by whichever thread first reaches the final part of _tmain().
// The asm in _tmain() tests it to decide whether to skip the thread-creation code.
bool thread_setup = false;
// Not referenced by any code visible in this file.
CONTEXT PrevThreadCtx;
// Handle of the suspended TransferThread() thread; resumed by RunTheThread()
// and polled with GetExitCodeThread() at the end of _tmain().
HANDLE thread_handle;

// ESP of the new thread right after its pushad in TransferThread(); 0 means
// "not saved yet" and selects the setup path there.
int _newt_esp;
// Address of the instruction following "call get_eip" in TransferThread();
// _tmain() jumps back through it to let the new thread finish.
int _newt_ret;

// Helper thread: waits a moment, then resumes the suspended thread whose
// handle is stored in the global thread_handle.
//
// lpParam is unused.
// Returns 0 on success, or the GetLastError() code when ResumeThread() fails,
// so the failure is visible in this thread's exit code instead of being
// silently dropped.
DWORD WINAPI RunTheThread(LPVOID lpParam)
{
    (void) lpParam;

    // 1000 ms is more than enough: the CreateThread() call that started this
    // thread should already have returned in the creator by now.
    Sleep(1000);

    // ResumeThread() reports failure with (DWORD)-1.
    if (ResumeThread(thread_handle) == (DWORD) -1)
        return GetLastError();

    return 0;
}

// Ordinary (non-naked) thread routine. TransferThread() jumps here on its
// second pass so the new thread ends through a compiler-generated return;
// 123 is the exit status that _tmain() checks for.
DWORD WINAPI DummyPrologueEpilogue(LPVOID lpParam)
{
    const DWORD exitStatus = 123;
    return exitStatus;
}

// Start routine of the *new thread* (created suspended by _tmain()).
// The function is declared naked, so the asm below is its whole body.
//
// First pass (_newt_esp == 0): saves this thread's own registers and ESP,
// then loads the register/flags snapshot that _tmain() stored in the globals
// and jumps to _eip, i.e. continues inside _tmain() on the saved stack pointer.
// Second pass (_tmain() restores ESP from _newt_esp, does popad and jumps
// through _newt_ret): _newt_esp is now non-zero, so control is handed to
// DummyPrologueEpilogue().
__declspec(naked) void TransferThread(LPVOID lpParam)
{
    // Commented-out longjmp call left in place:
    //longjmp(jmpbuf, 0);=
    __asm
    {
        // eax = address of the next instruction (the cmp below).
        call get_eip;
        // Zero means this is the first pass; the mov in between does not
        // touch the flags, so jz still acts on this comparison.
        cmp[_newt_esp], 0;
        // Remember where to re-enter this function later.
        mov[_newt_ret], eax;
        jz setup_new_thread;
        // Second pass: finish through a normal function.
        jmp DummyPrologueEpilogue;

get_eip:
        // Return address of the caller -> eax.
        mov eax, [esp];
        ret;

setup_new_thread:
        // Keep this thread's own registers on its own stack and remember
        // where they are, so _tmain() can popad them back later.
        pushad;
        mov[_newt_esp], esp;

        // Load the saved EFLAGS (eax is only a scratch register here).
        mov eax, [_flags];
        push eax;
        popfd;

        // Load the saved general-purpose registers.
        mov eax, [_eax];
        mov ebx, [_ebx];
        mov ecx, [_ecx];
        mov edx, [_edx];

        // From "mov esp" on, this thread runs on the stack pointer that
        // _tmain() saved in _esp.
        mov ebp, [_ebp];
        mov esp, [_esp];
        mov esi, [_esi];
        mov edi, [_edi];

        // Continue at the address recorded in _tmain().
        jmp [_eip];
    }
}

int _tmain(int argc, _TCHAR* argv[])
{
    int x = 100;
    char szTest[256];

    sprintf_s(szTest, "x = %d", x);

    //HideThread();

    //setjmp(jmpbuf);

    __asm
    {
        // Save all the register
        mov[_eax], eax;
        mov[_ebx], ebx;
        mov[_ecx], ecx;
        mov[_edx], edx;

        mov[_ebp], ebp;
        mov[_esp], esp;
        mov[_esi], esi;
        mov[_edi], edi;

        push eax;

        // Save the flags
        pushfd;
        pop eax;
        mov[_flags], eax;

        // If we on *new thread* jmp to end_asm, otherwise continue...
        call get_eip;
        mov[_eip], eax;
        mov al, byte ptr[thread_setup];
        test al, al;
        jnz end_asm;

        mov eax, [jmp_self];
        mov[_jmp_addr], eax;

        pop eax;

        mov[_newt_esp], 0;
        mov byte ptr[thread_setup], 1;
        push 0;
        push CREATE_SUSPENDED;
        push 0;
        push TransferThread;
        push 0;
        push 0;
        call CreateThread;
        mov [thread_handle], eax;

        // Create another thread just to resume 'TransferThread()'/*new thread* to give time to
        // __stdcall below to return properly, thus restoring the stack.
        // So the *new thread* does not accidentally pop the value from stacks or the __stdcall cleanup
        // code doesn't accidentally overwrites new pushed value from *new thread*.
        push 0;
        push 0;
        push 0;
        push RunTheThread;
        push 0;
        push 0;
        call CreateThread;

        // Jump to self, consumes CPU
jmp_self:
        jmp jmp_self;
        nop;
        nop;
        jmp end_asm;

get_eip:
        mov eax, [esp];
        ret;
end_asm:
    }

    // Test stack-based variable
    MessageBoxA(0, szTest, "Hello World!", MB_OK);
    assert(x = 100);

    x += GetCurrentThreadId();
    sprintf_s(szTest, "x = %d", x);

    HMODULE hMod = LoadLibrary(TEXT("comctl32"));
    FreeLibrary(hMod);

    try
    {
        std::unique_ptr<char[]> pTest(new char[256]);

        sprintf_s(pTest.get(), 256, "WinApi call test. Previous loadLibrary() call return %X", hMod);
        MessageBoxA(0, pTest.get(), "Hello World!", MB_OK);
    } catch (...) {}

    char *pszTest = (char*) malloc(256);
    if (pszTest)
    {
        float f = 1.0;
        f *= (float) GetCurrentThreadId();

        sprintf_s(pszTest, 256, "Current Thread ID = %X, Thread handle = %X, FP Test = %f", GetCurrentThreadId(), GetCurrentThread(), f);
        MessageBoxA(0, pszTest, "Hello World!", MB_OK);

        free( pszTest );
    }

    // printf() from *new thread* will fail on stkchk()
    //printf("Simple test\n");

    // Let's terminate this *new* thread and continue the old thread
    if (thread_setup)
    {
        DWORD OldProtect;
        thread_setup = false;

        VirtualProtect((PVOID)_jmp_addr, 2, PAGE_EXECUTE_READWRITE, &OldProtect);
        *(int*)(_jmp_addr) = 0x90909090; // Prev thread not suspended. Just hope this op is atomic.

        // Operation below will change the stack pointer
        //VirtualProtect((PVOID)_jmp_addr, 2, OldProtect, &OldProtect);
        //FlushInstructionCache(GetCurrentProcess(), (PVOID)_jmp_addr, 2);

        __asm {
            push eax;
            mov eax, jmp_self2;
            mov[_jmp_addr], eax;
            pop eax;
jmp_self2:
            jmp jmp_self2;
            nop;
            nop;
            mov esp, [_newt_esp];
            popad;
            jmp _newt_ret;
        }
    }
    else
    {
        DWORD OldProtect;
        VirtualProtect((PVOID)_jmp_addr, 2, PAGE_EXECUTE_READWRITE, &OldProtect);
        *(int*)(_jmp_addr) = 0x90909090; // Prev thread not suspended. Just hope this op is atomic.
    }

    // Show both thread can be exited cleanly... with some hacks.
    DWORD dwStatus;
    while (GetExitCodeThread(thread_handle, &dwStatus) && dwStatus == STILL_ACTIVE) Sleep(10);
    printf("*New Thread* exited with status %d (Expected 123), Error=%X\n", dwStatus, GetLastError());
    assert(dwStatus == 123);

    printf("Test printf from original thread!\n");
    printf("printf again!\n");
    printf("and again!\n");
    Sleep( 1000 );

    return 0;
}

The code might be a pain to read since it consists mostly of asm, so I added a few comments to help. Now that I have tested it, it turns out to be quite possible, but with some problems. Calling a few Win API functions seems fine, but calling printf will certainly crash in the stkchk() function (access denied). I will try an alternative if there are any suggestions.

Upvotes: 1

Views: 593

Answers (3)

Joshua
Joshua

Reputation: 43317

It's almost like you want the Fiber APIs. ConvertThreadToFiber converts the current thread to a fiber. After that you can call CreateFiber to make more of them, and SwitchToFiber to change which fiber is the active one. Fibers are cleaned up with DeleteFiber, and the last one calls ConvertFiberToThread. You can pass fibers from thread to thread too by calling ConvertThreadToFiber from multiple threads first.

But whyyy? The only real user of this thing is (or most likely was) SQL Server, and that because it has its own scheduler. This API set has been made pretty much obsolete by multi-core CPUs and Thread Pools. The Fiber API provides Green Threads, which is the old way of doing things. These days, we use async libraries.

Upvotes: 3

ElderBug
ElderBug

Reputation: 6145

EDIT: It might be possible to switch successfully with OS APIs like GetThreadContext as JS1 mentioned, and/or some OS-specific fiddling, but other limitations still apply.

The thing is, the new thread needs the previous thread's stack to run. You can do that by either using the old stack directly, or copying the old stack to the new stack. Neither of these is easy: you can't copy the stack because of stack-dependent pointers (frame pointers, for example), and you can't use the old stack, because the OS will detect that the thread went out of its stack, and throw a stack overflow or underflow.

Copying and using a new stack is not realistic, because you cannot reliably identify pointers to the stack. You could try a reasonable heuristic but you will never be 100% sure you got it right.

To reuse the old stack you need to update the stack detection mechanism for your OS, so that the OS doesn't throw a stack error. On Windows you need to update the stack pointers in the Thread Information Block. There might or might not be some other information that needs updating; I didn't do exhaustive research (maybe exception/stack unwind data?).

You have a problem with your current code, because you push some registers AFTER you have saved the stack pointer (ESP). When you reload ESP, it's as if you never pushed anything. The ESP register really is a special case that needs to be handled carefully. Note that you don't even need to care about the new stack in this case; it will just be ignored. That means you don't need any special naked declaration.

Another note, if you are able to do this, there might be additional issues when terminating. What happens on clean-up and when the stack is freed? If the old thread is terminated, won't its stack get freed? And if the stack was changed I'm not sure the new stack will be properly freed either.

Upvotes: 4

Jun
Jun

Reputation: 804

As an FYI, I have not tried the following, but it's possible that you might be able to get something to work like this with a naked function (AFAIK only Microsoft compilers): https://msdn.microsoft.com/en-us/library/5ekezyy2.aspx

There are a significant number of limitations: https://msdn.microsoft.com/en-us/library/4d12973a.aspx but starting a thread with a naked function isn't listed as a limitation. A naked function would remove the prolog/epilog and allow you to try and transfer the context from the previous thread.

You can potentially also do this through an interpreter: basically save the interpreted state of the program and start on a separate thread.

As I can think of no actual use case, I'm not sure why you would ever want to do this.

Upvotes: 0

Related Questions