saltq
saltq

Reputation: 19

QEMU call goes to wrong address

I've been working on a small osdev project. So far i've gotten to running C code with A20, GDT, protected mode (32-bit) and disk loading, but function calls are not working. I've confirmed the actual binary has no problems (ndisasm -b 32 lizard.bin):

... irrelevant bootloader code ...
00000200  8D4C2404          lea ecx,[esp+0x4]
00000204  83E4F0            and esp,byte -0x10
00000207  FF71FC            push dword [ecx-0x4]
0000020A  55                push ebp
0000020B  89E5              mov ebp,esp
0000020D  51                push ecx
0000020E  83EC14            sub esp,byte +0x14
00000211  C745F400000000    mov dword [ebp-0xc],0x0
00000218  83EC0C            sub esp,byte +0xc
0000021B  8D45F4            lea eax,[ebp-0xc]
0000021E  50                push eax
0000021F  E82F000000        call 0x253
00000224  83C410            add esp,byte +0x10
00000227  8945F4            mov [ebp-0xc],eax
0000022A  FA                cli
0000022B  F4                hlt
0000022C  83EC0C            sub esp,byte +0xc
0000022F  8D45F4            lea eax,[ebp-0xc]
00000232  50                push eax
00000233  E81B000000        call 0x253
00000238  83C410            add esp,byte +0x10
0000023B  8945F4            mov [ebp-0xc],eax
0000023E  83EC0C            sub esp,byte +0xc
00000241  8D45F4            lea eax,[ebp-0xc]
00000244  50                push eax
00000245  E809000000        call 0x253
0000024A  83C410            add esp,byte +0x10
0000024D  8945F4            mov [ebp-0xc],eax
00000250  90                nop
00000251  EBFD              jmp short 0x250
00000253  55                push ebp
00000254  89E5              mov ebp,esp
00000256  83EC10            sub esp,byte +0x10
00000259  FA                cli
0000025A  F4                hlt
0000025B  C745FC01000000    mov dword [ebp-0x4],0x1
00000262  8B55FC            mov edx,[ebp-0x4]
00000265  89D0              mov eax,edx
00000267  C1E002            shl eax,byte 0x2
0000026A  01D0              add eax,edx
0000026C  8945FC            mov [ebp-0x4],eax
0000026F  8B55FC            mov edx,[ebp-0x4]
00000272  89D0              mov eax,edx
00000274  C1E003            shl eax,byte 0x3
00000277  29D0              sub eax,edx
00000279  8945FC            mov [ebp-0x4],eax
0000027C  836DFC06          sub dword [ebp-0x4],byte +0x6
00000280  8B55FC            mov edx,[ebp-0x4]
00000283  89D0              mov eax,edx
00000285  C1E003            shl eax,byte 0x3
00000288  01D0              add eax,edx
0000028A  8945FC            mov [ebp-0x4],eax
0000028D  8B4508            mov eax,[ebp+0x8]
00000290  8B55FC            mov edx,[ebp-0x4]
00000293  8910              mov [eax],edx
00000295  8B45FC            mov eax,[ebp-0x4]
00000298  C9                leave
00000299  C3                ret

The cli & hlt pairs are for debugging with qemu, qemu has not halted on them. As you can see the 3 call instructions are perfectly normal. However running qemu and running info registers produces:

QEMU 6.2.0 monitor - type 'help' for more information
(qemu) info registers
... irrelevant ...
EIP=00007e50 ... irrelevant ...
... irrelevant ...

As you can see, eip is 7e50, the infinite loop! This should not have happened, because there are cli and hlt instructions after the function call (not triggered) and the function (not triggered). If I use gdb, putting a breakpoint on 7e00, the memory address of the kernel, after that continuing and using si sees gdb go into a call to the function, only to have the next instruction be in the infinite loop!

Finally ill provide the files.

Makefile:

PRINTDIRECTORY        = --no-print-directory
BOOTLOADER-PARTFILE   = int/parts/boot.prt
BOOTLOADER-OBJECTFILE = int/boot.o
BOOTLOADER-SOURCEFILE = src/boot.s
KERNEL-PARTFILE       = int/parts/detailed-boot.prt
KERNEL-OBJECTFILE     = int/detailed-boot.o
KERNEL-SOURCEFILE     = src/detailed-boot.c
GCC                   = ~/opt/cross/bin/i686-elf-gcc
LD                    = ~/opt/cross/bin/i686-elf-ld
VM                    = qemu-system-i386
SYSFILE               = lizard.bin

full:
    make bootloader $(PRINTDIRECTORY)
    make kernel $(PRINTDIRECTORY)
    truncate -s 32768 ./int/parts/detailed-boot.prt
    make join $(PRINTDIRECTORY)
bootloader:
    as -o $(BOOTLOADER-OBJECTFILE) $(BOOTLOADER-SOURCEFILE)
    ld -o $(BOOTLOADER-PARTFILE) --oformat binary -e init $(BOOTLOADER-OBJECTFILE) -Ttext 0x7c00
kernel:
    $(GCC) -ffunction-sections -ffreestanding $(KERNEL-SOURCEFILE) -o $(KERNEL-OBJECTFILE) -nostdlib -Wall -Wextra -O0
    $(LD) -o $(KERNEL-PARTFILE) -Ttext 0x7e00 --oformat binary $(KERNEL-OBJECTFILE) -e main --script=LDfile -O 0 -Ttext-segment 0x7e00
join:
    cat $(BOOTLOADER-PARTFILE) $(KERNEL-PARTFILE) > $(SYSFILE)
run:
    $(VM) $(SYSFILE)
debug:
    $(VM) $(SYSFILE) -gdb tcp:localhost:6000 -S

LDfile:

ENTRY(main)
SECTIONS {
    . = 0x7e00;
    .text . : { *(.text) }
    .data . : { *(.data) }
    .bss  . : { *(.bss ) }
}

src/detailed-boot.c:

//#include "stdc/stdbool.h"
//#include "stdc/stdio.h"
asm(".code32");
int a(int *d);
int main() {
    int c = 0;
    c = a(&c);
    asm("cli");
    asm("hlt");
    c = a(&c);
    c = a(&c);
    while(1);
}
int a(int *d) {
    asm("cli");
    asm("hlt");
    int b = 1;
    b *= 5;
    b *= 7;
    b -= 6;
    b *= 9;
    *d = b;
    return b;
}
//#include "stdc/stdio.c"

src/boot.s:

.code16         # 16 bit mode
.global init    # make label init global

init:
    call enableA20
reset:

    mov $0x00, %ah # 0 = reset drive
    mov $0x80, %dl # boot disk
    int $0x13
    jc reset
load:
    mov $0x42, %ah                        # 42 = extended read

    mov $0x8000,             %si
    xor %bx,                 %bx

    movl $0x00007e00,         %ds:4 (%si,1)
    movl $0x00400010,         %ds:0 (%si,1)
    mov  %cs,                 %ds:6 (%si,1)
    movl $0x00000001,         %ds:8 (%si,1) # start sector in lba
    movl $0x00000000,         %ds:12(%si,1) # start sector in lba
    int  $0x13

    # 1. Disable interrupts
    cli
    # 2. Load GDT
    lgdt (gdt_descriptor)
    # set 32 bit mode
    mov %cr0, %eax
    or  $1,   %eax
    mov %eax, %cr0
    # Far jmp
    jmp %cs:(code32)

checkA20:
    push %ds

    xor %ax, %ax
    mov %ax, %ds

    movw $0xAA55, %ax
    movw $0x7DFE, %bx
    movw (%bx), %bx
    cmpw %ax, %bx
    jnz checkA20_enabled
checkA20_disabled:
    xor %ax, %ax
    jmp checkA20_done
checkA20_enabled:
    xor %ax, %ax
    inc %ax
checkA20_done:
    pop %ds
    ret


enableA20:
    call checkA20
    jnz enableA20_enabled

enableA20_int15:
    mov $0x2403, %ax                 # A20 gate support
    int $0x15
    jb enableA20_keyboardController  # INT 15 aint supported
    cmp $0, %ah
    jnz enableA20_keyboardController # INT 15 aint supported

    mov $0x2402, %ax                 # A20 status
    int $0x15
    jb enableA20_keyboardController  # couldnt get status
    cmp $0, %ah
    jnz enableA20_keyboardController # couldnt get status

    cmp $1, %al
    jz enableA20_enabled             # A20 is activated

    mov $0x2401, %ax                 # A20 activation
    int $0x15
    jb enableA20_keyboardController  # couldnt activate
    cmp $0, %ah
    jnz enableA20_keyboardController # couldnt activate

enableA20_keyboardController:
    call checkA20
    jnz enableA20_enabled

    cli

    call enableA20_wait
    mov $0xAD, %al
    out %al,   $0x64

    call enableA20_wait
    mov $0xD0, %al
    out %al,   $64

    call enableA20_wait2
    in  $0x60, %al
    push %eax

    call enableA20_wait
    mov $0xD1, %al
    out %al,   $0x64

    call enableA20_wait
    pop %eax
    or  $2, %al
    out %al, $0x60

    call enableA20_wait
    mov $0xAE, %al
    out %al,   $0x64

    call enableA20_wait
    sti

enableA20_fastA20:
    call checkA20
    jnz enableA20_enabled

    in $0x92, %al
    test $2,  %al
    jnz enableA20_postFastA20
    or  $2,    %al
    and $0xFE, %al
    out %al,   $92

enableA20_postFastA20:
    call checkA20
    jnz enableA20_enabled
    cli
    hlt
enableA20_enabled:
    ret
enableA20_wait:
    in   $0x64, %al
    test $2,    %al
    jnz enableA20_wait
    ret
enableA20_wait2:
    in   $0x64, %al
    test $1,    %al
    jnz enableA20_wait2
    ret
setGDT: ret
# NOTE limit is the length
# NOTE base is the start
# NOTE base + limit = last address
gdt_start:
gdt_null:
# null descriptor
    .quad 0
gdt_data:
    .word 0x01c8 # limit: bits 0-15
    .word 0x0000 # base:  bits 0-15
    .byte 0x00   # base:  bits 16-23
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: no (+0x00)
# direction bit: grows up (+0x00)
# writable bit: writable (+0x02)
# accesed bit [best left 0, cpu will deal with it]: no (+0x00)
    .byte 0x80 + 0x10 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
    .byte 0x80 + 0x40 # flags: granularity @ 4-7 limit: bits 16-19 @ 0-3
    .byte 0x00 # base:  bits 24-31
gdt_code:
    .word 0x0100 # limit: bits 0-15
    .word 0x8000 # base:  bits 0-15
    .byte 0x1c   # base:  bits 16-23   
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: yes (+0x08)
# conforming bit [0: only ring 0 can execute this]: no (+0x00)
# readable bit: yes (0x02)
# accessed bit [best left 0, cpu will deal with it]: no (0x00)
    .byte 0x80 + 0x10 + 0x08 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
    .byte 0x80 + 0x40 + 0x00   # flags: granularity @ 4-7 limit: bits 16-19 @ 0-3
    .byte 0x00                 # base:  bits 24-31
gdt_end:
gdt_descriptor:
    .word gdt_end - gdt_start - 1
    .long gdt_start

.code32
code32:
    mov %ds, %ax
    mov %ax, %ds
#   mov %ax, %ss
    mov %ax, %es
    mov %ax, %fs
    mov %ax, %gs

    movl $0x4000,  %ebp
    mov  %esp,     %ebp

    push $0x7e00
    ret
.fill 500-(.-init)
.quad 1
.word 1

.word 0xaa55
kernel:

I know that this is not a minimum scenario, I apologize. I'll end this off by giving a link to the github repo: https://github.com/saltq144/lizard and the cross compiler tutorial i followed: https://wiki.osdev.org/GCC_Cross-Compiler

To address some comments: I have not configured the IDT, an NMI would cause a triple fault or jump to garbage, not the loop. Trying to modify SS caused a triple fault from my limited testing. And I do agree that .code32 in the .c file is pointless, but the cross compiler is i686 so 64-bit code shouldn't be an issue, however i'll look into it.

Note:

Using inline assembly, I am able to insert two nop instructions to allow function calls to work. This is not an ideal solution, but it will have to work until this issue is fully resolved. Compiler optimizations may break this, but they haven't yet.

Upvotes: 0

Views: 250

Answers (1)

Brendan
Brendan

Reputation: 37232

When you switch to protected mode (at the jmp %cs:(code32)) CS is loaded with "protected mode compatible" information from the GDT.

At the start of code32: all other segment registers contain real mode values. You copy the real mode value that is not compatible with protected mode from DS (at mov %ds, %ax) into all data segment registers. This real mode value from DS is probably 0x0000. In protected mode that refers to the "null descriptor".

This is why you can't do mov %ax, %ss - the CPU will not allow you to use "null descriptor" for the stack segment (it will give you a general protection fault instead). Because you don't load SS with protected mode compatible values, it's left using old values from real mode - an unknown base address, a 64 KiB segment limit, and a 16-bit default stack pointer size.

The consequence of all this is... as soon as you do any normal memory access (e.g. the push dword [ecx-0x4] which uses DS as an implied segment register like [ds: ecx-0x4]) you will get a general protection fault because DS is set to the null descriptor. Because you haven't set up a protected mode IDT the CPU will just use the values real mode was using for its IVT, causing CPU to think unknown trash (that would've been for "interrupt 0x1A" in real mode and not "interupt 0x0D" due to IDT entries being twice as big) is the IDT entry for the general protection fault handler. There's no easy way to predict what happens after that (maybe the unknown trash isn't a valid IDT entry for protected mode and it causes a double fault, maybe it is a "valid enough" IDT entry and you start executing garbage at an unknown address).

Upvotes: 2

Related Questions