Reputation: 19
I've been working on a small osdev project. So far i've gotten to running C code with A20, GDT, protected mode (32-bit) and disk loading, but function calls are not working. I've confirmed the actual binary has no problems (ndisasm -b 32 lizard.bin
):
... irrelevant bootloader code ...
00000200 8D4C2404 lea ecx,[esp+0x4]
00000204 83E4F0 and esp,byte -0x10
00000207 FF71FC push dword [ecx-0x4]
0000020A 55 push ebp
0000020B 89E5 mov ebp,esp
0000020D 51 push ecx
0000020E 83EC14 sub esp,byte +0x14
00000211 C745F400000000 mov dword [ebp-0xc],0x0
00000218 83EC0C sub esp,byte +0xc
0000021B 8D45F4 lea eax,[ebp-0xc]
0000021E 50 push eax
0000021F E82F000000 call 0x253
00000224 83C410 add esp,byte +0x10
00000227 8945F4 mov [ebp-0xc],eax
0000022A FA cli
0000022B F4 hlt
0000022C 83EC0C sub esp,byte +0xc
0000022F 8D45F4 lea eax,[ebp-0xc]
00000232 50 push eax
00000233 E81B000000 call 0x253
00000238 83C410 add esp,byte +0x10
0000023B 8945F4 mov [ebp-0xc],eax
0000023E 83EC0C sub esp,byte +0xc
00000241 8D45F4 lea eax,[ebp-0xc]
00000244 50 push eax
00000245 E809000000 call 0x253
0000024A 83C410 add esp,byte +0x10
0000024D 8945F4 mov [ebp-0xc],eax
00000250 90 nop
00000251 EBFD jmp short 0x250
00000253 55 push ebp
00000254 89E5 mov ebp,esp
00000256 83EC10 sub esp,byte +0x10
00000259 FA cli
0000025A F4 hlt
0000025B C745FC01000000 mov dword [ebp-0x4],0x1
00000262 8B55FC mov edx,[ebp-0x4]
00000265 89D0 mov eax,edx
00000267 C1E002 shl eax,byte 0x2
0000026A 01D0 add eax,edx
0000026C 8945FC mov [ebp-0x4],eax
0000026F 8B55FC mov edx,[ebp-0x4]
00000272 89D0 mov eax,edx
00000274 C1E003 shl eax,byte 0x3
00000277 29D0 sub eax,edx
00000279 8945FC mov [ebp-0x4],eax
0000027C 836DFC06 sub dword [ebp-0x4],byte +0x6
00000280 8B55FC mov edx,[ebp-0x4]
00000283 89D0 mov eax,edx
00000285 C1E003 shl eax,byte 0x3
00000288 01D0 add eax,edx
0000028A 8945FC mov [ebp-0x4],eax
0000028D 8B4508 mov eax,[ebp+0x8]
00000290 8B55FC mov edx,[ebp-0x4]
00000293 8910 mov [eax],edx
00000295 8B45FC mov eax,[ebp-0x4]
00000298 C9 leave
00000299 C3 ret
The cli & hlt pairs are for debugging with qemu, qemu has not halted on them. As you can see the 3 call instructions are perfectly normal. However running qemu and running info registers
produces:
QEMU 6.2.0 monitor - type 'help' for more information
(qemu) info registers
... irrelevant ...
EIP=00007e50 ... irrelevant ...
... irrelevant ...
As you can see, eip
is 7e50
, the infinite loop! This should not have happened, because there are cli
and hlt
instructions after the function call (not triggered) and the function (not triggered). If I use gdb
, putting a breakpoint on 7e00
, the memory address of the kernel, after that continuing and using si
sees gdb
go into a call to the function, only to have the next instruction be in the infinite loop!
Finally ill provide the files.
Makefile
:
PRINTDIRECTORY = --no-print-directory
BOOTLOADER-PARTFILE = int/parts/boot.prt
BOOTLOADER-OBJECTFILE = int/boot.o
BOOTLOADER-SOURCEFILE = src/boot.s
KERNEL-PARTFILE = int/parts/detailed-boot.prt
KERNEL-OBJECTFILE = int/detailed-boot.o
KERNEL-SOURCEFILE = src/detailed-boot.c
GCC = ~/opt/cross/bin/i686-elf-gcc
LD = ~/opt/cross/bin/i686-elf-ld
VM = qemu-system-i386
SYSFILE = lizard.bin
full:
make bootloader $(PRINTDIRECTORY)
make kernel $(PRINTDIRECTORY)
truncate -s 32768 ./int/parts/detailed-boot.prt
make join $(PRINTDIRECTORY)
bootloader:
as -o $(BOOTLOADER-OBJECTFILE) $(BOOTLOADER-SOURCEFILE)
ld -o $(BOOTLOADER-PARTFILE) --oformat binary -e init $(BOOTLOADER-OBJECTFILE) -Ttext 0x7c00
kernel:
$(GCC) -ffunction-sections -ffreestanding $(KERNEL-SOURCEFILE) -o $(KERNEL-OBJECTFILE) -nostdlib -Wall -Wextra -O0
$(LD) -o $(KERNEL-PARTFILE) -Ttext 0x7e00 --oformat binary $(KERNEL-OBJECTFILE) -e main --script=LDfile -O 0 -Ttext-segment 0x7e00
join:
cat $(BOOTLOADER-PARTFILE) $(KERNEL-PARTFILE) > $(SYSFILE)
run:
$(VM) $(SYSFILE)
debug:
$(VM) $(SYSFILE) -gdb tcp:localhost:6000 -S
LDfile
:
ENTRY(main)
SECTIONS {
. = 0x7e00;
.text . : { *(.text) }
.data . : { *(.data) }
.bss . : { *(.bss ) }
}
src/detailed-boot.c
:
//#include "stdc/stdbool.h"
//#include "stdc/stdio.h"
asm(".code32");
int a(int *d);
int main() {
int c = 0;
c = a(&c);
asm("cli");
asm("hlt");
c = a(&c);
c = a(&c);
while(1);
}
int a(int *d) {
asm("cli");
asm("hlt");
int b = 1;
b *= 5;
b *= 7;
b -= 6;
b *= 9;
*d = b;
return b;
}
//#include "stdc/stdio.c"
src/boot.s
:
.code16 # 16 bit mode
.global init # make label init global
init:
call enableA20
reset:
mov $0x00, %ah # 0 = reset drive
mov $0x80, %dl # boot disk
int $0x13
jc reset
load:
mov $0x42, %ah # 42 = extended read
mov $0x8000, %si
xor %bx, %bx
movl $0x00007e00, %ds:4 (%si,1)
movl $0x00400010, %ds:0 (%si,1)
mov %cs, %ds:6 (%si,1)
movl $0x00000001, %ds:8 (%si,1) # start sector in lba
movl $0x00000000, %ds:12(%si,1) # start sector in lba
int $0x13
# 1. Disable interrupts
cli
# 2. Load GDT
lgdt (gdt_descriptor)
# set 32 bit mode
mov %cr0, %eax
or $1, %eax
mov %eax, %cr0
# Far jmp
jmp %cs:(code32)
checkA20:
push %ds
xor %ax, %ax
mov %ax, %ds
movw $0xAA55, %ax
movw $0x7DFE, %bx
movw (%bx), %bx
cmpw %ax, %bx
jnz checkA20_enabled
checkA20_disabled:
xor %ax, %ax
jmp checkA20_done
checkA20_enabled:
xor %ax, %ax
inc %ax
checkA20_done:
pop %ds
ret
enableA20:
call checkA20
jnz enableA20_enabled
enableA20_int15:
mov $0x2403, %ax # A20 gate support
int $0x15
jb enableA20_keyboardController # INT 15 aint supported
cmp $0, %ah
jnz enableA20_keyboardController # INT 15 aint supported
mov $0x2402, %ax # A20 status
int $0x15
jb enableA20_keyboardController # couldnt get status
cmp $0, %ah
jnz enableA20_keyboardController # couldnt get status
cmp $1, %al
jz enableA20_enabled # A20 is activated
mov $0x2401, %ax # A20 activation
int $0x15
jb enableA20_keyboardController # couldnt activate
cmp $0, %ah
jnz enableA20_keyboardController # couldnt activate
enableA20_keyboardController:
call checkA20
jnz enableA20_enabled
cli
call enableA20_wait
mov $0xAD, %al
out %al, $0x64
call enableA20_wait
mov $0xD0, %al
out %al, $64
call enableA20_wait2
in $0x60, %al
push %eax
call enableA20_wait
mov $0xD1, %al
out %al, $0x64
call enableA20_wait
pop %eax
or $2, %al
out %al, $0x60
call enableA20_wait
mov $0xAE, %al
out %al, $0x64
call enableA20_wait
sti
enableA20_fastA20:
call checkA20
jnz enableA20_enabled
in $0x92, %al
test $2, %al
jnz enableA20_postFastA20
or $2, %al
and $0xFE, %al
out %al, $92
enableA20_postFastA20:
call checkA20
jnz enableA20_enabled
cli
hlt
enableA20_enabled:
ret
enableA20_wait:
in $0x64, %al
test $2, %al
jnz enableA20_wait
ret
enableA20_wait2:
in $0x64, %al
test $1, %al
jnz enableA20_wait2
ret
setGDT: ret
# NOTE limit is the length
# NOTE base is the start
# NOTE base + limit = last address
gdt_start:
gdt_null:
# null descriptor
.quad 0
gdt_data:
.word 0x01c8 # limit: bits 0-15
.word 0x0000 # base: bits 0-15
.byte 0x00 # base: bits 16-23
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: no (+0x00)
# direction bit: grows up (+0x00)
# writable bit: writable (+0x02)
# accesed bit [best left 0, cpu will deal with it]: no (+0x00)
.byte 0x80 + 0x10 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
.byte 0x80 + 0x40 # flags: granularity @ 4-7 limit: bits 16-19 @ 0-3
.byte 0x00 # base: bits 24-31
gdt_code:
.word 0x0100 # limit: bits 0-15
.word 0x8000 # base: bits 0-15
.byte 0x1c # base: bits 16-23
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: yes (+0x08)
# conforming bit [0: only ring 0 can execute this]: no (+0x00)
# readable bit: yes (0x02)
# accessed bit [best left 0, cpu will deal with it]: no (0x00)
.byte 0x80 + 0x10 + 0x08 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
.byte 0x80 + 0x40 + 0x00 # flags: granularity @ 4-7 limit: bits 16-19 @ 0-3
.byte 0x00 # base: bits 24-31
gdt_end:
gdt_descriptor:
.word gdt_end - gdt_start - 1
.long gdt_start
.code32
code32:
mov %ds, %ax
mov %ax, %ds
# mov %ax, %ss
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
movl $0x4000, %ebp
mov %esp, %ebp
push $0x7e00
ret
.fill 500-(.-init)
.quad 1
.word 1
.word 0xaa55
kernel:
I know that this is not a minimum scenario, I apologize. I'll end this off by giving a link to the github repo: https://github.com/saltq144/lizard and the cross compiler tutorial i followed: https://wiki.osdev.org/GCC_Cross-Compiler
To address some comments: I have not configured the IDT, an NMI would cause a triple fault or jump to garbage, not the loop. Trying to modify SS caused a triple fault from my limited testing. And I do agree that .code32
in the .c file is pointless, but the cross compiler is i686 so 64-bit code shouldn't be an issue, however i'll look into it.
Note:
Using inline assembly, I am able to insert two nop
instructions to allow function calls to work. This is not an ideal solution, but it will have to work until this issue is fully resolved. Compiler optimizations may break this, but they haven't yet.
Upvotes: 0
Views: 250
Reputation: 37232
When you switch to protected mode (at the jmp %cs:(code32)
) CS is loaded with "protected mode compatible" information from the GDT.
At the start of code32:
all other segment registers contain real mode values. You copy the real mode value that is not compatible with protected mode from DS (at mov %ds, %ax
) into all data segment registers. This real mode value from DS is probably 0x0000. In protected mode that refers to the "null descriptor".
This is why you can't do mov %ax, %ss
- the CPU will not allow you to use "null descriptor" for the stack segment (it will give you a general protection fault instead). Because you don't load SS with protected mode compatible values, it's left using old values from real mode - an unknown base address, a 64 KiB segment limit, and a 16-bit default stack pointer size.
The consequence of all this is... as soon as you do any normal memory access (e.g. the push dword [ecx-0x4]
which uses DS as an implied segment register like [ds: ecx-0x4]
) you will get a general protection fault because DS is set to the null descriptor. Because you haven't set up a protected mode IDT the CPU will just use the values real mode was using for its IVT, causing CPU to think unknown trash (that would've been for "interrupt 0x1A" in real mode and not "interupt 0x0D" due to IDT entries being twice as big) is the IDT entry for the general protection fault handler. There's no easy way to predict what happens after that (maybe the unknown trash isn't a valid IDT entry for protected mode and it causes a double fault, maybe it is a "valid enough" IDT entry and you start executing garbage at an unknown address).
Upvotes: 2