Trouble simplifying GDT on x86

I'm trying to simplify a GDT table with contains 6 segments but in which 2 are really necessary (from what I gather). I cannot make the changes work.

The code is from Cromwell, a Xbox (Original) bootloader. The CPU is a Pentium III . There is no concept of userspace so everything should run on segments with privilege level 0. I want to begin with a flat model with a single code32 and a single data32 segment.

Here's the relevant original working code:

    .code32

.section .text, "ax"
     .org 0x00
     jmp    start_linux

.global Cromwellconfig
Cromwellconfig:
    .org 0x0c
    // Space for the SHA1 checksum
    .org 0x20   

    // The Value positions are fixed, do not change them, used everywhere
    .long 0x0   // 0x20 if XBE, then this bit is 0, if Cromwell mode, the bit is set to 1 by the Startuploader
    .long 0x0   // 0x24 ImageRetryLoads
    .long 0x0   // 0x28 Bank, from where Loaded
    .long 0x0   // 0x2C 0 .. Bios = 256 k, 1 .. Bios = 1MB
    .long 0x0   // 0x30 free
    .long _end_complete_rom       // 0x34 free
    .long 0x0       // 0x38 free
    .long 0x0   // free

.align 16
tableGdt:
    .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 0x00 dummy
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00 // 0x08 code32
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00 // 0x10 code32
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00 // 0x18 data32
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0x8f, 0x00 // 0x20 code16 (8f indicates 4K granularity, ie, huge limit)
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0x8f, 0x00 // 0x28 data16

tableGdtDescriptor:
    // This is the GDT header having 8 bytes
    .word tableGdtDescriptor-tableGdt  // 0x30 byte GDT
    .long GDT_LOC                      // GDT located at 0xA0000
    .word 0                            // Padding
tableGdtEnd:

.align 16
tableIdtDescriptor:

    .word 2048
    .long IDT_LOC                      // IDT located at 0xB0000
    .word 0     // fill Word, so we get aligned again

        // We are dword aligned now

.align 16        
    .globl start_linux
start_linux:

    // Make SURE the IRQs are turned off
    cli

    // kill the cache  = Disable bit 30 + 29 = CD + NW
    // CD = Cache Disable (disable = 1)
    // NW Not write through (disable = 1)
    // Protected mode enabled
    mov     $0x60010033, %eax
    mov %eax, %cr0
    wbinvd

    // Flush the TLB
    xor %eax, %eax
    mov %eax, %cr3

    // We kill the Local Descriptor Table
    xor %eax, %eax
    lldt    %ax

    // DR6/DR7: Clear the debug registers
    xor %eax, %eax
    mov %eax, %dr6
    mov %eax, %dr7
    mov %eax, %dr0
    mov %eax, %dr1
    mov %eax, %dr2
    mov %eax, %dr3


    // IMPORTANT!  Linux expects the GDT located at a specific position,
    // 0xA0000, so we have to move it there.

    // Copy the GDT to its final location
    movl $GDT_LOC, %edi
    movl $tableGdt, %esi
    movl $(tableGdtEnd-tableGdt)/4, %ecx
    // Moving (tableGdtEnd-tableGdt)/4 DWORDS from &tableGdt to &GDT_LOC
    rep movsl

    // Load the new GDT (bits0-15: Table limit, bits16-47: Base address)
    lgdt GDT_LOC+(tableGdtDescriptor-tableGdt)

    // Kill the LDT, if any
    xor %eax, %eax
    lldt %ax

    // Reload CS as 0010 from the new GDT using a far jump
    jmp $0x010, $reload_cs

reload_cs:

    // CS is now a valid entry in the GDT.  Set SS, DS, and ES to valid
    // descriptors, but clear FS and GS as they are not necessary.

    // Set SS, DS, and ES to a data32 segment with maximum limit.
    movw $0x0018, %ax
    mov %eax, %ss
    mov %eax, %ds
    mov %eax, %es

    // Clear FS and GS
    xor %eax, %eax
    mov %eax, %fs
    mov %eax, %gs

Changing the far jump in the code above to

jmp $0x008, $reload_cs

also works fine by the way.

As you can see, protected mode is enabled at the start.

I want to trim the GDT to have a code32 segment at 0x08 and a data32 segment at 0x10. Here's my take on this; which isn't working:

    .code32

.section .text, "ax"
     .org 0x00
     jmp    start_linux

.global Cromwellconfig
Cromwellconfig:
    .org 0x0c
    // Space for the SHA1 checksum
    .org 0x20   

    // The Value positions are fixed, do not change them, used everywhere
    .long 0x0   // 0x20 if XBE, then this bit is 0, if Cromwell mode, the bit is set to 1 by the Startuploader
    .long 0x0   // 0x24 ImageRetryLoads
    .long 0x0   // 0x28 Bank, from where Loaded
    .long 0x0   // 0x2C 0 .. Bios = 256 k, 1 .. Bios = 1MB
    .long 0x0   // 0x30 free
    .long _end_complete_rom       // 0x34 free
    .long 0x0       // 0x38 free
    .long 0x0   // free

.align 16
tableGdt:
    .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 0x00 dummy
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00 // 0x08 code32
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00 // 0x10 data32

tableGdtDescriptor:
    // This is the GDT header having 8 bytes
    .word tableGdtDescriptor-tableGdt  // 0x18 byte GDT
    .long GDT_LOC                      // GDT located at 0xA0000
    .word 0                            // Padding
tableGdtEnd:

.align 16
tableIdtDescriptor:

    .word 2048
    .long IDT_LOC                      // IDT located at 0xB0000
    .word 0     // fill Word, so we get aligned again

        // We are dword aligned now

.align 16        
    .globl start_linux
start_linux:

    // Make SURE the IRQs are turned off
    cli

    // kill the cache  = Disable bit 30 + 29 = CD + NW
    // CD = Cache Disable (disable = 1)
    // NW Not write through (disable = 1)
    // Protected mode enabled
    mov     $0x60010033, %eax
    mov %eax, %cr0
    wbinvd

    // Flush the TLB
    xor %eax, %eax
    mov %eax, %cr3

    // We kill the Local Descriptor Table
    xor %eax, %eax
    lldt    %ax

    // DR6/DR7: Clear the debug registers
    xor %eax, %eax
    mov %eax, %dr6
    mov %eax, %dr7
    mov %eax, %dr0
    mov %eax, %dr1
    mov %eax, %dr2
    mov %eax, %dr3


    // IMPORTANT!  Linux expects the GDT located at a specific position,
    // 0xA0000, so we have to move it there.

    // Copy the GDT to its final location
    movl $GDT_LOC, %edi
    movl $tableGdt, %esi
    movl $(tableGdtEnd-tableGdt)/4, %ecx
    // Moving (tableGdtEnd-tableGdt)/4 DWORDS from &tableGdt to &GDT_LOC
    rep movsl

    // Load the new GDT (bits0-15: Table limit, bits16-47: Base address)
    lgdt GDT_LOC+(tableGdtDescriptor-tableGdt)

    // Kill the LDT, if any
    xor %eax, %eax
    lldt %ax

    // Reload CS as 0008 from the new GDT using a far jump
    jmp $0x008, $reload_cs

reload_cs:

    // CS is now a valid entry in the GDT.  Set SS, DS, and ES to valid
    // descriptors, but clear FS and GS as they are not necessary.

    // Set SS, DS, and ES to a data32 segment with maximum limit.
    movw $0x0010, %ax
    mov %eax, %ss
    mov %eax, %ds
    mov %eax, %es

    // Clear FS and GS
    xor %eax, %eax
    mov %eax, %fs
    mov %eax, %gs

Can anybody spot why it wouldn't work?

Bonus questions I cannot find answers on my own:

  1. First of all, at "tableGdtDescriptor:", shouldn't the limit value (first word) be the size of the table - 1? So should the value here be "tableGdtDescriptor-tableGdt - 1"? If so, why is it working in the original code? (My assumption is that this value is that any value above 47 bytes (6 segments - 1 byte) just get brought back to 47 bytes.
  2. Why does the "tableGdtDescriptor" field have padding at the end if there's a forced 16 bit align right after? It doesn't seem necessary. Purely for good practice?
  3. Why are FS and GS cleared and not set to the same values as for SS, DS and ES? All examples online sets these registers to the same segment offset. Why was it done differently here?

Upvotes: 1

Views: 555

Answers (1)

Well turns out the issue was in populating the IDT. I was pointing every IDT entries to the code segment at offset 0x10 in the GDT, hence why I needed the coded segment at offset 0x10.

Here's the fixed code which I simplified a bit:

    .code32

.section .text, "ax"
         .org 0x00
         jmp    start_linux

.global Cromwellconfig
Cromwellconfig:
    .org 0x0c
        // Space for the SHA1 checksum
        .org 0x20   

        // The Value positions are fixed, do not change them, used everywhere
        .long 0x0   // 0x20 if XBE, then this bit is 0, if Cromwell mode, the bit is set to 1 by the Startuploader
        .long 0x0   // 0x24 ImageRetryLoads
        .long 0x0   // 0x28 Bank, from where Loaded
        .long 0x0   // 0x2C 0 .. Bios = 256 k, 1 .. Bios = 1MB
        .long 0x0   // 0x30 free
        .long _end_complete_rom       // 0x34 free
        .long 0x0       // 0x38 free
        .long 0x0   // free

.align 16
tableGdt:
    .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 0x00 dummy
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9b, 0xcf, 0x00 // 0x08 code32
    .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00 // 0x10 data32

tableGdtDescriptor:
    // This is the GDT header having 8 bytes
    .word tableGdtDescriptor-tableGdt - 1  // Size - 1byte
    .long tableGdt                      // GDT location
    .word 0                            // Padding
tableGdtEnd:

.align 16
tableIdtDescriptor:

    .word 2048
    .long IDT_LOC                      // IDT located at 0xB0000
    .word 0     // fill Word, so we get aligned again

        // We are dword aligned now

.align 16        
    .globl start_linux
start_linux:

        //Make SURE the IRQs are turned off
    cli

    // kill the cache  = Disable bit 30 + 29 = CD + NW
    // CD = Cache Disable (disable = 1)
    // NW Not write through (disable = 1)
       //   mov     %cr0, %eax
    //orl   $0x60000000, %eax
    mov     $0x60010033, %eax
    mov %eax, %cr0
    wbinvd

    // Flush the TLB
    xor %eax, %eax
    mov %eax, %cr3

    // We kill the Local Descriptor Table
    xor %eax, %eax
    lldt    %ax

    // DR6/DR7: Clear the debug registers
    xor %eax, %eax
    mov %eax, %dr6
    mov %eax, %dr7
    mov %eax, %dr0
    mov %eax, %dr1
    mov %eax, %dr2
    mov %eax, %dr3

    // Load the new GDT
    lgdt tableGdtDescriptor

    // Kill the LDT, if any
    xor %eax, %eax
    lldt %ax

    // Reload CS as 0008 from the new GDT using a far jump
    jmp $0x0008, $reload_cs

reload_cs:

    // CS is now a valid entry in the GDT.  Set SS, DS, and ES to valid
    // descriptors, but clear FS and GS as they are not necessary.

    // Set SS, DS, and ES to a data32 segment with maximum limit.
    movw $0x0010, %ax
    mov %eax, %ss
    mov %eax, %ds
    mov %eax, %es

    // Clear FS and GS
    xor %eax, %eax
    mov %eax, %fs
    mov %eax, %gs

Code modified above now sets the correct GDT size in descriptor (total size minus 1 byte). Also, the GDT is no longer copied at offset 0xA0000 in memory. The GDT register is now pointing to the original location of the GDT.

Every IDT entry now sets its selector to 0x08 to match the only code32 segment location.

Upvotes: 2

Related Questions