nhanphamdev
nhanphamdev

Reputation: 23

Weird behavior of x86 assembly LEA instruction - Impact on Stack

I wrote a small program in x86 assembly to add 2 fractions together. Here is the struct:

; A fraction stored as two consecutive dwords: numerator first, then denominator.
struct FRACTION 
    numer   dd  ?               ; numerator
    denom   dd  ?               ; denominator
ends 

And here is the function that adds two fractions:

    
;===================================
; add_fractions(frac_addr_1, frac_addr_2)
;
; Adds the two FRACTION structs whose addresses are passed on the stack.
; The sum's denominator is the LCM of the two denominators
; (product / GCD); the sum is not reduced afterwards.
;
; In:    [ebp + 8h]  = address of the first FRACTION
;        [ebp + 0ch] = address of the second FRACTION
; Out:   eax = ebp - sizeof.FRACTION, the address of the local FRACTION
;              that holds the sum (numer at -8h, denom at -4h)
; Saved: esi, edi, edx, ecx, ebx are pushed on entry and popped on exit
; Calls: stein (defined elsewhere; its eax result is used as the GCD)
;
; NOTE(review): the address returned in eax lies inside the stack area
; that the epilogue releases with "add esp, sizeof.FRACTION". After ret
; it is below the caller's esp, so any later push or call can overwrite
; the memory it points to.
; NOTE(review): div divides edx:eax, and edx is never explicitly zeroed
; before any of the three div instructions below -- confirm that the
; value left in edx at each point is really intended to be 0.
;
add_fractions:
    .frac_addr_1 = 8h               ; offset of the first argument from ebp
    .frac_addr_2 = 0ch              ; offset of the second argument from ebp
    push    ebp 
    mov     ebp, esp
    sub     esp, sizeof.FRACTION    ; reserve a local FRACTION for the sum
    
    .sum_numer_offset = -8h         ; local sum.numer lives at [ebp - 8]
    .sum_denom_offset = -4h         ; local sum.denom lives at [ebp - 4]
    
    push    esi
    push    edi
    push    edx 
    push    ecx 
    push    ebx 
    
    mov esi, dword [ebp + .frac_addr_1]     ; esi = address of fraction 1
    mov edi, dword [ebp + .frac_addr_2]     ; edi = address of fraction 2
    
    ;=================================
    ; Get product of both denominators
    ;================================= 
    mov eax, dword [esi + FRACTION.denom]
    mov ebx, dword [edi + FRACTION.denom]
    mul ebx                 ; edx:eax = denom_1 * denom_2
    mov ecx, eax            ; ecx = product (low dword only)
    
    ;===============================
    ; Get GCD of both denominators
    ;===============================
    mov eax, dword [esi + FRACTION.denom]
    mov ebx, dword [edi + FRACTION.denom]
    push    ebx             ; stein argument: denom_2
    push    eax             ; stein argument: denom_1
    call    stein 
    add esp, 4*2            ; drop the two arguments; eax has GCD 
    
    ;===============================
    ; Get the LCM = product / GCD 
    ;===============================
    mov ebx, eax            ; swap eax and ecx
    mov eax, ecx            ; eax is now product        
    mov ecx, ebx            ; ecx is GCD to divide 
    div ecx                 ; eax is now LCM after division
    
    ;=========================================
    ; store LCM as the denominator of the sum
    ;=========================================
    mov dword [ebp + .sum_denom_offset], eax 
    
    ;=========================================
    ; compute for the numerator of the sum 
    ;=========================================
    mov eax, dword [ebp + .sum_denom_offset]    ; eax = LCM
    mov ebx, dword [esi + FRACTION.denom]
    div ebx                                     ; eax = LCM / denom_1
    mov ebx, dword [esi + FRACTION.numer]
    mul ebx                                     ; eax = numer_1 * (LCM / denom_1)
    mov dword [ebp + .sum_numer_offset], eax    ; keep the first term in sum.numer
    
    mov eax, dword [ebp + .sum_denom_offset]    ; eax = LCM
    mov ebx, dword [edi + FRACTION.denom]
    div ebx                                     ; eax = LCM / denom_2
    mov ebx, dword [edi + FRACTION.numer]
    mul ebx                                     ; eax = numer_2 * (LCM / denom_2)
    
    add eax, dword [ebp + .sum_numer_offset]    ; eax = first term + second term
    mov dword [ebp + .sum_numer_offset], eax    ; sum.numer = final numerator
    
    lea eax, dword [ebp - sizeof.FRACTION]      ; eax = address of the local sum (no memory access)
    
.end_func:
    pop ebx 
    pop ecx 
    pop edx 
    pop edi 
    pop esi 
    
    add esp, sizeof.FRACTION    ; release the local sum; eax still points at it

    pop ebp
    ret

The LEA instruction in the add_fractions subroutine works fine: it returns the correct address of the local variables that hold sum_numer and sum_denom.

This is the snippet in the calling program where the add_fractions is called:

; Call add_fractions(frac_1, frac_2) and print the two dwords at the
; address it returns in eax.
mov esi, frac_1 
mov edi, frac_2
push    edi                     ; second argument: address of frac_2
push    esi                     ; first argument: address of frac_1
call    add_fractions
add esp, 4*2                    ; remove the two arguments

; lea only computes addresses; nothing is read from memory here.
lea ebx, dword [eax]            ; ebx = eax     (address of the numerator)
lea ecx, dword [eax + 4]        ; ecx = eax + 4 (address of the denominator)
    
mov eax, [ebx]                  ; numerator is loaded before any further call
call    print_eax 
    
; NOTE(review): this load happens after print_eax has already run. ecx points
; into the stack area add_fractions released, so that call may have
; overwritten the dword at [ecx] before it is read here.
mov eax, [ecx] 
call    print_eax 

In the above snippet, frac_1 is 1/2 and frac_2 is 1/3. After the call, I store the address of the numerator in ebx and the address of the denominator in ecx, then print the values at those addresses. It should print out 5 and 6 (i.e. 5/6), but the actual result is weird:

5
dff5c

Not 6, but dff5c, which is the address on the stack that I used to store the denominator. But when I change lea to mov, it works fine.

; Same call as before, but both result dwords are copied into registers
; immediately after add_fractions returns, before any other call is made.
mov esi, frac_1 
mov edi, frac_2
push    edi                     ; second argument: address of frac_2
push    esi                     ; first argument: address of frac_1
call    add_fractions
add esp, 4*2                    ; remove the two arguments

mov ebx, dword [eax]            ; ebx = numerator value
mov ecx, dword [eax + 4]        ; ecx = denominator value
    
mov eax, ebx
call    print_eax 
        
mov eax, ecx                    ; value was read before print_eax ran
call    print_eax 

This time I store the values of the numerator and denominator in ebx and ecx, not their addresses, and it prints out properly:

5
6

The question is: does the "lea ebx, dword [eax]" affect the stack and change the value in [eax + 4], so that it prints out dff5c instead of 6?

#PS: I have searched online for best practices on assembly calling conventions, and indeed they say eax should return the value, or the address of a data structure that the calling program passed into the subroutine. Eax (or any register) should not return the address of a local variable, because that address is no longer valid after the subroutine is done. However, I wrote a simple C program to test this.

#include <stdio.h>

/* A fraction stored as two ints: numerator first, then denominator. */
typedef struct {
    int numerator;
    int denominator;
} Fraction;
    
/*
 * Builds a Fraction from the member-wise sums of *frac1 and *frac2 and
 * returns it BY VALUE (the return type is Fraction, not Fraction *).
 * This is deliberately not a correct fraction addition; the function only
 * exists to show how a local struct is handed back to the caller.
 */
Fraction add_fractions(Fraction *frac1, Fraction *frac2) 
{
    // No malloc is used here, so `fraction` lives entirely in this function's local variables.
    Fraction fraction = { frac1->numerator + frac2->numerator, frac1->denominator + frac2->denominator };

    return fraction;
}

/* Adds 1/2 and 1/3 member-wise and prints the two members of the result. */
int main()
{
    Fraction frac1 = { 1, 2 };
    Fraction frac2 = { 1, 3 };
    
    Fraction result = add_fractions(&frac1, &frac2);    
    printf("%d, %d\n", result.numerator, result.denominator);
}

And the compiled ASM version of the C code having the "main" and "add_fraction" subroutines:

# Compiler output for add_fractions (AT&T syntax: "op source, destination").
# In:  rcx = frac1 pointer, rdx = frac2 pointer
# Out: rax = the whole 8-byte Fraction by value
#      (low dword = numerator, high dword = denominator)
add_fractions:
    pushq   %rbp
    .seh_pushreg    %rbp
    movq    %rsp, %rbp
    .seh_setframe   %rbp, 0
    subq    $16, %rsp                   # reserve 16 bytes for locals
    .seh_stackalloc 16
    .seh_endprologue
    movq    %rcx, 16(%rbp)              # save frac1 pointer at 16(%rbp)
    movq    %rdx, 24(%rbp)              # save frac2 pointer at 24(%rbp)
    movq    16(%rbp), %rax
    movl    (%rax), %edx                # edx = frac1->numerator
    movq    24(%rbp), %rax
    movl    (%rax), %eax                # eax = frac2->numerator
    addl    %edx, %eax
    movl    %eax, -8(%rbp)              # fraction.numerator = sum of numerators
    movq    16(%rbp), %rax
    movl    4(%rax), %edx               # edx = frac1->denominator
    movq    24(%rbp), %rax
    movl    4(%rax), %eax               # eax = frac2->denominator
    addl    %edx, %eax
    movl    %eax, -4(%rbp)              # fraction.denominator = sum of denominators
    movq    -8(%rbp), %rax              # rax = both dwords of fraction (a value, not an address)
    addq    $16, %rsp
    popq    %rbp
    ret
# Compiler output for main (AT&T syntax: "op source, destination").
main:
    pushq   %rbp
    .seh_pushreg    %rbp
    movq    %rsp, %rbp
    .seh_setframe   %rbp, 0
    subq    $64, %rsp                   # reserve 64 bytes of stack
    .seh_stackalloc 64
    .seh_endprologue
    call    __main
    movl    $1, -8(%rbp)                # frac1.numerator = 1
    movl    $2, -4(%rbp)                # frac1.denominator = 2
    movl    $1, -16(%rbp)               # frac2.numerator = 1
    movl    $3, -12(%rbp)               # frac2.denominator = 3
    leaq    -16(%rbp), %rdx             # rdx = &frac2 (second argument)
    leaq    -8(%rbp), %rax
    movq    %rax, %rcx                  # rcx = &frac1 (first argument)
    call    add_fractions
    movq    %rax, -24(%rbp)             # result = the 8-byte struct returned in rax
    movl    -20(%rbp), %edx             # edx = result.denominator (high dword)
    movl    -24(%rbp), %eax             # eax = result.numerator (low dword)
    movl    %edx, %r8d                  # r8d = denominator (third printf argument)
    movl    %eax, %edx                  # edx = numerator (second printf argument)
    leaq    .LC0(%rip), %rax
    movq    %rax, %rcx                  # rcx = address of .LC0 (presumably the format string; not shown here)
    call    printf
    movl    $0, %eax                    # main returns 0
    addq    $64, %rsp
    popq    %rbp
    ret

I'm not familiar with the AT&T syntax, but as far as I can understand, the main function passes the addresses of the 2 fractions into add_fractions through 2 registers: rdx and rcx.

I think the returned rax should have the address of the local vars so that the main can access both calculated numerator and denominator. Otherwise, main only sees one value from rax.

Am I getting something wrong somewhere? Thank you for helping explain this.

Upvotes: 1

Views: 113

Answers (1)

Sep Roland
Sep Roland

Reputation: 39166

I see (at least) three problems in this code:

  • Before div r32 you need to clear the EDX register! You have div ecx and two times div ebx. See When and why do we sign extend and use cdq with mul/div?.

  • The address that you return in EAX points at a local area that is no longer valid after add_fractions has returned to its caller.

lea eax, dword [ebp - sizeof.FRACTION]
...
add esp, sizeof.FRACTION

The add esp, sizeof.FRACTION invalidates the info.

One simple solution could be to overwrite the stacked arguments. So instead of storing at [ebp - 8] and [ebp - 4], you store at [ebp + 8] and [ebp + 12]. By then the original args would have been transferred to ESI and EDI already.

  • You dereference where there is no address!
push    edi 
push    esi
call    add_fractions
add esp, 4*2    

lea ebx, dword [eax]
lea ecx, dword [eax + 4]
mov eax, [ebx] 
call print_eax
mov eax, [ecx] 
call print_eax

At [eax] and [eax + 4] you normally would have had .sum_numer and .sum_denom. Those are the actual numbers, not pointers to these values. You don't need to dereference.
Applying the 'simple solution' from my second bullet point, this could become:

; Assumes add_fractions has been changed as described above, so that it stores
; sum_numer at [ebp + 8] and sum_denom at [ebp + 12], i.e. over the two stacked
; arguments. The two pops then both fetch the results and remove the arguments.
push    edi 
push    esi
call    add_fractions
pop     ebx              ; -> EBX is sum_numer
pop     ecx              ; -> ECX is sum_denom
; You could store EBX and ECX to a struct here
mov     eax, ebx 
call    print_eax
mov     eax, ecx 
call    print_eax

[edit]

Based on the added #PS.

I think the returned rax should have the address of the local vars so that the main can access both calculated numerator and denominator. Otherwise, main only sees one value from rax.

That would bring us back to square one of the original question, which was returning the address of a variable that was local to the callee.
What happens here is that you have switched from 32-bit assembly to 64-bit assembly. You already noticed that the args are not passed on the stack but through registers RCX and RDX (and R8). And what 64-bit programming has on offer are those much wider registers at 64 bits. The compiler therefore was able to return the contents of the resulting struct in the single register RAX. The low dword of RAX contains the new numerator and the high dword of RAX contains the new denominator.

If you really want to find out, then make it such that the number of bits in the resulting struct is more than a single 64-bit register can hold.
Either add one or more extra 32-bit elements:

/* Three 4-byte ints (12 bytes): too large to fit in one 64-bit register. */
typedef struct {
    int numerator;
    int denominator;
    int myDummy;        /* unused padding member, only there to enlarge the struct */
} Fraction;

Or make the two elements 64-bit:

/* Two 64-bit members (16 bytes): too large to fit in one 64-bit register.
   int64_t requires #include <stdint.h>. */
typedef struct {
    int64_t numerator;
    int64_t denominator;
} Fraction;

Upvotes: 1

Related Questions