abi difference on linux/amd64 C vs C++

Question

I have a C library with the following API:

#ifdef __cplusplus
extern "C" {
#endif
struct Foo {
    void *p;
    int len;
};

struct Foo f(void *opaque, int param);
void foo_free(struct Foo *);
#ifdef __cplusplus
}
#endif

To simplify my C++ life, I decided to do a simple thing:

 struct Foo {
    void *p;
    int len;
#ifdef __cplusplus
    ~Foo() { foo_free(this); }
#endif
};

After that, things went crazy: for example, if I call f(0xfffeeea0, 40) in C++, then on the C side I get 0x7fff905d2050 and -69984 as the arguments:

Assembler without the destructor:

   0x000055555555467a <+0>: push   %rbp
   0x000055555555467b <+1>: mov    %rsp,%rbp
   0x000055555555467e <+4>: sub    $0x10,%rsp
   0x0000555555554682 <+8>: mov    $0x28,%esi
   0x0000555555554687 <+13>:    mov    $0xfffeeea0,%edi
   0x000055555555468c <+18>:    callq  0x5555555546a0 
   0x0000555555554691 <+23>:    mov    %rax,-0x10(%rbp)
   0x0000555555554695 <+27>:    mov    %rdx,-0x8(%rbp)
   0x0000555555554699 <+31>:    mov    $0x0,%eax
   0x000055555555469e <+36>:    leaveq 
   0x000055555555469f <+37>:    retq

Assembler with the destructor:

   0x00000000000006da <+0>: push   %rbp
   0x00000000000006db <+1>: mov    %rsp,%rbp
   0x00000000000006de <+4>: sub    $0x20,%rsp
   0x00000000000006e2 <+8>: mov    %fs:0x28,%rax
   0x00000000000006eb <+17>:    mov    %rax,-0x8(%rbp)
   0x00000000000006ef <+21>:    xor    %eax,%eax
   0x00000000000006f1 <+23>:    lea    -0x20(%rbp),%rax
   0x00000000000006f5 <+27>:    mov    $0x28,%edx
   0x00000000000006fa <+32>:    mov    $0xfffeeea0,%esi
   0x00000000000006ff <+37>:    mov    %rax,%rdi
   0x0000000000000702 <+40>:    callq  0x739 
   0x0000000000000707 <+45>:    lea    -0x20(%rbp),%rax
   0x000000000000070b <+49>:    mov    %rax,%rdi
   0x000000000000070e <+52>:    callq  0x72e 
   0x0000000000000713 <+57>:    mov    $0x0,%eax
   0x0000000000000718 <+62>:    mov    -0x8(%rbp),%rcx
   0x000000000000071c <+66>:    xor    %fs:0x28,%rcx
   0x0000000000000725 <+75>:    je     0x72c 
   0x0000000000000727 <+77>:    callq  0x5c0 <__stack_chk_fail@plt>
   0x000000000000072c <+82>:    leaveq 
   0x000000000000072d <+83>:    retq

I wonder what is going on. I can understand why the compiler has to handle the return value in a different way, but why does it pass the arguments in different registers (%esi instead of %edi for the first argument)?

To be clear, I understand that I am doing the wrong thing, and I will rewrite the code to use some kind of smart pointer instead, without touching the real Foo. But I wonder how the C++ and C ABIs work in this particular case.

full example:

//test.cpp
extern "C" {
    struct Foo {
        void *p;
        int len;
        ~Foo() {/*call free*/}
    };

    struct Foo f(void *opaque, int param);
}

int main()
{
    // reinterpret_cast needs an explicit target type; the opaque argument of f is a void *.
    auto foo = f(reinterpret_cast<void *>(0xfffeeea0), 40);
}

//test.c
#include <stdio.h>

/* C-side definition of Foo: two plain members, no destructor. */
struct Foo {
    void *p;
    int len;
};

/*
 * Prints the two arguments exactly as the C side receives them, then
 * returns a zero-initialized Foo by value.
 */
struct Foo f(void *opaque, int param)
{
    printf("!!! %p %d\n", opaque, param);
    struct Foo ret = {0, 0};
    return ret;
}
#makefile:
prog: test.cpp test.c
    gcc -Wall -ggdb -std=c11 -c -o test.c.o test.c
    g++ -Wall -ggdb -std=c++11 -o $@ test.cpp test.c.o
    ./prog

melpomene · Accepted Answer

In the first version of your code (no destructor), we have:

// allocate 16 bytes on the stack (for a Foo instance)
sub    $0x10,%rsp

// load two (constant) arguments into %edi and %esi
mov    $0x28,%esi
mov    $0xfffeeea0,%edi

// call f
callq  0x5555555546a0 

// a 2-word struct was returned by value (in %rax/%rdx).
// move the values to the corresponding slots on the stack
mov    %rax,-0x10(%rbp)
mov    %rdx,-0x8(%rbp)

In the second version (with a destructor):

// load address of Foo instance into %rax
lea    -0x20(%rbp),%rax

// load three arguments:
//  - 40 in %edx
//  - 0xfffeeea0 in %esi
//  - &foo in %rdi
mov    $0x28,%edx
mov    $0xfffeeea0,%esi
mov    %rax,%rdi

// ... and call f
callq  0x739 

// ignore f's return value; load &foo into %rax again
lea    -0x20(%rbp),%rax

// call ~Foo on &foo
mov    %rax,%rdi
callq  0x72e

My guess is that without a destructor the struct is treated like a plain 2-word tuple and returned by value.

But with a destructor the compiler assumes it can't just copy the member values around, so it transforms the struct return value into a hidden pointer argument:

struct Foo f(void *opaque, int param);

// actually implemented as:
void f(struct Foo *_hidden, void *opaque, int param);

Normally f would then take care of writing the return value into *_hidden.

Because the caller and the implementer of the function see a different return type, they disagree about the number of parameters the function actually has. The C++ code passes 3 arguments, but the C code only looks at two of them. It misinterprets the address of the Foo instance as the opaque pointer, and what was supposed to be the opaque pointer ends up in param.

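In other words, the presence of a user-provided (non-trivial) destructor means Foo is no longer a POD type. In the terms of the Itanium C++ ABI it becomes "non-trivial for the purposes of calls", so it can no longer be returned by value in registers; the caller must instead pass the address of the return slot as a hidden first argument.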

abi difference on linux/amd64 C vs C++

Answers (1)

Related Questions