Reputation: 7286
I often encounter a function that appears to call itself (in the pseudocode produced by IDA), such as:
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
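- Since the disassembly is
call qword ptr [rax]
, can't this be simplified into result = lambda();
if I translate the pseudocode into C?
- Why does the function have itself as an argument in the pseudocode?
- What is happening when there is a call such as
lambda + 8i64
(i.e., call qword ptr [rax+8]
)?
Here is a more complete context: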
/*
 * Hex-Rays pseudo-C. Drops one count on the object currently referenced by
 * a1[1] (if any), then overwrites the pair: a1[0] = newPtr and
 * a1[1] = positionOrCounter. Both stores happen on every path.
 *
 * Return value: on the path where a1[1] is NULL, `result` is never assigned,
 * so the returned value is unspecified there. Otherwise it is either the
 * previous value of the first counter or whatever the indirect calls return.
 *
 * NOTE(review): two 32-bit counters at byte offsets 8 and 0xC behind a
 * function-pointer table look like a reference-counted control block
 * (strong/weak counts) -- confirm against the caller before renaming.
 */
__int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
{
volatile signed __int32 *lambda; // rdi
__int64 result; // rax
// Despite its name, `lambda` is a pointer to an object, not a function pointer.
lambda = (volatile signed __int32 *)a1[1];
if ( lambda )
{
// lambda is an __int32 pointer, so lambda + 2 is byte offset 8.
// Atomically add -1 to that counter; the intrinsic yields the previous value.
result = (unsigned int)_InterlockedExchangeAdd(lambda + 2, 0xFFFFFFFF);
// Previous value 1 means this call brought the counter down to 0.
if ( (_DWORD)result == 1 )
{
// Load the table pointer stored at offset 0 of the object, call entry 0,
// and pass the object itself as the only argument.
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
// lambda + 3 is byte offset 0xC: a second counter, decremented the same way.
if ( _InterlockedExchangeAdd(lambda + 3, 0xFFFFFFFF) == 1 )
// Same table, entry at byte offset 8 (the second slot), same argument.
result = (*(__int64 (__fastcall **)(volatile signed __int32 *))(*(_QWORD *)lambda + 8i64))(lambda);
a1[1] = positionOrCounter;
*a1 = newPtr;
}
else
{
// First counter was not 1: no calls are made, only the pair is replaced.
a1[1] = positionOrCounter;
*a1 = newPtr;
}
}
else
{
// Nothing was referenced: only the pair is replaced; `result` stays unset.
a1[1] = positionOrCounter;
*a1 = newPtr;
}
return result;
}
The disassembly, also from IDA:
; IDA listing of CR_maybeParseWithLambda. The excerpt stops at the first retn;
; the jump targets loc_180005FEA and loc_180005FF3 are not shown here.
.text:0000000180005F70 ; __int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
.text:0000000180005F70 CR_maybeParseWithLambda proc near ; CODE XREF: sub_180005B10+10F↑p
.text:0000000180005F70 ; sub_180005B10+14A↑p ...
.text:0000000180005F70
.text:0000000180005F70 arg_0 = qword ptr 8
.text:0000000180005F70 arg_8 = qword ptr 10h
.text:0000000180005F70 arg_10 = qword ptr 18h
.text:0000000180005F70 arg_18 = qword ptr 20h
.text:0000000180005F70
.text:0000000180005F70 mov [rsp+arg_8], rbx ; save rbx, rbp, rsi in the caller-provided stack slots
.text:0000000180005F75 mov [rsp+arg_10], rbp
.text:0000000180005F7A mov [rsp+arg_18], rsi
.text:0000000180005F7F push rdi
.text:0000000180005F80 sub rsp, 20h
.text:0000000180005F84 mov rdi, [rcx+8] ; rdi = a1[1] (named `lambda` in the pseudocode)
.text:0000000180005F88 mov rsi, r8 ; rsi = positionOrCounter
.text:0000000180005F8B mov rbp, rdx ; rbp = newPtr
.text:0000000180005F8E mov rbx, rcx ; rbx = a1
.text:0000000180005F91 test rdi, rdi
.text:0000000180005F94 jz short loc_180005FF3 ; a1[1] == NULL: target not shown in this excerpt
.text:0000000180005F96
.text:0000000180005F96 loc_180005F96: ; DATA XREF: .rdata:0000000180401E74↓o
.text:0000000180005F96 ; .rdata:0000000180401E84↓o ...
.text:0000000180005F96 mov [rsp+28h+arg_0], r14 ; save r14 before using it
.text:0000000180005F9B or r14d, 0FFFFFFFFh ; r14d = -1
.text:0000000180005F9F mov eax, r14d
.text:0000000180005FA2 lock xadd [rdi+8], eax ; atomically add -1 to the dword at +8; eax = previous value
.text:0000000180005FA7 cmp eax, 1
.text:0000000180005FAA jnz short loc_180005FEA ; previous value != 1: target not shown in this excerpt
.text:0000000180005FAC mov rax, [rdi] ; rax = table pointer stored at offset 0 of the object
.text:0000000180005FAF mov rcx, rdi ; first argument = the object itself
.text:0000000180005FB2 call qword ptr [rax] ; call table entry 0
.text:0000000180005FB4 lock xadd [rdi+0Ch], r14d ; atomically add -1 to the dword at +0Ch; r14d = previous value
.text:0000000180005FBA cmp r14d, 1
.text:0000000180005FBE jnz short loc_180005FC9
.text:0000000180005FC0 mov rax, [rdi] ; reload the table pointer
.text:0000000180005FC3 mov rcx, rdi ; first argument = the object itself
.text:0000000180005FC6 call qword ptr [rax+8] ; call table entry 1 (byte offset 8)
.text:0000000180005FC9
.text:0000000180005FC9 loc_180005FC9: ; CODE XREF: CR_maybeParseWithLambda+4E↑j
.text:0000000180005FC9 mov [rbx+8], rsi ; a1[1] = positionOrCounter
.text:0000000180005FCD mov [rbx], rbp ; a1[0] = newPtr
.text:0000000180005FD0
.text:0000000180005FD0 loc_180005FD0: ; CODE XREF: CR_maybeParseWithLambda+81↓j
.text:0000000180005FD0 mov r14, [rsp+28h+arg_0] ; restore r14
.text:0000000180005FD5
.text:0000000180005FD5 loc_180005FD5: ; CODE XREF: CR_maybeParseWithLambda+8A↓j
.text:0000000180005FD5 ; DATA XREF: .pdata:0000000180483888↓o ...
.text:0000000180005FD5 mov rbx, [rsp+28h+arg_8] ; restore saved registers and return
.text:0000000180005FDA mov rbp, [rsp+28h+arg_10]
.text:0000000180005FDF mov rsi, [rsp+28h+arg_18]
.text:0000000180005FE4 add rsp, 20h
.text:0000000180005FE8 pop rdi
.text:0000000180005FE9 retn
Upvotes: 3
Views: 857
Reputation: 3955
- Since the disassembly is
call qword ptr [rax]
, can't this be simplified into result = lambda();
if I translate the pseudo code in c ?
No. The decompiler detects that the value passed in (lambda is copied into rcx by mov rcx, rdi just before the call) is likely an argument of the called function.
For example, void f()
and void f(int)
functions are both called with a single call
assembly command, except that in the latter case, the caller moves an int
value to an appropriate register before calling the function.
You can change the type of lambda
to avoid that.
- Why in the pseudo code the function is having itself as argument ?
Read both the assembly code and the decompiled code very carefully. lambda
is not a function pointer; to get a function pointer from it, it is necessary to dereference twice. So it could be something like this (pseudo-C++ code)
// Pseudo-C++ sketch of the two dereferences behind `call qword ptr [rax]`:
// lambda points to an object whose first field points to a table, and the
// table's first slot holds the function pointer that is finally called.
struct A; // forward declaration so the callee type can mention A*
using FunctionType=int(A*); // the callee receives the object pointer as its argument
struct B{
FunctionType* functionPointer; // slot 0 of the table
};
struct A{
B* b; // first field: pointer to the table
};
A* lambda; // the variable name is a little misleading, given this interpretation.
// First dereference: lambda -> table pointer. Second: table -> stored function pointer.
auto functionPointer=(*(*lambda).b).functionPointer;
functionPointer(lambda);
Given the double dereference, it's very likely that B
is actually a vftable (although in 32-bit code virtual functions are usually called with the __thiscall
convention; x64 has a single calling convention, which IDA displays as __fastcall) -- so the code could be written like this:
// Illustrative model of how a virtual call is lowered to
// "load table pointer, load slot, call with the object as first argument".
// The *_vftableType struct and the `vftable` member spell out what the
// compiler generates implicitly; they are not meant to be written by hand.
struct Base{
virtual void someFunction(){} // first virtual function declared
virtual void otherFunction(){} // second virtual function declared
};
struct Base_vftableType{ // compiler-generated
void (*someFunction)(Base*); // explicit (this) argument shown
void (*otherFunction)(Base*); // explicit (this) argument shown
};
struct Derived: Base{
// NOTE(review): shown as a named member only for illustration; with common
// ABIs the hidden table pointer sits at offset 0 of the object -- confirm
// for the target compiler.
Base_vftableType *vftable; // compiler-generated
void someFunction(){ /* ... */ } // overrides Base::someFunction
};
Base_vftableType derived_vftable{ /* ... */ }; // compiler-generated vftable
Derived *a;
// the function call is something like this in pseudo-C
// (and probably how it will be displayed in IDA):
a->vftable->someFunction(a);
- What is happening when there is a call such as
lambda + 8i64
(i.e., call qword ptr [rax+8]
) ?
Similarly, there can be more than one function in the vftable, and the +
offset simply selects the slot of another function in that table.
Assuming 64-bit function pointers, +8
would be the second function in the table.
See also: c++ - How to organize vtables in IDA Pro? - Reverse Engineering Stack Exchange
Upvotes: 4