Advice for interpreting an assembly function's behavior

Question

I am relatively new to assembly and came across a function that has me stumped. In the course of debugging I found the $rdi register was initially set to 0x55555575a1b8. Essentially, I am looking to return a value of anything, but a negative value or zero. Originally, it seemed like setting the $esi to 0x5575a1b8 would return 0x1. In reality it returned -1. Further inspection shows that there are several two recursive function calls, which each save the return value into a register for later comparison:

r12 = Find(*(rdi + 0x8), rsi);
rax = Find(*(rdi + 0x10), rsi);

After this, I get lost in the code...

0000000000001b62 :
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 
1b7d:   41 89 c4                mov    %eax,%r12d
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf 
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94

Any insight into the behavior of the method is greatly appreciated.

Ped7g · Accepted Answer

0000000000001b62 :
; if (null == rdi) return -1;
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 
; if (esi == *rdi) return 1;
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 
; preserving r12, rbp, rbx
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
; r12d = Find( *(rdi+8), esi );
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 
1b7d:   41 89 c4                mov    %eax,%r12d
; eax = Find( *(rdi_orig+16), esi_orig)
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 
; if (r12d <= 0 && eax <= 0) return -1; (with restoring regs)
; if (r12d <= 0 && 0 < eax) return 2*eax + 1; (with restoring regs)
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 
; return 2*r12d; with restoring regs
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf 
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94

So in C-like code I think (didn't verify or debug, or even just compile):

struct node_s {
    int32_t value;
    int32_t _padding; // left starts at offset 8
    node_s* left;
    node_s* right;
};

int32_t Find(node_s* node, int32_t i) {
    if (nullptr == node) return -1;
    if (i == node->value) return 1;
    int32_t f1 = Find( node->left, i);
    int32_t f2 = Find( node->right, i);
    if (0 < f1) return 2 * f1;
    if (0 < f2) return 2 * f2 + 1;
    return -1;
}

It looks to me as unoptimized way to return kind-of-position of particular value in binary tree.

Let's say you have tree like this:

n0: {10, n1, n2}
n1: {11, n3, n4}
n2: {12, n5, n6}
n3: {13, nullptr, n7}
n4: {14, nullptr, nullptr}
n5: {15, nullptr, n8}
n6: {16, nullptr, nullptr}
n7: {17, nullptr, nullptr}
n8: {18, nullptr, nullptr}

i.e.

           10
        /      \
     11            12
  /    \        /    \
13      14    15      16
  \             \
   17            18

Then I guess:

Find(n0, 99) = -1
Find(n0, 10) = 1
Find(n0, 11) = 2
Find(n0, 12) = 3
Find(n1, 11) = 1
Find(n0, 17) = 12
Find(n1, 17) = 6
Find(n3, 17) = 3
Find(n7, 17) = 1
...

Or when searching for particular value from n0 node, the return values will be (I hope I didn't do mistake):

           1
        /      \
     2             3
  /    \        /    \
 4       6     5       7
  \             \
   12             13

So it's returning whole "floor" (depth of linking) as some range i .. i+f_n-1 (f_n = amount of max nodes on floor possible in BT), where first half of range are left nodes and second half of range are right nodes.

Now if you have that code in machine, you can try to recreate such tree struct for input and let me know, if I did guess it correctly.. :)

EDIT: nope, didn't. That bottom would be then 12, 14, not 12, 13, so the order is somewhat more tricky than nice left/right split... too tired to think how to name this. Looks like some kind of lexicographic ordering when you have path from root to value (but the sort is backwards like reversed string, or path from value to top):

1 : ""
2 : L
3 : R
4 : LL
5 : RL
6 : LR
7 : RR
8 : LLL
9 : RLL
10: LRL
11: RRL
12: LLR
13: RLR
14: LRR
15: RRR
16: LLLL
17: RLLL
18: LRLL
...

Advice for interpreting an assembly function's behavior

Answers (1)

Related Questions

Advice for interpreting an assembly function&#39;s behavior

Answers (1)

Related Questions

Advice for interpreting an assembly function's behavior