user3334663
user3334663

Reputation: 5

Assembly Code - Where is % 4 implemented?

I am trying to figure out where in the disassembly of a C function the % 4 is implemented. The program was compiled without optimization. Here is the function in C:

/*
 * Sums the return values of countPairsUpTo() over every i in [0, len-2).
 *
 * On each iteration the last two arguments passed to countPairsUpTo() are
 * picked by i % 4: (8,6), (0,0), (15,0) or (7,9).  The first two arguments
 * are always len-2 and intArray.
 *
 * `uint` and countPairsUpTo() are declared outside this snippet; `uint` is
 * presumably a typedef for unsigned int -- TODO confirm.
 *
 * Returns the accumulated sum.
 */
uint function (uint len, int*   intArray) {   
   uint i;   uint sum = 0;

  /* NOTE(review): if uint is unsigned and len < 2, len-2 wraps around to a
     very large value instead of going negative -- confirm callers always
     pass len >= 2. */
  for  (i = 0;  i < len-2;  i++)
    /* i % 4 only yields 0..3, so the four cases below cover every value
       when i is unsigned. */
    switch  (i % 4)
    {
    case 0 :
      sum += countPairsUpTo(len-2,intArray,8,6);
      break;

    case 1 :
      sum += countPairsUpTo(len-2,intArray,0,0);
      break;

    case 2 :
      sum += countPairsUpTo(len-2,intArray,15,0);
      break;

    case 3 :
      sum += countPairsUpTo(len-2,intArray,7,9);
      break;
    }

  return(sum); }

Here is the assembly:

0x08048607 <funkyFunction+0>:   push   %ebp
0x08048608 <funkyFunction+1>:   mov    %esp,%ebp
0x0804860a <funkyFunction+3>:   sub    $0x24,%esp
0x0804860d <funkyFunction+6>:   call   0x8048418 <mcount@plt>
0x08048612 <funkyFunction+11>:  movl   $0x0,-0x4(%ebp)
0x08048619 <funkyFunction+18>:  movl   $0x0,-0x8(%ebp)
0x08048620 <funkyFunction+25>:  jmp    0x8048704 <funkyFunction+253>
0x08048625 <funkyFunction+30>:  mov    -0x8(%ebp),%eax
0x08048628 <funkyFunction+33>:  and    $0x3,%eax
0x0804862b <funkyFunction+36>:  mov    %eax,-0x14(%ebp)
0x0804862e <funkyFunction+39>:  cmpl   $0x1,-0x14(%ebp)
0x08048632 <funkyFunction+43>:  je     0x804867e <funkyFunction+119>
0x08048634 <funkyFunction+45>:  cmpl   $0x1,-0x14(%ebp)
0x08048638 <funkyFunction+49>:  jb     0x804864f <funkyFunction+72>
0x0804863a <funkyFunction+51>:  cmpl   $0x2,-0x14(%ebp)
0x0804863e <funkyFunction+55>:  je     0x80486aa <funkyFunction+163>
0x08048640 <funkyFunction+57>:  cmpl   $0x3,-0x14(%ebp)
0x08048644 <funkyFunction+61>:  je     0x80486d6 <funkyFunction+207>
0x0804864a <funkyFunction+67>:  jmp    0x8048700 <funkyFunction+249>
0x0804864f <funkyFunction+72>:  mov    0x8(%ebp),%eax
0x08048652 <funkyFunction+75>:  sub    $0x2,%eax
0x08048655 <funkyFunction+78>:  mov    %eax,%edx
0x08048657 <funkyFunction+80>:  movl   $0x6,0xc(%esp)
0x0804865f <funkyFunction+88>:  movl   $0x8,0x8(%esp)
0x08048667 <funkyFunction+96>:  mov    0xc(%ebp),%eax
0x0804866a <funkyFunction+99>:  mov    %eax,0x4(%esp)
0x0804866e <funkyFunction+103>: mov    %edx,(%esp)
0x08048671 <funkyFunction+106>: call   0x80485b1 <countPairsUpTo>
0x08048676 <funkyFunction+111>: add    %eax,-0x4(%ebp)
0x08048679 <funkyFunction+114>: jmp    0x8048700 <funkyFunction+249>
0x0804867e <funkyFunction+119>: mov    0x8(%ebp),%eax
0x08048681 <funkyFunction+122>: sub    $0x2,%eax
0x08048684 <funkyFunction+125>: mov    %eax,%edx
0x08048686 <funkyFunction+127>: movl   $0x0,0xc(%esp)
0x0804868e <funkyFunction+135>: movl   $0x0,0x8(%esp)
0x08048696 <funkyFunction+143>: mov    0xc(%ebp),%eax
0x08048699 <funkyFunction+146>: mov    %eax,0x4(%esp)
0x0804869d <funkyFunction+150>: mov    %edx,(%esp)
---Type <return> to continue, or q <return> to quit---
0x080486a0 <funkyFunction+153>: call   0x80485b1 <countPairsUpTo>
0x080486a5 <funkyFunction+158>: add    %eax,-0x4(%ebp)
0x080486a8 <funkyFunction+161>: jmp    0x8048700 <funkyFunction+249>
0x080486aa <funkyFunction+163>: mov    0x8(%ebp),%eax
0x080486ad <funkyFunction+166>: sub    $0x2,%eax
0x080486b0 <funkyFunction+169>: mov    %eax,%edx
0x080486b2 <funkyFunction+171>: movl   $0x0,0xc(%esp)
0x080486ba <funkyFunction+179>: movl   $0xf,0x8(%esp)
0x080486c2 <funkyFunction+187>: mov    0xc(%ebp),%eax
0x080486c5 <funkyFunction+190>: mov    %eax,0x4(%esp)
0x080486c9 <funkyFunction+194>: mov    %edx,(%esp)
0x080486cc <funkyFunction+197>: call   0x80485b1 <countPairsUpTo>
0x080486d1 <funkyFunction+202>: add    %eax,-0x4(%ebp)
0x080486d4 <funkyFunction+205>: jmp    0x8048700 <funkyFunction+249>
0x080486d6 <funkyFunction+207>: mov    0x8(%ebp),%eax
0x080486d9 <funkyFunction+210>: sub    $0x2,%eax
0x080486dc <funkyFunction+213>: mov    %eax,%edx
0x080486de <funkyFunction+215>: movl   $0x9,0xc(%esp)
0x080486e6 <funkyFunction+223>: movl   $0x7,0x8(%esp)
0x080486ee <funkyFunction+231>: mov    0xc(%ebp),%eax
0x080486f1 <funkyFunction+234>: mov    %eax,0x4(%esp)
0x080486f5 <funkyFunction+238>: mov    %edx,(%esp)
0x080486f8 <funkyFunction+241>: call   0x80485b1 <countPairsUpTo>
0x080486fd <funkyFunction+246>: add    %eax,-0x4(%ebp)
0x08048700 <funkyFunction+249>: addl   $0x1,-0x8(%ebp)
0x08048704 <funkyFunction+253>: mov    0x8(%ebp),%eax
0x08048707 <funkyFunction+256>: sub    $0x2,%eax
0x0804870a <funkyFunction+259>: cmp    -0x8(%ebp),%eax
0x0804870d <funkyFunction+262>: ja     0x8048625 <funkyFunction+30>
0x08048713 <funkyFunction+268>: mov    -0x4(%ebp),%eax
0x08048716 <funkyFunction+271>: leave  
0x08048717 <funkyFunction+272>: ret    

I see that at offset +30 it loads i. At offset +253 it loads len, at +256 it computes len-2, and at +259 it compares len-2 with i. Then, if i < len-2, it jumps back to offset +30. How is it performing the % 4 part?

Upvotes: 0

Views: 104

Answers (1)

lurker
lurker

Reputation: 58244

The compiler is smart enough to realize that taking an unsigned number mod 4 (% 4) gives the same result as bitwise-ANDing the number with 3, and that the AND is faster. That is the 9th line in your assembly language listing (the and $0x3,%eax at offset +33). It can know this with certainty because i is unsigned and the 4 is a constant power of two, so it's an arithmetic "rule" it can follow.

You'll see this kind of translation in other areas, too. For example, if you multiply (or divide) by 4, the compiler may choose to generate an arithmetic shift left (or right) by 2 bits, which is equivalent to multiplying (or dividing) by 4 (for signed division of a negative value the compiler adds a small adjustment so the result still rounds toward zero), but may be faster than a multiply (or divide) instruction, depending upon the CPU and scenario.

There are several other kinds of common optimizations a compiler may perform, inside and outside the arithmetic realm, that can break the visually direct correspondence between the C instruction flow and the assembly language instruction flow, but they are outside the scope of this particular question.

Upvotes: 2

Related Questions