Reputation: 5
I am trying to figure out where in the disassembly of a C function the `% 4` is implemented. The program was compiled without optimization. Here is the function in C:
/*
 * Sums the values returned by countPairsUpTo() over i = 0 .. len-3.
 * Every call passes len-2 and intArray; only the last two arguments
 * differ, and they are selected by i % 4.
 *
 * NOTE(review): uint appears to be an unsigned type (the disassembly
 * below uses the unsigned jumps ja/jb), in which case len-2 wraps
 * around for len < 2 -- confirm callers always pass len >= 2.
 */
uint function (uint len, int* intArray) {
uint i; uint sum = 0;
for (i = 0; i < len-2; i++)
/* Choose the argument pair from the low two bits of the loop counter. */
switch (i % 4)
{
case 0 :
sum += countPairsUpTo(len-2,intArray,8,6);
break;
case 1 :
sum += countPairsUpTo(len-2,intArray,0,0);
break;
case 2 :
sum += countPairsUpTo(len-2,intArray,15,0);
break;
case 3 :
sum += countPairsUpTo(len-2,intArray,7,9);
break;
}
return(sum); }
Here is the assembly:
0x08048607 <funkyFunction+0>: push %ebp
0x08048608 <funkyFunction+1>: mov %esp,%ebp
0x0804860a <funkyFunction+3>: sub $0x24,%esp
0x0804860d <funkyFunction+6>: call 0x8048418 <mcount@plt>
0x08048612 <funkyFunction+11>: movl $0x0,-0x4(%ebp)
0x08048619 <funkyFunction+18>: movl $0x0,-0x8(%ebp)
0x08048620 <funkyFunction+25>: jmp 0x8048704 <funkyFunction+253>
0x08048625 <funkyFunction+30>: mov -0x8(%ebp),%eax
0x08048628 <funkyFunction+33>: and $0x3,%eax
0x0804862b <funkyFunction+36>: mov %eax,-0x14(%ebp)
0x0804862e <funkyFunction+39>: cmpl $0x1,-0x14(%ebp)
0x08048632 <funkyFunction+43>: je 0x804867e <funkyFunction+119>
0x08048634 <funkyFunction+45>: cmpl $0x1,-0x14(%ebp)
0x08048638 <funkyFunction+49>: jb 0x804864f <funkyFunction+72>
0x0804863a <funkyFunction+51>: cmpl $0x2,-0x14(%ebp)
0x0804863e <funkyFunction+55>: je 0x80486aa <funkyFunction+163>
0x08048640 <funkyFunction+57>: cmpl $0x3,-0x14(%ebp)
0x08048644 <funkyFunction+61>: je 0x80486d6 <funkyFunction+207>
0x0804864a <funkyFunction+67>: jmp 0x8048700 <funkyFunction+249>
0x0804864f <funkyFunction+72>: mov 0x8(%ebp),%eax
0x08048652 <funkyFunction+75>: sub $0x2,%eax
0x08048655 <funkyFunction+78>: mov %eax,%edx
0x08048657 <funkyFunction+80>: movl $0x6,0xc(%esp)
0x0804865f <funkyFunction+88>: movl $0x8,0x8(%esp)
0x08048667 <funkyFunction+96>: mov 0xc(%ebp),%eax
0x0804866a <funkyFunction+99>: mov %eax,0x4(%esp)
0x0804866e <funkyFunction+103>: mov %edx,(%esp)
0x08048671 <funkyFunction+106>: call 0x80485b1 <countPairsUpTo>
0x08048676 <funkyFunction+111>: add %eax,-0x4(%ebp)
0x08048679 <funkyFunction+114>: jmp 0x8048700 <funkyFunction+249>
0x0804867e <funkyFunction+119>: mov 0x8(%ebp),%eax
0x08048681 <funkyFunction+122>: sub $0x2,%eax
0x08048684 <funkyFunction+125>: mov %eax,%edx
0x08048686 <funkyFunction+127>: movl $0x0,0xc(%esp)
0x0804868e <funkyFunction+135>: movl $0x0,0x8(%esp)
0x08048696 <funkyFunction+143>: mov 0xc(%ebp),%eax
0x08048699 <funkyFunction+146>: mov %eax,0x4(%esp)
0x0804869d <funkyFunction+150>: mov %edx,(%esp)
---Type <return> to continue, or q <return> to quit---
0x080486a0 <funkyFunction+153>: call 0x80485b1 <countPairsUpTo>
0x080486a5 <funkyFunction+158>: add %eax,-0x4(%ebp)
0x080486a8 <funkyFunction+161>: jmp 0x8048700 <funkyFunction+249>
0x080486aa <funkyFunction+163>: mov 0x8(%ebp),%eax
0x080486ad <funkyFunction+166>: sub $0x2,%eax
0x080486b0 <funkyFunction+169>: mov %eax,%edx
0x080486b2 <funkyFunction+171>: movl $0x0,0xc(%esp)
0x080486ba <funkyFunction+179>: movl $0xf,0x8(%esp)
0x080486c2 <funkyFunction+187>: mov 0xc(%ebp),%eax
0x080486c5 <funkyFunction+190>: mov %eax,0x4(%esp)
0x080486c9 <funkyFunction+194>: mov %edx,(%esp)
0x080486cc <funkyFunction+197>: call 0x80485b1 <countPairsUpTo>
0x080486d1 <funkyFunction+202>: add %eax,-0x4(%ebp)
0x080486d4 <funkyFunction+205>: jmp 0x8048700 <funkyFunction+249>
0x080486d6 <funkyFunction+207>: mov 0x8(%ebp),%eax
0x080486d9 <funkyFunction+210>: sub $0x2,%eax
0x080486dc <funkyFunction+213>: mov %eax,%edx
0x080486de <funkyFunction+215>: movl $0x9,0xc(%esp)
0x080486e6 <funkyFunction+223>: movl $0x7,0x8(%esp)
0x080486ee <funkyFunction+231>: mov 0xc(%ebp),%eax
0x080486f1 <funkyFunction+234>: mov %eax,0x4(%esp)
0x080486f5 <funkyFunction+238>: mov %edx,(%esp)
0x080486f8 <funkyFunction+241>: call 0x80485b1 <countPairsUpTo>
0x080486fd <funkyFunction+246>: add %eax,-0x4(%ebp)
0x08048700 <funkyFunction+249>: addl $0x1,-0x8(%ebp)
0x08048704 <funkyFunction+253>: mov 0x8(%ebp),%eax
0x08048707 <funkyFunction+256>: sub $0x2,%eax
0x0804870a <funkyFunction+259>: cmp -0x8(%ebp),%eax
0x0804870d <funkyFunction+262>: ja 0x8048625 <funkyFunction+30>
0x08048713 <funkyFunction+268>: mov -0x4(%ebp),%eax
0x08048716 <funkyFunction+271>: leave
0x08048717 <funkyFunction+272>: ret
I can see that the instruction at offset +30 loads i. At +253 it loads len, at +256 it computes len-2, and at +259 it compares len-2 with i. Then, if i < len-2, it jumps back to +30. How is it performing the % 4 part?
Upvotes: 0
Views: 104
Reputation: 58244
The compiler is smart enough to realize that taking an unsigned number mod 4 (`% 4`) is logically the same as, and faster than, bitwise ANDing the number with 3: both keep only the low two bits. That is the 9th line in your assembly language listing (`and $0x3,%eax` at offset +33). It can know this with certainty because the 4 is a constant, so it's an arithmetic "rule" it can follow.
You'll see this kind of translation in other areas, too. For example, if you multiply (or divide) by 4, the compiler may choose to generate a shift left (or right) by 2 bits, which is the equivalent of multiplying (or dividing) by 4 for unsigned or non-negative values, but may be faster than a multiply (or divide) instruction, depending upon CPU and scenario.
There are several kinds of other common optimizations a compiler may do, inside and outside the arithmetic realm, that can break the visually direct correspondence between C instruction flow and assembly language instruction flow, outside the scope of this particular question.
Upvotes: 2