Reputation: 1842
I recently started using the gdb disassembler and wanted to see what the assembly code it displays really looks like, and whether it is logical or not. (I tried it on a C program: a function that calculates the length of a linked list.)
This is the C code (Not mine, have to credit this site):
/* Count the nodes of the linked list that starts at head (declared outside this function). */
/* Returns 0 when head is NULL. */
int length() {
int length = 0;
struct node *current;
/* Follow the next pointers until current becomes NULL, counting one per node. */
for(current = head; current != NULL; current = current->next) {
length++;
}
return length;
}
Compiled:
gcc linkedlist.c -o linkedlist
This is the resulting disassembly (Intel syntax):
# Stack slots used below: [rbp-0xc] holds length, [rbp-0x8] holds current.
0x00000000000012a8 <+0>: endbr64  # CET/IBT landing pad at function entry
0x00000000000012ac <+4>: push rbp  # save the caller's frame pointer
0x00000000000012ad <+5>: mov rbp,rsp  # set up this function's frame pointer
0x00000000000012b0 <+8>: mov DWORD PTR [rbp-0xc],0x0  # length = 0 (store to a stack slot, not a register)
0x00000000000012b7 <+15>: mov rax,QWORD PTR [rip+0x2d5a] # 0x4018 <head> -- rax = head
0x00000000000012be <+22>: mov QWORD PTR [rbp-0x8],rax  # current = head
0x00000000000012c2 <+26>: jmp 0x12d4 <length+44>  # go test the loop condition first
0x00000000000012c4 <+28>: add DWORD PTR [rbp-0xc],0x1  # length++
0x00000000000012c8 <+32>: mov rax,QWORD PTR [rbp-0x8]  # rax = current
0x00000000000012cc <+36>: mov rax,QWORD PTR [rax+0x8]  # rax = current->next (next is read from offset 8)
0x00000000000012d0 <+40>: mov QWORD PTR [rbp-0x8],rax  # current = current->next
0x00000000000012d4 <+44>: cmp QWORD PTR [rbp-0x8],0x0  # current != NULL ?
0x00000000000012d9 <+49>: jne 0x12c4 <length+28>  # yes: run the loop body again
0x00000000000012db <+51>: mov eax,DWORD PTR [rbp-0xc]  # return value = length
0x00000000000012de <+54>: pop rbp  # restore the caller's frame pointer
0x00000000000012df <+55>: ret
What really bothers me (a little thing that I noticed; maybe you will notice more) is that this is not the kind of assembly code I was taught. I remember teachers/professors saying over and over again: "Don't use mov reg,0x0, just xor reg,reg".
But here, inside it does:
mov DWORD PTR [rbp-0xc],0x0
Which I assume is the initialization of the variable int length = 0;
My questions are: why doesn't it show the most efficient code? And if it cannot always do that (probably nothing is perfect), why doesn't it at least detect the initialization to 0
and use xor instead of mov automatically? Does that really matter for performance (and if it does, by what factor)?
Maybe there are more lines that could have been replaced or dropped altogether that I, as a beginner, do not notice, but I did notice this specific one. Any explanation?
Upvotes: 0
Views: 151
Reputation: 67476
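By default (when no -O option is given), gcc compiles at -O0
which performs no optimization: the generated code follows the source statement by statement and keeps local variables in memory on the stack. That is why length is zeroed with a mov store to its stack slot; the xor zeroing idiom only applies to registers. With optimization enabled the compiler has many possible options, as in the example below: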
/* Minimal singly linked list node: it carries only the link to the next node. */
struct node
{
struct node *next; /* following node; a NULL value ends the traversal loops */
};
/* Baseline version: count the nodes reachable from head (0 when head is NULL). */
/* No optimize attribute here, so it is built at whatever level the command line selects. */
int length(struct node *head) {
int length = 0;
struct node *current;
/* Walk the next pointers until NULL, counting one per node. */
for(current = head; current != NULL; current = current->next) {
length++;
}
return length;
}
/* Same node-counting loop as length(); the optimize attribute requests -O3 for this function. */
int __attribute__((optimize("-O3"))) length1(struct node *head) {
int length = 0;
struct node *current;
/* Walk the next pointers until NULL, counting one per node. */
for(current = head; current != NULL; current = current->next) {
length++;
}
return length;
}
/* Same node-counting loop as length(); the optimize attribute requests -Os for this function. */
int __attribute__((optimize("-Os"))) length2(struct node *head) {
int length = 0;
struct node *current;
/* Walk the next pointers until NULL, counting one per node. */
for(current = head; current != NULL; current = current->next) {
length++;
}
return length;
}
/* Same node-counting loop as length(); the optimize attribute requests -Og for this function. */
int __attribute__((optimize("-Og"))) length3(struct node *head) {
int length = 0;
struct node *current;
/* Walk the next pointers until NULL, counting one per node. */
for(current = head; current != NULL; current = current->next) {
length++;
}
return length;
}
and the generated assembly for each function (Intel syntax):
# length(): output for the version without an optimize attribute.
# Every variable lives on the stack: [rbp-24] = head, [rbp-16] = current, [rbp-4] = length.
length:
push rbp  # save the caller's frame pointer
mov rbp, rsp  # set up this function's frame pointer
mov QWORD PTR [rbp-24], rdi  # spill the head argument (arrives in rdi) to the stack
mov DWORD PTR [rbp-4], 0  # length = 0, stored to memory
mov rax, QWORD PTR [rbp-24]  # rax = head
mov QWORD PTR [rbp-16], rax  # current = head
jmp .L2  # test the loop condition before the first iteration
.L3:
add DWORD PTR [rbp-4], 1  # length++
mov rax, QWORD PTR [rbp-16]  # rax = current
mov rax, QWORD PTR [rax]  # rax = current->next (next is the first member, offset 0)
mov QWORD PTR [rbp-16], rax  # current = current->next
.L2:
cmp QWORD PTR [rbp-16], 0  # current != NULL ?
jne .L3  # yes: run the loop body
mov eax, DWORD PTR [rbp-4]  # return value = length
pop rbp  # restore the caller's frame pointer
ret
# length1(): output for the -O3 version.
# No stack frame: length is kept in eax and current in rdi (the head argument itself).
length1:
xor eax, eax  # length = 0 using the register-zeroing idiom
test rdi, rdi  # head == NULL ?
je .L8  # empty list: return 0 without entering the loop
.L7:
mov rdi, QWORD PTR [rdi]  # current = current->next
add eax, 1  # length++
test rdi, rdi  # current != NULL ?
jne .L7  # yes: next iteration (condition tested at the bottom of the loop)
ret  # eax = length
.L8:
ret  # eax = 0
# length2(): output for the -Os version.
# length is kept in eax and current in rdi; a single test at the top of the loop serves every iteration.
length2:
xor eax, eax  # length = 0 using the register-zeroing idiom
.L12:
test rdi, rdi  # current == NULL ?
je .L14  # yes: done
mov rdi, QWORD PTR [rdi]  # current = current->next
inc eax  # length++
jmp .L12  # back to the loop test
.L14:
ret  # eax = length
# length3(): output for the -Og version.
# length is kept in eax and current in rdi; the loop keeps the shape of the source for loop.
length3:
mov eax, 0  # length = 0 (a plain mov here, not the xor idiom)
jmp .L16  # test the loop condition before the first iteration
.L17:
add eax, 1  # length++
mov rdi, QWORD PTR [rdi]  # current = current->next
.L16:
test rdi, rdi  # current != NULL ?
jne .L17  # yes: run the loop body
ret  # eax = length
Upvotes: 2