Reputation: 19
Please take a look at following code snippet
#define HF_ND_SZ sizeof(struct huffman_node)
#define TSIZE_MAX 256
struct huffman_node * build_decomp_huffman_tree(uint64_t *table, int size) {
static struct huffman_node huffman_node_list2[TSIZE_MAX * 3];
int i = 0, j = 0;
int k = TSIZE_MAX * 2; // this is the case point 1
//...//
for (i = 0; i < size - 1; i++) {
huffman_node_list2[k + i] = huffman_node_list2[i + 1]; // point 2
huffman_node_list2[TSIZE_MAX + i].right = &huffman_node_list2[k+ i];
// ... //
}
return &huffman_node_list2[size - 1];
}
For simplicity I reduced the code and point out the locations where I want to highlight,also do not think algorithm and structure too deeply.
What I want to is that if we define point 1 as const int k = TSIZE_MAX * 2;
,then is there any optimization happens at point 2 or 3 where assignment happens to contiguous data(array) huffman_node_list2[k + i] = huffman_node_list2[i + 1];
?
(Please bear with and correct my assumption if it is wrong,I thought when we declare const
in local or global scope it's being created as an immutable memory allocation, if we use that immutable memory and carried out math operation as in point 2 or 3([k + i]
) in a loop structure ,during runtime program has to load immutable memory every iteration of the loop and store the result in temporary memory location,what if happend if that immutable memory has large chunk,hope you can grab my idea,Am I correct?)
Upvotes: 1
Views: 315
Reputation: 1842
Using Visual C, I compiled both versions of your code : with const int k
and without const
. The flag /FA
produces code machine in a .asm
file readable by (some) human. No optimization flags were used.
The result is : there's no optimization, no difference. The machine code produced is strictly the same :
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.24231.0
TITLE opt_const.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC _main
_BSS SEGMENT
?huffman_node_list2@?1??main@@9@9 DB 01fd4H DUP (?) ; `main'::`2'::huffman_node_list2
_BSS ENDS
; Function compile flags: /Odtp
; File c:\joël\tests\opt_const.c
_TEXT SEGMENT
_j$ = -16 ; size = 4
_size$ = -12 ; size = 4
_k$ = -8 ; size = 4
_i$ = -4 ; size = 4
_argc$ = 8 ; size = 4
_argv$ = 12 ; size = 4
_main PROC
; 10 : {
push ebp
mov ebp, esp
sub esp, 16 ; 00000010H
push esi
push edi
; 11 : static struct huffman_node huffman_node_list2[TSIZE_MAX * 3];
; 12 : int i = 0, j = 0, size = 17;
mov DWORD PTR _i$[ebp], 0
mov DWORD PTR _j$[ebp], 0
mov DWORD PTR _size$[ebp], 17 ; 00000011H
; 13 : int k = TSIZE_MAX * 2; // this is the case point 1
mov DWORD PTR _k$[ebp], 194 ; 000000c2H
; 14 : //...//
; 15 : for (i = 0; i < size - 1; i++) {
mov DWORD PTR _i$[ebp], 0
jmp SHORT $LN4@main
$LN2@main:
mov eax, DWORD PTR _i$[ebp]
add eax, 1
mov DWORD PTR _i$[ebp], eax
$LN4@main:
mov ecx, DWORD PTR _size$[ebp]
sub ecx, 1
cmp DWORD PTR _i$[ebp], ecx
jge SHORT $LN3@main
; 16 : huffman_node_list2[k + i] = huffman_node_list2[i + 1]; // point 2
mov edx, DWORD PTR _i$[ebp]
add edx, 1
imul esi, edx, 28
add esi, OFFSET ?huffman_node_list2@?1??main@@9@9
mov eax, DWORD PTR _k$[ebp]
add eax, DWORD PTR _i$[ebp]
imul edi, eax, 28
add edi, OFFSET ?huffman_node_list2@?1??main@@9@9
mov ecx, 7
rep movsd
; 17 : huffman_node_list2[TSIZE_MAX + i].right = &huffman_node_list2[k+ i];
mov ecx, DWORD PTR _k$[ebp]
add ecx, DWORD PTR _i$[ebp]
imul edx, ecx, 28
add edx, OFFSET ?huffman_node_list2@?1??main@@9@9
mov eax, DWORD PTR _i$[ebp]
add eax, 97 ; 00000061H
imul ecx, eax, 28
mov DWORD PTR ?huffman_node_list2@?1??main@@9@9[ecx], edx
; 18 : // ... //
; 19 : }
jmp SHORT $LN2@main
$LN3@main:
; 20 : return 0;
xor eax, eax
; 21 : }
pop edi
pop esi
mov esp, ebp
pop ebp
ret 0
_main ENDP
_TEXT ENDS
END
EDIT : I did the same test with gcc, -O3 optimization flags.
And... same result : the generated assembler code is again stricly the same with and without the const
keyword.
.file "opt_const.c"
.section .text.unlikely,"ax",@progbits
.LCOLDB0:
.section .text.startup,"ax",@progbits
.LHOTB0:
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB23:
.cfi_startproc
movl $huffman_node_list2.2488+16384, %eax
.p2align 4,,10
.p2align 3
.L2:
movq -16352(%rax), %rdx
movq %rax, -8192(%rax)
addq $32, %rax
movq %rdx, -32(%rax)
movq -16376(%rax), %rdx
movq %rdx, -24(%rax)
movq -16368(%rax), %rdx
movq %rdx, -16(%rax)
movq -16360(%rax), %rdx
movq %rdx, -8(%rax)
cmpq $huffman_node_list2.2488+17088, %rax
jne .L2
xorl %eax, %eax
ret
.cfi_endproc
.LFE23:
.size main, .-main
.section .text.unlikely
.LCOLDE0:
.section .text.startup
.LHOTE0:
.local huffman_node_list2.2488
.comm huffman_node_list2.2488,24576,32
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits
Upvotes: 1
Reputation: 310884
const
doesn't necessarily create a memory location at all, unless you take its address. They can just disappear into the instruction stream as immediate-mode constants, or be added into addresses at compile or link time.
For example, huffman_node_list2[k + i] = huffman_node_list2[i + 1]
is almost certainly compiled as huffman_node_list2[TSIZE_MAX * 2 + i] = huffman_node_list2[i + 1]
, where not only is TSIZE_MAX * 2
evaluated at compile time but huffman_node_list2+TSIZE_MAX*2
is evaluated at link time.
Upvotes: 0
Reputation: 7802
const
can be slower if the compiler puts it in the read only .text section far enough away that causes a cache miss.
This can happen with global const
s or when the compiler hoists it out of a function rather than having to build it with instructions (a fairly common optimization for structs or arrays) This can reduce code size if multiple functions use the same constant, but also increases the distance from the code and thus the likeliness to cause a miss.
Since you aren't using any aggregate types, there should be no difference with a decent optimizing compiler.
There is a good article on how different data gets laid out here
Upvotes: 2