macxfadz
macxfadz

Reputation: 19

Where the const reduce performance rather than optimizing it?

Please take a look at following code snippet

#define HF_ND_SZ sizeof(struct huffman_node)
#define TSIZE_MAX 256

struct huffman_node * build_decomp_huffman_tree(uint64_t *table, int size) {
    static struct huffman_node huffman_node_list2[TSIZE_MAX * 3];
    int i = 0, j = 0;
    int k = TSIZE_MAX * 2; // this is the case point 1
    //...//
    for (i = 0; i < size - 1; i++) {
        huffman_node_list2[k + i] = huffman_node_list2[i + 1]; // point 2
        huffman_node_list2[TSIZE_MAX + i].right = &huffman_node_list2[k+ i];
    // ... //
    }
    return &huffman_node_list2[size - 1];
}

For simplicity I reduced the code and point out the locations where I want to highlight,also do not think algorithm and structure too deeply.

What I want to is that if we define point 1 as const int k = TSIZE_MAX * 2;,then is there any optimization happens at point 2 or 3 where assignment happens to contiguous data(array) huffman_node_list2[k + i] = huffman_node_list2[i + 1]; ?

(Please bear with and correct my assumption if it is wrong,I thought when we declare const in local or global scope it's being created as an immutable memory allocation, if we use that immutable memory and carried out math operation as in point 2 or 3([k + i]) in a loop structure ,during runtime program has to load immutable memory every iteration of the loop and store the result in temporary memory location,what if happend if that immutable memory has large chunk,hope you can grab my idea,Am I correct?)

Upvotes: 1

Views: 315

Answers (3)

Jo&#235;l Hecht
Jo&#235;l Hecht

Reputation: 1842

Using Visual C, I compiled both versions of your code : with const int k and without const. The flag /FA produces code machine in a .asm file readable by (some) human. No optimization flags were used.

The result is : there's no optimization, no difference. The machine code produced is strictly the same :

; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.24231.0 

    TITLE   opt_const.c
    .686P
    .XMM
    include listing.inc
    .model  flat

INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES

PUBLIC  _main
_BSS    SEGMENT
?huffman_node_list2@?1??main@@9@9 DB 01fd4H DUP (?) ; `main'::`2'::huffman_node_list2
_BSS    ENDS
; Function compile flags: /Odtp
; File c:\joël\tests\opt_const.c
_TEXT   SEGMENT
_j$ = -16                       ; size = 4
_size$ = -12                        ; size = 4
_k$ = -8                        ; size = 4
_i$ = -4                        ; size = 4
_argc$ = 8                      ; size = 4
_argv$ = 12                     ; size = 4
_main   PROC

; 10   : {

    push    ebp
    mov ebp, esp
    sub esp, 16                 ; 00000010H
    push    esi
    push    edi

; 11   :     static struct huffman_node huffman_node_list2[TSIZE_MAX * 3];
; 12   :     int i = 0, j = 0, size = 17;

    mov DWORD PTR _i$[ebp], 0
    mov DWORD PTR _j$[ebp], 0
    mov DWORD PTR _size$[ebp], 17       ; 00000011H

; 13   :     int k = TSIZE_MAX * 2; // this is the case point 1

    mov DWORD PTR _k$[ebp], 194         ; 000000c2H

; 14   :     //...//
; 15   :     for (i = 0; i < size - 1; i++) {

    mov DWORD PTR _i$[ebp], 0
    jmp SHORT $LN4@main
$LN2@main:
    mov eax, DWORD PTR _i$[ebp]
    add eax, 1
    mov DWORD PTR _i$[ebp], eax
$LN4@main:
    mov ecx, DWORD PTR _size$[ebp]
    sub ecx, 1
    cmp DWORD PTR _i$[ebp], ecx
    jge SHORT $LN3@main

; 16   :         huffman_node_list2[k + i] = huffman_node_list2[i + 1]; // point 2

    mov edx, DWORD PTR _i$[ebp]
    add edx, 1
    imul    esi, edx, 28
    add esi, OFFSET ?huffman_node_list2@?1??main@@9@9
    mov eax, DWORD PTR _k$[ebp]
    add eax, DWORD PTR _i$[ebp]
    imul    edi, eax, 28
    add edi, OFFSET ?huffman_node_list2@?1??main@@9@9
    mov ecx, 7
    rep movsd

; 17   :         huffman_node_list2[TSIZE_MAX + i].right = &huffman_node_list2[k+ i];

    mov ecx, DWORD PTR _k$[ebp]
    add ecx, DWORD PTR _i$[ebp]
    imul    edx, ecx, 28
    add edx, OFFSET ?huffman_node_list2@?1??main@@9@9
    mov eax, DWORD PTR _i$[ebp]
    add eax, 97                 ; 00000061H
    imul    ecx, eax, 28
    mov DWORD PTR ?huffman_node_list2@?1??main@@9@9[ecx], edx

; 18   :     // ... //
; 19   :     }

    jmp SHORT $LN2@main
$LN3@main:

; 20   :     return 0;

    xor eax, eax

; 21   : }

    pop edi
    pop esi
    mov esp, ebp
    pop ebp
    ret 0
_main   ENDP
_TEXT   ENDS
END

EDIT : I did the same test with gcc, -O3 optimization flags. And... same result : the generated assembler code is again stricly the same with and without the const keyword.

        .file       "opt_const.c"
        .section    .text.unlikely,"ax",@progbits
.LCOLDB0:
        .section    .text.startup,"ax",@progbits
.LHOTB0:
        .p2align 4,,15
        .globl      main
        .type       main, @function
main:
.LFB23:
        .cfi_startproc
        movl        $huffman_node_list2.2488+16384, %eax
        .p2align 4,,10
        .p2align 3
.L2:
        movq        -16352(%rax), %rdx
        movq        %rax, -8192(%rax)
        addq        $32, %rax
        movq        %rdx, -32(%rax)
        movq        -16376(%rax), %rdx
        movq        %rdx, -24(%rax)
        movq        -16368(%rax), %rdx
        movq        %rdx, -16(%rax)
        movq        -16360(%rax), %rdx
        movq        %rdx, -8(%rax)
        cmpq        $huffman_node_list2.2488+17088, %rax
        jne .L2
        xorl        %eax, %eax
        ret
        .cfi_endproc
.LFE23:
        .size       main, .-main
        .section    .text.unlikely
.LCOLDE0:
        .section    .text.startup
.LHOTE0:
        .local      huffman_node_list2.2488
        .comm       huffman_node_list2.2488,24576,32
        .ident      "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609"
        .section    .note.GNU-stack,"",@progbits

Upvotes: 1

user207421
user207421

Reputation: 310884

const doesn't necessarily create a memory location at all, unless you take its address. They can just disappear into the instruction stream as immediate-mode constants, or be added into addresses at compile or link time.

For example, huffman_node_list2[k + i] = huffman_node_list2[i + 1] is almost certainly compiled as huffman_node_list2[TSIZE_MAX * 2 + i] = huffman_node_list2[i + 1], where not only is TSIZE_MAX * 2 evaluated at compile time but huffman_node_list2+TSIZE_MAX*2 is evaluated at link time.

Upvotes: 0

technosaurus
technosaurus

Reputation: 7802

const can be slower if the compiler puts it in the read only .text section far enough away that causes a cache miss.

This can happen with global consts or when the compiler hoists it out of a function rather than having to build it with instructions (a fairly common optimization for structs or arrays) This can reduce code size if multiple functions use the same constant, but also increases the distance from the code and thus the likeliness to cause a miss.

Since you aren't using any aggregate types, there should be no difference with a decent optimizing compiler.

There is a good article on how different data gets laid out here

Upvotes: 2

Related Questions