motoku
motoku

Reputation: 1581

Unexpected file sizes while variating gcc optimization switch

I have a small Brainfuck interpreter I wrote a while back; and while compiling it, I noticed the output sizes of variating the optimization switch for gcc are not quite what I expected. The following is the program I compiled:

struct node {
    struct node *prev;
    int val;
    struct node *jump;
    struct node *next;
};
typedef struct node node;
node *newnode();
node *append(node *n);
node *prepend(node *n);
void erase(node *n);
int pop(node *n);
void doop(node *n);
node *link(node *n);

#include <stdlib.h>

// allocates a new node and sets all the things to zero
node *newnode() {
    node *n = malloc(sizeof(node));
    n->prev = n->next = n->jump = NULL;
    n->val = 0;
    return n;
}

// appends a node to a given node. assumes it is an end node
node *append(node *n) {
    n->next = newnode();
    n->next->prev = n;
    return n->next;
}

// prepend node to list. assumes it is the first node
node *prepend(node *n) {
    n->prev = newnode();
    n->prev->next = n;
    return n->prev;
}

// navigates to first node, then frees all the nodes, iterating to the end
void erase(node *n) {
    node *m;
    while (n->prev)
        n = n->prev;
    while (n) {
        m = n->next;
        free(n);
        n = m;
    }
}

// pops any node and links any connected nodes to each other
// returns value of erased node
int pop(node *n) {
    int ret;
    if (n->prev)
        n->prev->next = n->next;
    if (n->next)
        n->next->prev = n->prev;
    ret = n->val;
    free(n);
    return ret;
}

#include <stdio.h>

// bf tokens. all other are ignored
#define LSEEK       '<'
#define RSEEK       '>'
#define INCREMENT   '+'
#define DECREMENT   '-'
#define STDOUT      '.'
#define STDIN       ','
#define LBRACKET    '['
#define RBRACKET    ']'

// memory used by bf program. is this really turing-compliant?
char mem[30000] = { 0 };
// pointer used by bf program
char *ptr = mem;

// do operation beginning with given node
void doop(node *n) {
    // copy node pointer in case we need the head of the list later
    node *m = n;
    // loop while node pointer is a valid one; e.g. stop at EOF
    while (m) {
        switch (m->val) {
            // most of these are pretty self-explanatory
            case LSEEK:
                ptr--;
                break;
            case RSEEK:
                ptr++;
                break;
            case INCREMENT:
                (*ptr)++;
                break;
            case DECREMENT:
                (*ptr)--;
                break;
            case STDOUT:
                printf("%c", *ptr);
                fflush(stdout);
                break;
            case STDIN:
                *ptr = getchar();
                break;
            case LBRACKET:
                // jump to closing bracket if value at pointer is false
                if (!*ptr)
                    m = m->jump;
                break;
            case RBRACKET:
                // jump back to opening bracket if value at pointer is true
                if (*ptr)
                    m = m->jump;
                break;
        }
        // proceed to next instruction
        m = m->next;
    }
}

// finds and references each bracket instruction to its corresponding bracket
node *link(node *n) {
    // make a copy of the list head
    node *m = n;
    // make a temporary list to contain bracket links
    node *links = newnode();
    // while a valid node
    while (m) {
        // switch to bracket type
        switch (m->val) {
            case LBRACKET:
                // this is an opening bracket, so we temporarily store it's
                // location, and append the list as it grows
                if (links->jump)
                    links = append(links);
                links->jump = m;
                break;
            case RBRACKET:
                // this is the closing bracket, so we save the temporarily
                // stored link address to the closing bracket node, and
                // connect the opening bracket node to the closing also;
                // popping the list as we no longer need the data
                m->jump = links->jump;
                links->jump->jump = m;
                if (links->prev) {
                    links = links->prev;
                    pop(links->next);
                }
                break;
        }
        // increment to next character
        m = m->next;
    }
    // erase all the nodes in the temporary linked list
    erase(links);
    // return the head of the list
    return n;
}

#include <signal.h>

// press ctrl-c then enter to quit if not running from a file
int run = 1;
void quit(int val) {
    run = 0;
}

int main(int argc, char** argv) {
    // catch crtl-c
    signal(SIGINT, quit);
    int c;
    // our text structure is a linked list
    node *text, *text_start;
    if (argc > 1) {
        // print the file name
        printf("%s\n", argv[1]);
        // open the file and read it to the linked list
        FILE *f = fopen(argv[1], "r");
        if (f == NULL) return 1;
        text = text_start = newnode();
        while ((c = fgetc(f)) != EOF) {
            if (text->val)
                text = append(text);
            text->val = c;
        }
        fclose(f);
        // link all the loops/ gotos, then process all instructions
        doop(link(text_start));
        // free all linked list nodes
        erase(text_start);
        // we just ran a file, so no interpreter
        run = 0;
    }
    // repeatedly read and execute only one line until interrupted
    while (run) {
        // linked list generated for each line of input
        text = text_start = newnode();
        // read stdin buffer to list
        while ((c = getchar()) != '\n') {
            if (text->val)
                text = append(text);
            text->val = c;
        }
        // link all the loops/ gotos, then process the
        // instructions for the line
        doop(link(text_start));
        // free all linked list nodes
        erase(text_start);
    }
    return 0;
}

Lastly, the following is the compilation sequence the unexpected file sizes derive from:

$ gcc brainfuck.c -o brainfuck -Os && ls brainfuck -l && for i in `seq 0 3`; do gcc brainfuck.c -o brainfuck -O$i && ls brainfuck -l; done && gcc --version
-rwxrwxr-x 1 m m 13552 Oct  4 18:31 brainfuck
-rwxrwxr-x 1 m m 13544 Oct  4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct  4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct  4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct  4 18:31 brainfuck
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.2) 5.4.0 20160609

Upvotes: 1

Views: 56

Answers (1)

BaseZen
BaseZen

Reputation: 8718

A great deal of a small binary's size is going to be boilerplate startup, plus the debugging symbol table, plus lots of zero-padding in the global data region and other sections. Do a binary inspection for null padding. To get a somewhat more realistic proportion, strip the symbols.

You should really just be comparing the size of the TEXT section, that is the instruction stream not the whole Unix executable format binary.

Also optimizing code has a very unpredictable effect on size. Unrolling loops lengthens code, as well as inlining, but removing redundant memory load/stores, common subexpression elimination, dead code elimination and constant folding reduces the size. So you have an extremely opaque view when summing these opposing forces. If you really want to learn something, study the assembly side-by-side, line by line. See gcc -S and report back.

Plus the comments are correct that a lot of code is not going to be very optimizable if you are spending most of your energy transporting data to and from I/O streams. Optimization works on CPU bound and memory bound material.

% gcc -OS -o bfos brainfuck.c # -OS is optimize but keep code small
% objdump -h bfos | grep text
 12 .text         00000452  0000000000400730  0000000000400730  00000730  2**4

% gcc -O0 -o bfo0 brainfuck.c # -O0 is default: no optimizations
% objdump -h bfo0 | grep text
 12 .text         00000652  0000000000400730  0000000000400730  00000730  2**4

0x452 / 0x652 = huge difference.

And yet the binary sizes are many times larger, have padding, and have nothing to do with the compiled code size:

% ls -l bfo0 bfos
-rwxr-xr-x 1 root root 13461 Oct  4 22:42 bfo0
-rwxr-xr-x 1 root root 13469 Oct  4 22:41 bfos

% gcc --version
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4

Finally, long stretches of zero padding (the '*' means all repetition, so from 0x000760 through 0x0006700, it's all zero bytes)

% od -x bfo0 | grep -1 '\*'
0000720 0000 0000 0000 0000 0000 0000 0000 0000
*
0000760 0000 0000 0000 0000 0010 0000 0000 0000
--
0006700 0a8c 0040 0000 0000 0a8c 0040 0000 0000
*
0007040 09c9 0040 0000 0000 0a8c 0040 0000 0000
--
0007100 0a8c 0040 0000 0000 0a8c 0040 0000 0000
*
0007420 0a8c 0040 0000 0000 0a51 0040 0000 0000
--
0010640 0000 0000 0000 0000 0000 0000 0000 0000
*
0017020 07f0 0040 0000 0000 07d0 0040 0000 0000
--
0017640 0000 0000 0000 0000 0000 0000 0000 0000
*
0020000 1e28 0060 0000 0000 0000 0000 0000 0000
--
0020720 0000 0000 0000 0000 0000 0000 0000 0000
*
0021000 0000 0000 0000 0000 001b 0000 0001 0000
--
0024500 0000 0000 0000 0000 0000 0000 0000 0000
*
0024540 0000 0000 0003 0001 0238 0040 0000 0000

Upvotes: 4

Related Questions