Reputation: 1581
I have a small Brainfuck interpreter I wrote a while back; and while compiling it, I noticed the output sizes of variating the optimization switch for gcc are not quite what I expected. The following is the program I compiled:
struct node {
struct node *prev;
int val;
struct node *jump;
struct node *next;
};
typedef struct node node;
node *newnode();
node *append(node *n);
node *prepend(node *n);
void erase(node *n);
int pop(node *n);
void doop(node *n);
node *link(node *n);
#include <stdlib.h>
// allocates a new node and sets all the things to zero
node *newnode() {
node *n = malloc(sizeof(node));
n->prev = n->next = n->jump = NULL;
n->val = 0;
return n;
}
// appends a node to a given node. assumes it is an end node
node *append(node *n) {
n->next = newnode();
n->next->prev = n;
return n->next;
}
// prepend node to list. assumes it is the first node
node *prepend(node *n) {
n->prev = newnode();
n->prev->next = n;
return n->prev;
}
// navigates to first node, then frees all the nodes, iterating to the end
void erase(node *n) {
node *m;
while (n->prev)
n = n->prev;
while (n) {
m = n->next;
free(n);
n = m;
}
}
// pops any node and links any connected nodes to each other
// returns value of erased node
int pop(node *n) {
int ret;
if (n->prev)
n->prev->next = n->next;
if (n->next)
n->next->prev = n->prev;
ret = n->val;
free(n);
return ret;
}
#include <stdio.h>
// bf tokens. all other are ignored
#define LSEEK '<'
#define RSEEK '>'
#define INCREMENT '+'
#define DECREMENT '-'
#define STDOUT '.'
#define STDIN ','
#define LBRACKET '['
#define RBRACKET ']'
// memory used by bf program. is this really turing-compliant?
char mem[30000] = { 0 };
// pointer used by bf program
char *ptr = mem;
// do operation beginning with given node
void doop(node *n) {
// copy node pointer in case we need the head of the list later
node *m = n;
// loop while node pointer is a valid one; e.g. stop at EOF
while (m) {
switch (m->val) {
// most of these are pretty self-explanatory
case LSEEK:
ptr--;
break;
case RSEEK:
ptr++;
break;
case INCREMENT:
(*ptr)++;
break;
case DECREMENT:
(*ptr)--;
break;
case STDOUT:
printf("%c", *ptr);
fflush(stdout);
break;
case STDIN:
*ptr = getchar();
break;
case LBRACKET:
// jump to closing bracket if value at pointer is false
if (!*ptr)
m = m->jump;
break;
case RBRACKET:
// jump back to opening bracket if value at pointer is true
if (*ptr)
m = m->jump;
break;
}
// proceed to next instruction
m = m->next;
}
}
// finds and references each bracket instruction to its corresponding bracket
node *link(node *n) {
// make a copy of the list head
node *m = n;
// make a temporary list to contain bracket links
node *links = newnode();
// while a valid node
while (m) {
// switch to bracket type
switch (m->val) {
case LBRACKET:
// this is an opening bracket, so we temporarily store it's
// location, and append the list as it grows
if (links->jump)
links = append(links);
links->jump = m;
break;
case RBRACKET:
// this is the closing bracket, so we save the temporarily
// stored link address to the closing bracket node, and
// connect the opening bracket node to the closing also;
// popping the list as we no longer need the data
m->jump = links->jump;
links->jump->jump = m;
if (links->prev) {
links = links->prev;
pop(links->next);
}
break;
}
// increment to next character
m = m->next;
}
// erase all the nodes in the temporary linked list
erase(links);
// return the head of the list
return n;
}
#include <signal.h>
// press ctrl-c then enter to quit if not running from a file
int run = 1;
void quit(int val) {
run = 0;
}
int main(int argc, char** argv) {
// catch crtl-c
signal(SIGINT, quit);
int c;
// our text structure is a linked list
node *text, *text_start;
if (argc > 1) {
// print the file name
printf("%s\n", argv[1]);
// open the file and read it to the linked list
FILE *f = fopen(argv[1], "r");
if (f == NULL) return 1;
text = text_start = newnode();
while ((c = fgetc(f)) != EOF) {
if (text->val)
text = append(text);
text->val = c;
}
fclose(f);
// link all the loops/ gotos, then process all instructions
doop(link(text_start));
// free all linked list nodes
erase(text_start);
// we just ran a file, so no interpreter
run = 0;
}
// repeatedly read and execute only one line until interrupted
while (run) {
// linked list generated for each line of input
text = text_start = newnode();
// read stdin buffer to list
while ((c = getchar()) != '\n') {
if (text->val)
text = append(text);
text->val = c;
}
// link all the loops/ gotos, then process the
// instructions for the line
doop(link(text_start));
// free all linked list nodes
erase(text_start);
}
return 0;
}
Lastly, the following is the compilation sequence the unexpected file sizes derive from:
$ gcc brainfuck.c -o brainfuck -Os && ls brainfuck -l && for i in `seq 0 3`; do gcc brainfuck.c -o brainfuck -O$i && ls brainfuck -l; done && gcc --version
-rwxrwxr-x 1 m m 13552 Oct 4 18:31 brainfuck
-rwxrwxr-x 1 m m 13544 Oct 4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.2) 5.4.0 20160609
Upvotes: 1
Views: 56
Reputation: 8718
A great deal of a small binary's size is going to be boilerplate startup, plus the debugging symbol table, plus lots of zero-padding in the global data region and other sections. Do a binary inspection for null padding. To get a somewhat more realistic proportion, strip the symbols.
You should really just be comparing the size of the TEXT section, that is the instruction stream not the whole Unix executable format binary.
Also optimizing code has a very unpredictable effect on size. Unrolling loops lengthens code, as well as inlining, but removing redundant memory load/stores, common subexpression elimination, dead code elimination and constant folding reduces the size. So you have an extremely opaque view when summing these opposing forces. If you really want to learn something, study the assembly side-by-side, line by line. See gcc -S
and report back.
Plus the comments are correct that a lot of code is not going to be very optimizable if you are spending most of your energy transporting data to and from I/O streams. Optimization works on CPU bound and memory bound material.
% gcc -OS -o bfos brainfuck.c # -OS is optimize but keep code small
% objdump -h bfos | grep text
12 .text 00000452 0000000000400730 0000000000400730 00000730 2**4
% gcc -O0 -o bfo0 brainfuck.c # -O0 is default: no optimizations
% objdump -h bfo0 | grep text
12 .text 00000652 0000000000400730 0000000000400730 00000730 2**4
0x452 / 0x652 = huge difference.
And yet the binary sizes are many times larger, have padding, and have nothing to do with the compiled code size:
% ls -l bfo0 bfos
-rwxr-xr-x 1 root root 13461 Oct 4 22:42 bfo0
-rwxr-xr-x 1 root root 13469 Oct 4 22:41 bfos
% gcc --version
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
Finally, long stretches of zero padding (the '*' means all repetition, so from 0x000760 through 0x0006700, it's all zero bytes)
% od -x bfo0 | grep -1 '\*'
0000720 0000 0000 0000 0000 0000 0000 0000 0000
*
0000760 0000 0000 0000 0000 0010 0000 0000 0000
--
0006700 0a8c 0040 0000 0000 0a8c 0040 0000 0000
*
0007040 09c9 0040 0000 0000 0a8c 0040 0000 0000
--
0007100 0a8c 0040 0000 0000 0a8c 0040 0000 0000
*
0007420 0a8c 0040 0000 0000 0a51 0040 0000 0000
--
0010640 0000 0000 0000 0000 0000 0000 0000 0000
*
0017020 07f0 0040 0000 0000 07d0 0040 0000 0000
--
0017640 0000 0000 0000 0000 0000 0000 0000 0000
*
0020000 1e28 0060 0000 0000 0000 0000 0000 0000
--
0020720 0000 0000 0000 0000 0000 0000 0000 0000
*
0021000 0000 0000 0000 0000 001b 0000 0001 0000
--
0024500 0000 0000 0000 0000 0000 0000 0000 0000
*
0024540 0000 0000 0003 0001 0238 0040 0000 0000
Upvotes: 4