Reputation: 26647
A C loop is
while( *from)
{
*to++ = *from++;
}
I want to know which MIPS instructions the loop around *to++ = *from++;
translates to. By my count, one pass through the loop is 14 instructions:
$L2:
lw $2,12($fp)
lb $3,0($2)
bne $3,$0,$L4
j $L3
$L4:
lw $2,8($fp)
addu $3,$fp,12
lw $4,0($3)
lbu $5,0($4)
sb $5,0($2)
addu $4,$4,1
sw $4,0($3)
addu $2,$2,1
sw $2,8($fp)
j $L2
I came to this conclusion from looking at the complete C program:
/* strcpy.c */
#include <stdio.h>
#include <idt_entrypt.h>
/*
 * C string copy: copies the bytes of 'from' into 'to' up to, but not
 * including, the terminating '\0', and then terminates 'to' itself.
 * No length is passed in, so 'to' must already be large enough to hold
 * the whole string plus its terminator.
 */
static void str_cpy( char *to, const char *from)
{
while( *from) /* stop when the source terminator is reached */
{
*to++ = *from++; /* copy one byte, then advance both pointers */
}
*to = '\0'; /* the loop exits before the terminator is copied */
}
/*
 * Test driver: prints the source string, times a single str_cpy() call,
 * then prints the measured time and the resulting copy.
 * flush_cache(), timer_start() and timer_stop() are not declared in this
 * file; presumably they come from <idt_entrypt.h> -- TODO confirm.
 */
int main()
{
static char* hello = "Hello World!";
static char to[4711] = "blaha blaj blurk bletch";
int Time;
printf("Strangen hello ser ut sa har: %s\n", hello);
flush_cache(); /* empty the cache memory */
timer_start(); /* reset the time measurement */
str_cpy( to, hello);
Time = timer_stop(); /* read off the time */
printf("Time to copy: %d\n",Time);
printf("Och kopian sa har: %s\n", to);
}
Compiling it to MIPS assembly produces this:
.file 1 "strcpy.c"
# -G value = 8, Cpu = 3000, ISA = 1
# GNU C version cygnus-2.7.2-970404 (mips-mips-ecoff) compiled by GNU C version cygnus-2.7.2-970404.
# options passed: -msoft-float
# options enabled: -fpeephole -ffunction-cse -fkeep-static-consts
# -fpcc-struct-return -fcommon -fverbose-asm -fgnu-linker -msoft-float
# -meb -mcpu=3000
gcc2_compiled.:
__gnu_compiled_c:
.text
.align 2
.ent str_cpy
# str_cpy(to, from) as emitted by the compiler. Both pointer arguments are
# stored in memory relative to $fp and reloaded from there on every use.
str_cpy:
.frame $fp,8,$31 # vars= 0, regs= 1/0, args= 0, extra= 0
.mask 0x40000000,-8
.fmask 0x00000000,0
subu $sp,$sp,8 # allocate an 8-byte frame
sw $fp,0($sp) # save the caller's $fp
move $fp,$sp
sw $4,8($fp) # store the first argument ('to') at 8($fp)
sw $5,12($fp) # store the second argument ('from') at 12($fp)
$L2: # loop test: while (*from)
lw $2,12($fp) # $2 = from
lb $3,0($2) # $3 = *from
bne $3,$0,$L4 # non-zero byte: go to the loop body
j $L3 # zero byte: leave the loop
$L4: # loop body: *to++ = *from++
lw $2,8($fp) # $2 = to
addu $3,$fp,12 # $3 = address of the stored 'from'
lw $4,0($3) # $4 = from
lbu $5,0($4) # $5 = *from (the byte is loaded a second time)
sb $5,0($2) # *to = $5
addu $4,$4,1 # from + 1
sw $4,0($3) # store the incremented 'from' back
addu $2,$2,1 # to + 1
sw $2,8($fp) # store the incremented 'to' back
j $L2 # back to the loop test
$L3: # after the loop: *to = '\0'
lw $2,8($fp) # $2 = to
sb $0,0($2) # write the terminating zero byte
$L1:
move $sp,$fp # sp not trusted here
lw $fp,0($sp) # restore the caller's $fp
addu $sp,$sp,8 # release the frame
j $31 # return to the caller
.end str_cpy
.rdata
.align 2
$LC0:
.ascii "Hello World!\000"
.sdata
.align 2
hello.4:
.word $LC0
.data
.align 2
to.5:
.ascii "blaha blaj blurk bletch\000"
.space 4687
.rdata
.align 2
$LC1:
.ascii "Strangen hello ser ut sa har: %s\n\000"
.align 2
$LC2:
.ascii "Time to copy: %d\n\000"
.align 2
$LC3:
.ascii "Och kopian sa har: %s\n\000"
.text
.align 2
.globl main
.ent main
# main as emitted by the compiler: prints 'hello', calls str_cpy between
# timer_start and timer_stop, then prints the measured time and the copy.
main:
.frame $fp,32,$31 # vars= 8, regs= 2/0, args= 16, extra= 0
.mask 0xc0000000,-4
.fmask 0x00000000,0
subu $sp,$sp,32 # allocate a 32-byte frame
sw $31,28($sp) # save the return address
sw $fp,24($sp) # save the caller's $fp
move $fp,$sp
jal __main
la $4,$LC1 # first printf argument: address of the format string $LC1
lw $5,hello.4 # second printf argument: value of the static pointer 'hello'
jal printf
jal flush_cache
jal timer_start
la $4,to.5 # first str_cpy argument: address of the static array 'to'
lw $5,hello.4 # second str_cpy argument: value of 'hello'
jal str_cpy
jal timer_stop
sw $2,16($fp) # Time = value returned by timer_stop
la $4,$LC2 # first printf argument: address of the format string $LC2
lw $5,16($fp) # second printf argument: Time, reloaded from the frame
jal printf
la $4,$LC3 # first printf argument: address of the format string $LC3
la $5,to.5 # second printf argument: address of the static array 'to'
jal printf
$L5:
move $sp,$fp # sp not trusted here
lw $31,28($sp) # restore the return address
lw $fp,24($sp) # restore the caller's $fp
addu $sp,$sp,32 # release the frame
j $31 # return to the caller
.end main
So I analyzed the above and found that the number of instructions executed in one iteration of the while loop is 14. Is my reasoning correct?
Upvotes: 1
Views: 2603
Reputation: 16582
$L2:
lw $2,12($fp) ; 12($fp) is 'from' - load it in to $2
lb $3,0($2) ; read a byte
bne $3,$0,$L4 ; if it's non-zero, jump into the main loop
j $L3 ; otherwise exit (this is the while clause)
$L4:
lw $2,8($fp) ; 8($fp) is 'to' - load it into $2
addu $3,$fp,12 ; Load the address of 'from' into $3
lw $4,0($3) ; Load 'from' into $4
lbu $5,0($4) ; Read the byte again (this is the = *from)
sb $5,0($2) ; Store the byte (*to = )
addu $4,$4,1 ; increment from (from++)
sw $4,0($3) ; store it back
addu $2,$2,1 ; increment to (to++)
sw $2,8($fp) ; store it back
j $L2 ; do it all again
So 13 ops in the loop, as the j $L3 is skipped.
However, as markgz points out, MIPS has branch delay slots, which may require the compiler or assembler to add nops or switch instructions. You should look at the disassembly of the final code, as well as the intermediate assembler output.
It's probable in this case that there will at least be an extra nop after the initial bne instruction, but the assembler may re-order the final jump rather than pad with a nop. So 14 instructions may well be the total if you look at the final output.
There's a lot of redundancy in there - half the instructions are just loading/storing back to local variables, rather than just keeping stuff in registers. This is typical of a debug/unoptimised build.
Upvotes: 2
Reputation: 78903
You are including the test and conditional jump instructions in your counting, which doesn't seem appropriate to me.
Your code already has one branch more than it needs. Try
while ((*to++ = *from++));
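My compiler (gcc for x86) produces better code for that, with only one conditional jump. This form also copies the terminating '\0' itself, so the separate *to = '\0'; after the loop is no longer needed. On that architecture (which seems to have nicer addressing modes) it compiles to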
xorl %eax, %eax
.L8:
movzbl (%rsi,%rax), %edx
movb %dl, (%rdi,%rax)
addq $1, %rax
testb %dl, %dl
jne .L8
ret
So here the inner part is only three instructions (load, store, increment; not counting the test and the conditional jump) plus one initialization, since the increment is only done once inside the loop and not twice. Generally you have to be careful when asking questions like this: there is not really a cost to (*to++ = *from++)
by itself, but only as embedded into the surrounding code.
Upvotes: 1
Reputation: 756
Actually, only 13 instructions are executed in each iteration of that while loop (the instruction j $L3
is only executed when the loop ends).
Upvotes: 1
Reputation: 67733
Looks correct.
There seem to be lots of redundant loads & stores - is optimisation turned off completely?
Upvotes: 1