Reputation: 89
My program seems to get stuck on box_1, after the jal instruction for Ch. I figure it's got to do with $ra getting overwritten. How do I fix this?
The program is supposed to be a simple bitcoin miner. Here is the diagram:
I'm using this article.
I figure that the way of solving this might have to do with saving $ra on the stack but that seems a bit unpractical and inefficient? Likely I am doing something wrong.
.data
A: .word 0x87564C0C
B: .word 0xF1369725
C: .word 0x82E6D493
D: .word 0x63A6B509
E: .word 0xDD9EFF54
F: .word 0xE07C2655
G: .word 0xA41F32E7
H: .word 0xC7D25631
W: .word 0x6534EA14
K: .word 0xC67178F2
.text
.globl main
main:
li $s0,0 #loop counter
li $s1,64 #loop limit
main_loop:
bge $s0,$s1,end_main_loop
jal box_0
move $a0,$v0 #save return value in $a0 to be used as argument by box_1
jal box_1
move $a0,$v0 #
jal box_2
move $a0,$v0 #
move $s2,$a0 #will be necessary for the input of box_4 later
jal box_3
move $s3,$v0 #Will be assigned to E later
jal box_4
move $a0,$v0 #
jal box_5
###Assignments
lw $a0,G
la $a1,H
sw $a0,($a1) #Old G goes into new H
lw $a0,F
la $a1,G
sw $a0,($a1) #Old F goes into new G
lw $a0,E
la $a1,F
sw $a0,($a1) #Old E goes into new F
#
la $a1,E
sw $s3,($a1) #Output of box_3 goes into new E
#
lw $a0,C
la $a1,D
sw $a0,($a1) #Old C goes into new D
lw $a0,B
la $a1,C
sw $a0,($a1) #Old B goes into new C
lw $a0,A
la $a1,B
sw $a0,($a1) #Old A goes into new B
#
la $a0,A
sw $v0,($a0) #Output of box_5 goes into new A
addi $s0,$s0,1 #increment loop counter
end_main_loop:
li $v0, 10 # terminate program
syscall
.text
.globl red_boxes
red_boxes:
box_0:
lw $t0,W
lw $t1,K
addu $t0,$t0,$t1 #Wt + Kt
move $v0,$t1
jr $ra
box_1:
move $t0,$a0 #output of box_0
jal Ch
move $t1,$v0
lw $t3,H
addu $t0,$t0,$t1
addu $t3,$t0,$t3
move $v0,$t3
jr $ra
box_2:
move $t0,$a0 #output of box_1
#move $t1,$a1 #output of Sigma1
jal Sigma1
move $t1,$v0
addu $t0,$t0,$t1
move $v0,$t0
jr $ra
box_3:
move $t0,$a0 #output of box_2
lw $t1,D
addu $t0,$t0,$t1
move $v0,$t0
jr $ra
box_4:
move $t0,$a0 #output of box_2 <----!!
#move $t1,$a1 #output of Ma
jal Ma
move $t1,$v0
addu $t0,$t0,$t1
move $v0,$t0
jr $ra
box_5:
move $t0,$a0 #output of box_4
#move $t1,$a1 #output of Sigma0
jal Sigma0
move $t1,$v0
addu $t0,$t0,$t1
move $v0,$t0
jr $ra
.text
.globl op_boxes
op_boxes:
Ch:
# (G&!E) || (F&E)
lw $t0,E
lw $t1,F
lw $t2,G
and $t1,$t1,$t0 #(F&E)
not $t0,$t0 #!E
and $t2,$t2,$t0 #(G&!E)
or $t0,$t1,$t2 #(G&!E) || (F&E)
move $v0,$t0
jr $ra
Sigma1:
lw $t0,E
ror $t1,$t0,6 #rotates E to the right by 6 bits
ror $t2,$t0,11 # ''' by 11 bits
ror $t3,$t0,25 # ''' by 25 bits
addu $t2,$t2,$t1 # A->6 + A->11
addu $t3,$t3,$t2 # (A->6 + A->11) + A->25
li $t1,1
and $t1,$t3,$t1
move $v0,$t1
jr $ra
Ma:
# majority = (A&B) | (B&C)
lw $t0,A
lw $t1,B
lw $t2,C
or $t3, $t0, $t2
and $t1, $t1, $t3
and $v0, $t0, $t2
or $v0, $t1, $v0
jr $ra
Sigma0:
#Same as Sigma0 but shifted by different values
lw $t0,A
ror $t1,$t0,2
ror $t2,$t0,13
ror $t3,$t0,22
add $t2,$t2,$t1
add $t3,$t3,$t2
li $t1,1
and $t1,$t3,$t1
move $v0,$t1
jr $ra
Also, I have used addu instead of add because sometimes I get an overflow. Is this correct? The article doesn't say anything about overflows.
Upvotes: 0
Views: 313
Reputation: 26766
saving $ra on the stack but that seems a bit unpractical and inefficient?
If you know exactly what registers Ch (and all its callee's, here none), then you can save the $ra
away in one of those registers instead of on the stack to save a cycle here and there.
If you were using RISC V, you could use an alternate register for for the return address when calling Ch
, and then by avoiding $ra
here, wouldn't need to preserve $ra
at all.
A couple of approaches if you really want to optimize the code:
One is to develop program-wide register assignments, so that you don't have to keep loading and storing those global variables, instead just manipulating registers.
(This can be hard to do if either the program gets really large, or, you also want to mix calls having compiler generated C code — as the compiler won't necessarily know about your program-wide register assignments. However, if you write it as a subroutine called by C but doesn't call C back — only returns to C — you can still to the whole subroutine register assignments.)
Another approach would be to inline all the methods — taking a quick glance it doesn't look like there's much reuse by invocation, so such inlining seems indicated.
Both approaches can be combined.
Yes, addu
is appropriate when we don't care about overflow. It produces the same bit patterns as add
but simply forgoes the overflow check.
Upvotes: 2