Reputation: 21
Basically, all I have to do is multiply a matrix by a vector, using floating point operations in assembly.
My first draft looks like this:
# Skeleton for the matrix * vector exercise (GNU as, AT&T syntax).
# Comments use '#': in GNU as for x86, ';' only separates statements on a
# line, so a line starting with ';' would be parsed as an instruction.
.data
mat: .float 1.0, 2.0, 3.0, 4.0, 5.0, 6.0    # 6 floats; presumably a 2x3 matrix stored row by row - confirm
vek: .float 1.0, 2.0, 3.0                   # input vector, 3 floats
res: .float 0.0, 0.0                        # result vector, 2 floats, initially zero
.text
.globl main
main:
# here goes the magic
# NOTE(review): no exit status is set up before this call; pass one as the
# target ABI requires.
call exit
Unfortunately, nothing I've tried works. I know how to do the basic arithmetic operations (faddp, fmulp and so on), but I'm still not able to store the result efficiently. fstpl doesn't seem to work at all.
Can anybody give me a rough sketch (not of the matrix-vector multiplication itself) of how to use the FPU instructions and how to store a computed result in a register?
Thanks in advance,
Upvotes: 2
Views: 2640
Reputation: 212979
Implement it in C first, test it, then use gcc -S
to generate assembler source, then either use that as is or use it as a template for your own code.
E.g. here is a C implementation, mat_vec.c:
#include <stdio.h>
/*
 * Reference implementation: multiplies the 2x3 matrix `mat` by the
 * 3-element vector `vek`, stores the two dot products in `res`, and
 * prints them. Always returns 0.
 */
int main(void)
{
// The inputs are declared "volatile" so that every element read below is a
// real memory access; without it gcc would fold the constant arithmetic at
// compile time and no floating-point instructions would be generated.
volatile float mat[2][3] = { { 1.0f, 2.0f, 3.0f }, { 4.0f, 5.0f, 6.0f } };
volatile float vek[3] = { 1.0f, 2.0f, 3.0f };
float res[2] = { 0.0f, 0.0f };
// res[i] is the dot product of row i of mat with vek, written out
// explicitly (no loops) so the generated code is straight-line.
res[0] = mat[0][0] * vek[0] + mat[0][1] * vek[1] + mat[0][2] * vek[2];
res[1] = mat[1][0] * vek[0] + mat[1][1] * vek[1] + mat[1][2] * vek[2];
// The float arguments are promoted to double when passed to printf.
printf("res = { %g, %g }\n", res[0], res[1]);
return 0;
}
Let's make sure it works:
$ gcc -Wall -Os -m32 -march=i686 mat_vec.c -o mat_vec
$ ./mat_vec
res = { 14, 32 }
$
Looks good, so let's generate assembler source:
$ gcc -Wall -Os -m32 -march=i686 -S mat_vec.c -o mat_vec.S
$ cat mat_vec.S
# gcc -S output for main() of mat_vec.c (32-bit x86, AT&T syntax, x87 FPU).
# Notation used in the comments below:
#   mRC = mat[R][C], vN = vek[N]
#   x87 stack is written top first: [st(0), st(1), ...]
# Frame layout (offsets from %ebp):
#   -44..-24  mat[0][0] .. mat[1][2]
#   -20..-12  vek[0] .. vek[2]
#   -72..-60  temporaries holding v0, v1, m02, v2 for the first row
.cstring
LC6:
.ascii "res = { %g, %g }\12\0"          # printf format; \12 is octal for newline
.text
.globl _main
_main:
pushl %ebp                              # standard frame setup (interleaved with constant loads)
movl $0x40000000, %ecx                  # ecx = bit pattern of 2.0f
movl %esp, %ebp
movl $0x40400000, %edx                  # edx = bit pattern of 3.0f
pushl %esi                              # esi and ebx are saved here and restored before ret
movl $0x40800000, %eax                  # eax = bit pattern of 4.0f
pushl %ebx
movl $0x3f800000, %esi                  # esi = bit pattern of 1.0f
subl $96, %esp                          # room for locals, temporaries and the printf arguments
movl %esi, -44(%ebp)                    # mat[0][0] = 1.0
movl %ecx, -40(%ebp)                    # mat[0][1] = 2.0
movl %edx, -36(%ebp)                    # mat[0][2] = 3.0
movl %eax, -32(%ebp)                    # mat[1][0] = 4.0
movl $0x40a00000, %eax                  # 5.0f
movl %eax, -28(%ebp)                    # mat[1][1] = 5.0
movl $0x40c00000, %eax                  # 6.0f
movl %eax, -24(%ebp)                    # mat[1][2] = 6.0
movl %esi, -20(%ebp)                    # vek[0] = 1.0
movl %ecx, -16(%ebp)                    # vek[1] = 2.0
movl %edx, -12(%ebp)                    # vek[2] = 3.0
# --- volatile reads for res[0]: m00 and m01 stay on the x87 stack, the
# --- other four operands are copied to temporaries
flds -44(%ebp)                          # [m00]
flds -20(%ebp)                          # [v0, m00]
fstps -72(%ebp)                         # temp(-72) = v0; pop -> [m00]
flds -40(%ebp)                          # [m01, m00]
flds -16(%ebp)                          # [v1, m01, m00]
fstps -68(%ebp)                         # temp(-68) = v1; pop -> [m01, m00]
flds -36(%ebp)                          # [m02, m01, m00]
fstps -64(%ebp)                         # temp(-64) = m02; pop -> [m01, m00]
flds -12(%ebp)                          # [v2, m01, m00]
fstps -60(%ebp)                         # temp(-60) = v2; pop -> [m01, m00]
# --- volatile reads and products for res[1]
flds -32(%ebp)                          # [m10, m01, m00]
flds -20(%ebp)                          # [v0, m10, m01, m00]
flds -28(%ebp)                          # [m11, v0, m10, m01, m00]
flds -16(%ebp)                          # [v1, m11, v0, m10, m01, m00]
fxch %st(3)                             # [m10, m11, v0, v1, m01, m00]
fmulp %st, %st(2)                       # st(2) = m10*v0; pop -> [m11, m10*v0, v1, m01, m00]
flds -24(%ebp)                          # [m12, m11, m10*v0, v1, m01, m00]
flds -12(%ebp)                          # [v2, m12, m11, m10*v0, v1, m01, m00]
fxch %st(2)                             # [m11, m12, v2, m10*v0, v1, m01, m00]
fmulp %st, %st(4)                       # st(4) = m11*v1; pop -> [m12, v2, m10*v0, m11*v1, m01, m00]
call L3                                 # pushes the address of L3 (position-independent code setup)
"L00000000001$pb":
L3:
popl %ebx                               # ebx = address of this label, used as the PIC base below
fmulp %st, %st(1)                       # [m12*v2, m10*v0, m11*v1, m01, m00]
fxch %st(3)                             # [m01, m10*v0, m11*v1, m12*v2, m00]
fmuls -68(%ebp)                         # st(0) = m01*v1 -> [m01*v1, m10*v0, m11*v1, m12*v2, m00]
fxch %st(1)                             # [m10*v0, m01*v1, m11*v1, m12*v2, m00]
faddp %st, %st(2)                       # st(2) = m10*v0 + m11*v1; pop -> [m01*v1, s1, m12*v2, m00]
fxch %st(3)                             # [m00, s1, m12*v2, m01*v1]
fmuls -72(%ebp)                         # st(0) = m00*v0 -> [m00*v0, s1, m12*v2, m01*v1]
fxch %st(1)                             # [s1, m00*v0, m12*v2, m01*v1]
faddp %st, %st(2)                       # st(2) = s1 + m12*v2 = res[1]; pop -> [m00*v0, res1, m01*v1]
fxch %st(1)                             # [res1, m00*v0, m01*v1]
leal LC6-"L00000000001$pb"(%ebx), %eax  # eax = address of the format string, relative to the PIC base
fstpl 12(%esp)                          # printf arg 3 = res[1] stored as a double; pop -> [m00*v0, m01*v1]
flds -64(%ebp)                          # [m02, m00*v0, m01*v1]
fmuls -60(%ebp)                         # st(0) = m02*v2 -> [m02*v2, m00*v0, m01*v1]
fxch %st(1)                             # [m00*v0, m02*v2, m01*v1]
faddp %st, %st(2)                       # st(2) = m00*v0 + m01*v1; pop -> [m02*v2, s0]
faddp %st, %st(1)                       # st(1) = s0 + m02*v2 = res[0]; pop -> [res0]
fstpl 4(%esp)                           # printf arg 2 = res[0] stored as a double; x87 stack now empty
movl %eax, (%esp)                       # printf arg 1 = format string
call L_printf$stub
addl $96, %esp                          # release the local area
xorl %eax, %eax                         # return value 0
popl %ebx
popl %esi
leave
ret
# Import stub through which _main calls printf. The section is declared with
# a stub size of 5 bytes (last .section argument) and is flagged
# self_modifying_code.
.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_printf$stub:
.indirect_symbol _printf                # ties this stub to the external symbol _printf
# Five one-byte hlt instructions fill the 5-byte stub (';' separates
# statements here). NOTE(review): presumably the dynamic linker overwrites
# these bytes with a jump to printf at load time - confirm against the
# Mach-O documentation.
hlt ; hlt ; hlt ; hlt ; hlt
.subsections_via_symbols
The part you're interested in — the x87 arithmetic (fmulp/fmuls, faddp) and the fstpl stores of the results — starts just before label L3.
Upvotes: 2