Reputation: 49
I have a problem summing the elements of two vectors of type double that have the same size. The code always returns 0.
#include <iostream>
using namespace std;
// Fills two dynamically allocated arrays of n doubles with 1 and tries to add
// up every element of both arrays with x87 instructions in an MSVC-style
// __asm block, then prints the result.
int main()
{
int n = 5;
// NOTE(review): neither array is released with delete[] anywhere in this snippet.
double* tab = new double[n];
double* tab3 = new double[n];
// NOTE(review): i is size_t while n is int, so this is a signed/unsigned comparison.
for (size_t i = 0; i < n; i++)
{
tab[i] = 1;
tab3[i] = 1;
}
// Written only by the fstp at the end of the asm block.
double sum;
__asm {
mov eax, n; // eax = element count; doubles as the down-counting loop index
mov edi, tab; // edi = base address of the first array
mov esi, tab3; // esi = base address of the second array
// Push +0.0 onto the x87 stack; st(0) is the running total.
fldz;
l:
// Address is base + 8 * (eax - 1): the elements are visited from last to first.
// NOTE(review): the memory operands carry no size specifier; an answer in this
// thread reports that `qword ptr` had to be added for the adds to take effect.
fadd[edi + 8 * eax - 8];
fadd[esi + 8 * eax - 8];
// Loop until eax reaches zero. Assumes n > 0: with n == 0 the first dec would
// wrap eax instead of ending the loop.
dec eax;
jnz l;
// Pop st(0) into sum.
fstp sum;
}
cout << sum;
}
Upvotes: 1
Views: 153
Reputation: 12342
So you are looking for asm for this bit of C code, right? https://godbolt.org/z/vbdfEb94s
#include <cstddef>
/// @brief Adds up all elements of two arrays into one running total.
///
/// For each index the element of @p a is accumulated first, then the element
/// of @p b, so the floating-point summation order is a[0], b[0], a[1], b[1], ...
///
/// @param a   First array; must provide at least @p len readable elements.
/// @param b   Second array; must provide at least @p len readable elements.
/// @param len Number of elements to read from each array.
/// @return The accumulated total, or 0 when @p len is 0.
double add(double *a, double *b, std::size_t len) {
  double total = 0;
  for (std::size_t idx = 0; idx < len; ++idx) {
    total += a[idx];
    total += b[idx];
  }
  return total;
}
I (meaning gcc) came up with this code for 64-bit:
# gcc output (Intel syntax) for the add() function shown above, 64-bit build.
# Registers as used below: rdi and rsi are the two array base pointers, rdx is
# compared against the index as the element count, rax is the index, and xmm0
# is the running total.
add(double*, double*, unsigned long):
# index = 0 (a write to eax also clears the upper half of rax)
xor eax, eax
# total = 0.0
xorps xmm0, xmm0
.L3:
# leave the loop once index == count
cmp rdx, rax
je .L1
# total += first[index]; total += second[index]  (8-byte elements)
addsd xmm0, QWORD PTR [rdi+rax*8]
addsd xmm0, QWORD PTR [rsi+rax*8]
inc rax
jmp .L3
.L1:
# the total is left in xmm0 for the caller
ret
and this for 32bit i386:
# gcc output (Intel syntax) for the add() function shown above, 32-bit i386 build.
# The three arguments are read from the stack at [ebp+8], [ebp+12] and [ebp+16];
# eax is the index and x87 st(0) is the running total.
add(double*, double*, unsigned int):
push ebp
# index = 0
xor eax, eax
# push +0.0 onto the x87 stack: total = 0.0
fldz
mov ebp, esp
# ecx = first array pointer, edx = second array pointer
mov ecx, DWORD PTR [ebp+8]
mov edx, DWORD PTR [ebp+12]
.L3:
# leave the loop once index == count (the count stays in memory at [ebp+16])
cmp DWORD PTR [ebp+16], eax
je .L1
# total += first[index]; total += second[index]  (explicit 8-byte operands)
fadd QWORD PTR [ecx+eax*8]
fadd QWORD PTR [edx+eax*8]
inc eax
jmp .L3
.L1:
# the total is left in st(0) for the caller
pop ebp
ret
Upvotes: 1
Reputation: 1569
Sadly, I am not on Windows, so I had to modify the code to use g++ instead of MSVC, but I used Intel-syntax assembly too. During debugging it turned out that the fadd instructions had no effect. I fixed it by adding qword ptr before the [edi + 8 * eax - 8] and [esi + 8 * eax - 8] operands to tell the assembler to use pointers to an 8-byte value (since you are using double instead of float):
fadd qword ptr [edi + 8 * eax - 8];
fadd qword ptr [esi + 8 * eax - 8];
Upvotes: 4