Two uint64_t numbers multiplication and division on coefficient in the C program are wrong compiled with GCC

Question

It is well known that fixed point arithmetics is equavalent to integer arithmetics with some constraints. In this way I caught the issue. Let's see:

#include 
#include 

#define USE_C_FUNC 1

#if USE_C_FUNC != 0
uint64_t fixmd(uint64_t a, uint64_t b)
{
    uint64_t c = a * b / 10000000000LL;
    return c;
}
#else
uint64_t fixmd(uint64_t a, uint64_t b)
{
    uint64_t c;

    asm (
    "mov %1, %%rax
"
    "mul %2
"
    "mov $10000000000, %%r8
"
    "div %%r8
"
    "mov %%rax, %0
"
    : "=r" (c) 
    : "r" (a), "r" (b)
    : "rax", "rdx", "r8"
    );
    return c;
}
#endif

void main(void)
{
    uint64_t x = 12589254118LL;
    uint64_t y = fixmd(x, x);
    printf("x=0x%llX=%llu, y=0x%llX=%llu
", x, x, y, y);
}

In case #define USE_C_FUNC 1 program prints WRONG result:

x=0x2EE60C5E6=12589254118, y=0x410F8719=1091536665

In case #define USE_C_FUNC 0 program (with inline assembly) prints RIGHT result:

x=0x2EE60C5E6=12589254118, y=0x3B0AB8254=15848931924

I had compiling this example without any optimization (with gcc option -O0).

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/10/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 10.2.1-6' --with-bugurl=file:///usr/share/doc/gcc-10/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-10 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-10-Km9U7s/gcc-10-10.2.1/debian/tmp-gcn/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-mutex
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 10.2.1 20210110 (Debian 10.2.1-6)

objdump -D uint64 for WRONG result for function fixmd() is:

0000000000001135 :
    1135:   55                      push   %rbp
    1136:   48 89 e5                mov    %rsp,%rbp
    1139:   48 89 7d e8             mov    %rdi,-0x18(%rbp)
    113d:   48 89 75 e0             mov    %rsi,-0x20(%rbp)
    1141:   48 8b 45 e8             mov    -0x18(%rbp),%rax
    1145:   48 0f af 45 e0          imul   -0x20(%rbp),%rax
    114a:   48 ba bf d5 ed bd ce    movabs $0xdbe6fecebdedd5bf,%rdx
    1151:   fe e6 db 
    1154:   48 f7 e2                mul    %rdx
    1157:   48 89 d0                mov    %rdx,%rax
    115a:   48 c1 e8 21             shr    $0x21,%rax
    115e:   48 89 45 f8             mov    %rax,-0x8(%rbp)
    1162:   48 8b 45 f8             mov    -0x8(%rbp),%rax
    1166:   5d                      pop    %rbp
    1167:   c3                      retq

objdump -D uint64 for GOOD result for function fixmd() is:

0000000000001135 :
    1135:   55                      push   %rbp
    1136:   48 89 e5                mov    %rsp,%rbp
    1139:   48 89 7d e8             mov    %rdi,-0x18(%rbp)
    113d:   48 89 75 e0             mov    %rsi,-0x20(%rbp)
    1141:   48 8b 4d e8             mov    -0x18(%rbp),%rcx
    1145:   48 8b 75 e0             mov    -0x20(%rbp),%rsi
    1149:   48 89 c8                mov    %rcx,%rax
    114c:   48 f7 e6                mul    %rsi
    114f:   49 b8 00 e4 0b 54 02    movabs $0x2540be400,%r8
    1156:   00 00 00 
    1159:   49 f7 f0                div    %r8
    115c:   48 89 c1                mov    %rax,%rcx
    115f:   48 89 4d f8             mov    %rcx,-0x8(%rbp)
    1163:   48 8b 45 f8             mov    -0x8(%rbp),%rax
    1167:   5d                      pop    %rbp
    1168:   c3                      retq

In the code for fixmd() in C the problem is quite clean: gcc uses signed multiplication (imul) for unsigned values and substitute division with multiplication on the coefficient (with overflow) and binary shifting. Seems, it is the bug, is not it?

Two uint64_t numbers multiplication and division on coefficient in the C program are wrong compiled with GCC

Answers (1)

Related Questions