Reputation: 19
I am facing an issue with the FCMLE (Floating-point Compare Less than or Equal to zero) instruction in ARM assembly. I am using the aarch64-none-elf toolchain for my embedded software development, and the problem is that the assembler reports that an operand of the fcmle instruction is not a SIMD vector register, even though I believe I am passing it a SIMD register.
Here's a summary of my test code for fcmle:
#include <arm_neon.h>
/*
 * First attempt at exercising FCMLE through GCC inline assembly.
 *
 * Always returns true; the result check is elided in this excerpt.
 *
 * NOTE(review): this is the snippet that fails to assemble with
 * "operand 1 must be a SIMD vector register -- `fcmle s0,s0,s1'".
 */
bool test_fcmle(void)
{
float32x4_t Vn = { 1.5f, 1.1f, -3.5f, 0.0f};
float32x4_t Vm1 = { 1.1f, 1.5f, -3.5f, 4.4f};
// Operand 0 is the output (Vn); operands 1 and 2 are the inputs Vn and Vm1.
// NOTE(review): the %s<n> operand form is emitted as scalar S registers
// (the assembler message shows s0, s0, s1), not as vector registers.
asm volatile(
"FMOV %s0, %s1\n" // Copy input operand 1 (Vn) into output operand 0
"FCMLE %s0, %s0, %s2" // Intended: compare Vn against Vm1 (register-register form)
: "=w" (Vn)
: "w" (Vn), "w" (Vm1)
);
//...
//Some comparison code to check the result
//...
return true;
}
To make sure the SIMD registers were working, I used the fadd instruction instead of fcmle; the compilation error went away and the program ran successfully.
NOTE-1 : My toolchain is up to date (Version: aarch64-none-elf-gcc version 12.3.1 20230626 (Arm GNU Toolchain 12.3.Rel1 (Build arm-12.35))).
NOTE-2 : In the "instruction-sets" document on the ARM Developer website, in the supported architectures section for the "fcmle" instruction it says "Architectures A64" but for "fadd" it says "Architectures v7, A32, A64".
NOTE-3 : After a successful compilation, I test the executable file generated on a QEMU(v7.1.0) machine. The QEMU machine emulates the ARM Cortex-A72.
NOTE-4 : The command I used to compile :
aarch64-none-elf-gcc -Wall -Wextra -Wpedantic -nostdlib -ffreestanding -march=armv8-a -O0 -g -c -o main.o main.c
Does anyone have any insights into why I might be encountering this error? Am I missing any configuration or have I misunderstood the usage of the FCMLE instruction with this toolchain?
EDIT-1: The error I get as a result of compilation when I run the above code snippet:
/tmp/cc4yEhOw.s: Assembler messages:
/tmp/cc4yEhOw.s:1839: Error: operand 1 must be a SIMD vector register -- `fcmle s0,s0,s1'
make: *** [Makefile:23: main.o] Error 1
EDIT-2: After the changes @Jester mentioned, the following code compiles successfully, but I didn't get the result I expected: all values in the Vresult vector are 0.
#include <arm_neon.h>
#include <stdbool.h>
#include "uart.h"
/*
 * Second attempt at testing FCMLE, using the compare-against-zero form.
 *
 * Reports each actual/expected lane through printUart0/printNumber (declared
 * in uart.h, not shown here). Returns false at the first lane where the
 * result differs from the expectation, true if all four lanes match.
 */
bool test_fcmle(void)
{
printUart0("Testing fcmle\n");
float32x4_t Vn = { 1.5f, 1.1f, -3.5f, 0.0f};
// Expected mask: all ones in lanes where Vn is <= 0.0, zero elsewhere.
uint32x4_t Vexpected = {0, 0, 0xFFFFFFFF, 0xFFFFFFFF};
// NOTE(review): Vresult is a float vector while Vexpected is an integer
// vector, so the != below compares a float lane with a converted uint32_t
// rather than comparing bit patterns -- confirm this is intended.
float32x4_t Vresult;
// Operand 0 is the output (Vresult); operand 1 is the input (Vn).
// NOTE(review): the %s<n> form presumably selects only the scalar (lane 0)
// view of each register, which would explain why the upper lanes never
// show the expected mask -- confirm against the operand-modifier docs.
asm volatile(
"FMOV %s0, %s1\n" // Copy input operand 1 (Vn) into output operand 0
"FCMLE %s0, %s0, #0.0" // Compare against the immediate 0.0 (no Vm1 operand here)
: "=w" (Vresult)
: "w" (Vn)
);
// Print and check lane by lane; stop at the first mismatch.
for(int i = 0; i<4; ++i){
printNumber("Actual = ", Vresult[i]);
printNumber("Expected = ", Vexpected[i]);
if(Vresult[i] != Vexpected[i])
return false;
}
return true;
}
/* Entry point: runs the FCMLE test and reports a failure via printUart0. */
int main()
{
    bool passed = test_fcmle();

    if (passed == false) {
        printUart0("Test failed\n");
    }
}
Output of the code:
---------------------------------------------
Testing fcmle
Actual = 0
Expected = 0
Actual = 0
Expected = 0
Actual = 0
Expected = 4294967295
Test failed
EDIT-3 : I added "#include <arm_neon.h>" directive
Upvotes: 0
Views: 148
Reputation: 222264
Get rid of the `FMOV` line. There is no need for it. You can freely specify input and output registers in `FCMLE`.
Change `%s0`, `%s1`, and `%s2` to `%0`, `%1`, and `%2`, respectively. `%s0` is not useful to you in GCC inline assembly (per comments, `s` is an undocumented modifier for the operand that gives a scalar part of the register, but you want a vector). The names of the operands to the inline assembly construct are, if not named explicitly, `%0`, `%1`, and `%2`. Simply using `FCMLE %0, %1, %2` may work. If it does not because your assembler does not support this form of the `FCMLE` pseudo-instruction, use `FCMGE %0, %2, %1`. Here is a demonstration:
#include <arm_neon.h>
/*
 * Lane-wise "less than or equal" comparison of two float vectors, issued as
 * a single FCMLE instruction through inline assembly.
 *
 * Vn, Vm1: the two source vectors (compared as Vn <= Vm1 per lane).
 * Returns the four 32-bit result lanes; in the accompanying test a lane
 * where the comparison holds is expected to read 0xffffffff and 0 otherwise.
 *
 * NOTE(review): the surrounding answer points out that some assemblers may
 * not accept this form of FCMLE; it suggests "FCMGE %0, %2, %1" as the
 * fallback and reports that Apple tools needed "FCMLE.4s %0, %1, %2".
 */
static uint32x4_t foo(float32x4_t Vn, float32x4_t Vm1)
{
uint32x4_t Vresult;
// Positional operands: %0 = Vresult (output), %1 = Vn, %2 = Vm1.
// All three use the "w" constraint (FP/SIMD register class per GCC's
// AArch64 machine constraints).
__asm__ volatile(
"FCMLE %0, %1, %2"
: "=w" (Vresult)
: "w" (Vn), "w" (Vm1)
);
return Vresult;
}
#include <stdbool.h>
#include <stdio.h>
/*
 * Self-checking demo for foo(): prints both inputs, the expected mask and
 * the mask actually produced, then reports whether every lane agrees.
 *
 * Returns true when all four result lanes equal the expected lanes,
 * false otherwise.
 */
static bool test_fcmle(void)
{
    enum { LANES = 4 };
    int lane;

    printf("Testing fcmle:\n");

    float32x4_t Vn = { 35, 45, 55, 65 };
    float32x4_t Vm1 = { 36, 45, 54, 63 };
    /* -1 wraps to all-ones in an unsigned lane: the "comparison true" mask. */
    uint32x4_t Vexpected = { -1, -1, 0, 0 };

    printf("\tVn = ");
    for (lane = 0; lane < LANES; lane++) {
        printf(" %10g", Vn[lane]);
    }
    printf(".\n");

    printf("\tVm1 = ");
    for (lane = 0; lane < LANES; lane++) {
        printf(" %10g", Vm1[lane]);
    }
    printf(".\n");

    /* Run the instruction under test only after the inputs have been shown. */
    uint32x4_t Vresult = foo(Vn, Vm1);

    printf("\tVexpected = ");
    for (lane = 0; lane < LANES; lane++) {
        printf(" 0x%08x", Vexpected[lane]);
    }
    printf(".\n");

    printf("\tVresult = ");
    for (lane = 0; lane < LANES; lane++) {
        printf(" 0x%08x", Vresult[lane]);
    }
    printf(".\n");

    /* Stop scanning at the first lane that disagrees. */
    bool all_match = true;
    for (lane = 0; lane < LANES && all_match; lane++) {
        all_match = (Vresult[lane] == Vexpected[lane]);
    }
    return all_match;
}
/* Entry point: runs the FCMLE demo and prints a message if it fails. */
int main(void)
{
    const bool passed = test_fcmle();

    if (!passed) {
        printf("Test failed.\n");
    }
}
The output is:
Testing fcmle:
	Vn =          35         45         55         65.
	Vm1 =          36         45         54         63.
	Vexpected =  0xffffffff 0xffffffff 0x00000000 0x00000000.
	Vresult =  0xffffffff 0xffffffff 0x00000000 0x00000000.
Note: With Apple tools, I had to use `"FCMLE.4s %0, %1, %2"` rather than `"FCMLE %0, %1, %2"`.
I recommend naming the operands explicitly. This code uses the same names in assembly as the names in C:
/*
 * Lane-wise "less than or equal" comparison of two float vectors via a
 * single FCMLE instruction, written with named inline-assembly operands.
 *
 * Vn, Vm1: the two source vectors (compared as Vn <= Vm1 per lane).
 * Returns the four 32-bit result lanes produced by the instruction.
 */
static uint32x4_t foo(float32x4_t Vn, float32x4_t Vm1)
{
uint32x4_t Vresult;
// Each %[name] in the template refers to the operand declared with the same
// [name] below; the names deliberately mirror the C variables.
__asm__ volatile(
"FCMLE %[Vresult], %[Vn], %[Vm1]"
: [Vresult] "=w" (Vresult)
: [Vn] "w" (Vn), [Vm1] "w" (Vm1)
);
return Vresult;
}
Upvotes: 1
Reputation: 58762
`fcmle` is an alias for `fcmge` with reversed operands. Apparently GNU binutils currently does not support it (see aarch64-tbl.h:3429). As a workaround you can use `fcmge` directly. Try `FCMGE %s0, %s2, %s0` (notice last two arguments swapped).
Upvotes: 1