Understanding C x86 optimization of a small function

Question

I have a relatively small C function.

/*
 * ROTL16(x, r): rotate a 16-bit value x left by r bit positions.
 *
 * Both uses of x are parenthesized so that an expression argument such as
 * `a | b` is rotated as a whole. The operand is promoted to int, so the
 * result carries bits above bit 15; storing it into a uint16_t (as
 * a_inverse does) discards them. Note that x is evaluated twice.
 */
#define ROTL16(x, r) (((x) << (r)) | ((x) >> (16 - (r))))

/*
 * Update *right and *left in place:
 *   1. *right is XORed with *left and rotated left by 14 bits;
 *   2. the new *right is subtracted from *left (modulo 2^16) and the
 *      difference is rotated left by 7 bits.
 *
 * left, right: pointers to the two 16-bit values to transform.
 */
void a_inverse(uint16_t *left, uint16_t *right) {
  *right ^= *left;
  *right = ROTL16(*right, 14);
  *left -= *right;              /* uses the already-rotated *right */
  *left = ROTL16(*left, 7);
}

The function is called with uint16_t pointers that point to the low and high 16 bits of a single uint32_t integer.

uint32_t whole;
uint16_t * left = (uint16_t *)&whole;
uint16_t * right = left + 1;

a_inverse(left, right);

But the function does not behave as expected, so I blindly added the volatile keyword.

/*
 * ROTL16(x, r): rotate a 16-bit value x left by r bit positions.
 *
 * Both uses of x are parenthesized so that an expression argument such as
 * `a | b` is rotated as a whole. The operand is promoted to int, so the
 * result carries bits above bit 15; storing it into a uint16_t discards
 * them. x is evaluated twice, which for a volatile operand means two
 * separate reads of the object.
 */
#define ROTL16(x, r) (((x) << (r)) | ((x) >> (16 - (r))))

/*
 * Same transformation as the non-volatile variant, but every access to
 * *left and *right is a volatile access that the compiler must perform:
 *   1. *right is XORed with *left and rotated left by 14 bits;
 *   2. the new *right is subtracted from *left (modulo 2^16) and the
 *      difference is rotated left by 7 bits.
 *
 * left, right: pointers to the two 16-bit values to transform.
 */
void a_inverse(volatile uint16_t *left, volatile uint16_t *right) {
  *right ^= *left;
  *right = ROTL16(*right, 14);  /* reads *right twice, then stores */
  *left -= *right;              /* uses the already-rotated *right */
  *left = ROTL16(*left, 7);     /* reads *left twice, then stores */
}

This time it works correctly.

Comparing the output assembly with -O3 I see nothing wrong

This is the non-volatile version:

# a_inverse, non-volatile variant (x86-64, AT&T syntax).
# The memory operands show %rdi used as `left` and %rsi used as `right`.
a_inverse:
  .cfi_startproc
  endbr64
  movzwl    (%rsi), %eax          # eax = *right (16-bit load, zero-extended)
  xorw  (%rdi), %ax               # ax ^= *left
  rorw  $2, %ax                   # 16-bit rotate right by 2 == rotate left by 14
  movw  %ax, (%rsi)               # *right = ax
  movzwl    (%rdi), %edx          # edx = *left, loaded after the store to *right
  subl  %eax, %edx                # edx = *left - new *right (only low 16 bits are kept)
  movl  %edx, %eax
  rolw  $7, %ax                   # 16-bit rotate left by 7
  movw  %ax, (%rdi)               # *left = ax
  ret
  .cfi_endproc

And this is the one with the volatile keyword:

# a_inverse, volatile variant (x86-64, AT&T syntax).
# The memory operands show %rdi used as `left` and %rsi used as `right`.
# Each rotate is built from two separate 16-bit loads of the same location,
# a left shift, a right shift and an OR, followed by a 16-bit store.
a_inverse:
  .cfi_startproc
  endbr64
  movzwl    (%rdi), %edx          # edx = *left
  movzwl    (%rsi), %eax          # eax = *right
  xorl  %edx, %eax                # eax = *right ^ *left
  movw  %ax, (%rsi)               # *right = low 16 bits of eax
  movzwl    (%rsi), %eax          # 1st read of *right (left-shift operand)
  movzwl    (%rsi), %edx          # 2nd read of *right (right-shift operand)
  sall  $14, %eax                 # eax = *right << 14
  shrw  $2, %dx                   # dx  = *right >> 2   (16 - 14)
  orl   %edx, %eax                # combine the two halves of the rotate
  movw  %ax, (%rsi)               # *right = rotated value (low 16 bits)
  movzwl    (%rsi), %edx          # edx = *right, read back from memory
  movzwl    (%rdi), %eax          # eax = *left
  subl  %edx, %eax                # eax = *left - *right
  movw  %ax, (%rdi)               # *left = low 16 bits of the difference
  movzwl    (%rdi), %eax          # 1st read of *left (left-shift operand)
  movzwl    (%rdi), %edx          # 2nd read of *left (right-shift operand)
  sall  $7, %eax                  # eax = *left << 7
  shrw  $9, %dx                   # dx  = *left >> 9    (16 - 7)
  orl   %edx, %eax                # combine the two halves of the rotate
  movw  %ax, (%rdi)               # *left = rotated value (low 16 bits)
  ret
  .cfi_endproc

I don't understand why the two functions behave differently.

Could you help me understand what the reason might be?
The only difference between the expected and unexpected results is the use of volatile. Removing either one of the volatile qualifiers produces the unexpected result.

Understanding C x86 optimization of a small function

Answers (1)

Related Questions