ashleysmithgpu
ashleysmithgpu

Reputation: 1935

How do I really disable all rustc optimizations?

I'm trying to learn assembly through compiling Rust. I have found a way to compile Rust code to binary machine code and be able to objdump it to view the assembly. However if I write the following:

#![no_main]

#[link_section = ".text.entry"]
#[no_mangle]
pub extern "C" fn _start() -> ! {
    let a: u64 = 4;
    let b: u64 = 7;
    let c: u64 = a * b;
    
    loop {}
}

The assembly I get is:

0000000000000000 <.data>:
   0:   1101                    addi    sp,sp,-32
   2:   4511                    li      a0,4
   4:   e42a                    sd      a0,8(sp)
   6:   451d                    li      a0,7
   8:   e82a                    sd      a0,16(sp)
   a:   4571                    li      a0,28
   c:   ec2a                    sd      a0,24(sp)
   e:   a009                    j       0x10
  10:   a001                    j       0x10

So it looks like rust is collapsing the mul to a constant. I'm using the following compile options:

Cargo.toml:

[profile.dev]
opt-level = 0
mir-opt-level = 0

Is there a way to stop Rust from optimizing this?

The LLVM emitted looks like this:

; Function Attrs: noreturn nounwind
define dso_local void @_start() unnamed_addr #0 section ".text.entry" !dbg !22 {
start:
  %c.dbg.spill = alloca i64, align 8
  %b.dbg.spill = alloca i64, align 8
  %a.dbg.spill = alloca i64, align 8
  store i64 4, i64* %a.dbg.spill, align 8, !dbg !36
  call void @llvm.dbg.declare(metadata i64* %a.dbg.spill, metadata !28, metadata !DIExpression()), !dbg !37
  store i64 7, i64* %b.dbg.spill, align 8, !dbg !38
  call void @llvm.dbg.declare(metadata i64* %b.dbg.spill, metadata !31, metadata !DIExpression()), !dbg !39
  store i64 28, i64* %c.dbg.spill, align 8, !dbg !40
  call void @llvm.dbg.declare(metadata i64* %c.dbg.spill, metadata !33, metadata !DIExpression()), !dbg !41

So it looks like the optimization is before the LLVM pass.

$ rustc --version                                                                                                                                           
rustc 1.60.0-nightly (c5c610aad 2022-02-14)

Command to build:

RUSTFLAGS="--emit=llvm-bc" cargo build --target riscv64imac-unknown-none-elf --no-default-features

build.rs

fn main() {
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rustc-link-arg=-Tlink.ld");
}

link.ld

ENTRY(_start)
SECTIONS {
  .text : { *(.text); *(.text.*) }
}

Upvotes: 4

Views: 3369

Answers (1)

E_net4
E_net4

Reputation: 30042

There is one compiler pass before the generation of LLVM-IR, which is the generation of MIR, the Rust intermediate representation. If you emit this for the given code with a command such as this one:

cargo rustc -- --emit mir

You will see in the .mir file generated that the optimization already took place there.

fn _start() -> ! {
    let mut _0: !;                       // return place in scope 0 at src\main.rs:5:31: 5:32
    let _1: u64;                         // in scope 0 at src\main.rs:6:9: 6:10
    scope 1 {
        debug a => _1;                   // in scope 1 at src\main.rs:6:9: 6:10
        let _2: u64;                     // in scope 1 at src\main.rs:7:9: 7:10
        scope 2 {
            debug b => _2;               // in scope 2 at src\main.rs:7:9: 7:10
            let _3: u64;                 // in scope 2 at src\main.rs:8:9: 8:10
            scope 3 {
                debug c => _3;           // in scope 3 at src\main.rs:8:9: 8:10
            }
        }
    }

    bb0: {
        _1 = const 4_u64;                // scope 0 at src\main.rs:6:18: 6:19
        _2 = const 7_u64;                // scope 1 at src\main.rs:7:18: 7:19
        _3 = const 28_u64;               // scope 2 at src\main.rs:8:18: 8:23
        goto -> bb1;                     // scope 3 at src\main.rs:10:5: 10:12
    }

    bb1: {
        goto -> bb1;                     // scope 3 at src\main.rs:10:5: 10:12
    }
}

This is happening because the mir-opt-level option currently only exists as an unstable compiler option. It is not available as a profile property in Cargo. Set it manually on a direct call to the compiler:

cargo rustc -- -Z mir-opt-level=0 --emir mir

And this optimization will disappear:

fn _start() -> ! {
    let mut _0: !;                       // return place in scope 0 at src\main.rs:5:31: 5:32
    let mut _1: !;                       // in scope 0 at src\main.rs:5:33: 11:2
    let _2: u64;                         // in scope 0 at src\main.rs:6:9: 6:10
    let mut _5: u64;                     // in scope 0 at src\main.rs:8:18: 8:19
    let mut _6: u64;                     // in scope 0 at src\main.rs:8:22: 8:23
    let mut _7: (u64, bool);             // in scope 0 at src\main.rs:8:18: 8:23
    let mut _8: !;                       // in scope 0 at src\main.rs:10:5: 10:12
    let mut _9: ();                      // in scope 0 at src\main.rs:5:1: 11:2
    scope 1 {
        debug a => _2;                   // in scope 1 at src\main.rs:6:9: 6:10
        let _3: u64;                     // in scope 1 at src\main.rs:7:9: 7:10
        scope 2 {
            debug b => _3;               // in scope 2 at src\main.rs:7:9: 7:10
            let _4: u64;                 // in scope 2 at src\main.rs:8:9: 8:10
            scope 3 {
                debug c => _4;           // in scope 3 at src\main.rs:8:9: 8:10
            }
        }
    }

    bb0: {
        StorageLive(_1);                 // scope 0 at src\main.rs:5:33: 11:2
        StorageLive(_2);                 // scope 0 at src\main.rs:6:9: 6:10
        _2 = const 4_u64;                // scope 0 at src\main.rs:6:18: 6:19
        StorageLive(_3);                 // scope 1 at src\main.rs:7:9: 7:10
        _3 = const 7_u64;                // scope 1 at src\main.rs:7:18: 7:19
        StorageLive(_4);                 // scope 2 at src\main.rs:8:9: 8:10
        StorageLive(_5);                 // scope 2 at src\main.rs:8:18: 8:19
        _5 = _2;                         // scope 2 at src\main.rs:8:18: 8:19
        StorageLive(_6);                 // scope 2 at src\main.rs:8:22: 8:23
        _6 = _3;                         // scope 2 at src\main.rs:8:22: 8:23
        _7 = CheckedMul(_5, _6);         // scope 2 at src\main.rs:8:18: 8:23
        assert(!move (_7.1: bool), "attempt to compute `{} * {}`, which would overflow", move _5, move _6) -> bb1; // scope 2 at src\main.rs:8:18: 8:23
    }

    bb1: {
        _4 = move (_7.0: u64);           // scope 2 at src\main.rs:8:18: 8:23
        StorageDead(_6);                 // scope 2 at src\main.rs:8:22: 8:23
        StorageDead(_5);                 // scope 2 at src\main.rs:8:22: 8:23
        StorageLive(_8);                 // scope 3 at src\main.rs:10:5: 10:12
        goto -> bb2;                     // scope 3 at src\main.rs:10:5: 10:12
    }

    bb2: {
        _9 = const ();                   // scope 3 at src\main.rs:10:10: 10:12
        goto -> bb2;                     // scope 3 at src\main.rs:10:5: 10:12
    }
}

And this is probably as far as you can go without touching LLVM directly. Some optimisations in specific parts of the code can also be prevented through constructs such as black_box.

See also:

Upvotes: 11

Related Questions