From 63c2d1e0c34ff7b4452e50b8fa74218bf9ceb00e Mon Sep 17 00:00:00 2001
From: Trevor Elliott
Date: Tue, 9 Aug 2022 16:53:51 -0700
Subject: [PATCH] x64: Remove unnecessary register use when comparing against constants (#4645)

https://github.com/bytecodealliance/wasmtime/pull/4645
---
Reviewer notes (free-form text in this position, between the "---" separator
and the first file diff, is not part of the commit message or of any hunk):

  * inst.isle: declares `intcc_reverse` as an extern constructor taking an
    IntCC and producing an IntCC, and adds an `emit_cmp` rule that matches
    when the first operand `a` matches `simm32_from_value`. The rule emits
    `x64_cmp size a b`, i.e. with the operands swapped relative to the
    preceding rule's `x64_cmp size b a`, and passes `(intcc_reverse cc)` to
    `icmp_cond_result` so the condition matches the swapped operand order.
  * lower/isle.rs: implements `intcc_reverse` by returning `cc.reverse()`.
  * b1.clif: adds %f3 (constant is the first `icmp` operand) and %f4
    (constant is the second `icmp` operand). The expected assembly listed
    for the two functions is identical; both compare with `cmpl $1, %r8d`.

  NOTE(review): line breaks and leading whitespace inside the hunks below
  were reconstructed from the hunk headers and the projects' usual
  formatting; confirm against the upstream commit before applying,
  especially for the whitespace-sensitive expected output in b1.clif.

 cranelift/codegen/src/isa/x64/inst.isle       | 10 +++
 cranelift/codegen/src/isa/x64/lower/isle.rs   |  5 ++
 cranelift/filetests/filetests/isa/x64/b1.clif | 64 +++++++++++++++++++
 3 files changed, 79 insertions(+)

diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index aff7c7c8e3d1..69ed608d130e 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1080,6 +1080,9 @@
 (decl cc_invert (CC) CC)
 (extern constructor cc_invert cc_invert)
 
+(decl intcc_reverse (IntCC) IntCC)
+(extern constructor intcc_reverse intcc_reverse)
+
 (decl floatcc_inverse (FloatCC) FloatCC)
 (extern constructor floatcc_inverse floatcc_inverse)
 
@@ -3178,6 +3181,13 @@
       (let ((size OperandSize (raw_operand_size_of_type ty)))
         (icmp_cond_result (x64_cmp size b a) cc)))
 
+;; As a special case, reverse the arguments to the comparison when the LHS is a
+;; constant. This ensures that we avoid moving the constant into a register when
+;; performing the comparison.
+(rule (emit_cmp cc (and (simm32_from_value a) (value_type ty)) b)
+      (let ((size OperandSize (raw_operand_size_of_type ty)))
+        (icmp_cond_result (x64_cmp size a b) (intcc_reverse cc))))
+
 ;; For I128 values (held in two GPRs), the instruction sequences depend on what
 ;; kind of condition is tested.
 (rule (emit_cmp (IntCC.Equal) a @ (value_type $I128) b)
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index be1b51e42597..fb9abb6319b6 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -610,6 +610,11 @@ where
         }
     }
 
+    #[inline]
+    fn intcc_reverse(&mut self, cc: &IntCC) -> IntCC {
+        cc.reverse()
+    }
+
     #[inline]
     fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC {
         cc.inverse()
diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif
index a67242437054..d790d72d9436 100644
--- a/cranelift/filetests/filetests/isa/x64/b1.clif
+++ b/cranelift/filetests/filetests/isa/x64/b1.clif
@@ -73,6 +73,70 @@ block2:
 ;   popq    %rbp
 ;   ret
 
+function %f3(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i32 1
+  v2 = load.i32 v0
+  v3 = icmp eq v1, v2
+  brnz v3, block1
+  jump block2
+block1:
+  v4 = iconst.i32 1
+  return v4
+block2:
+  v5 = iconst.i32 1
+  return v5
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    0(%rdi), %r8d
+;   cmpl    $1, %r8d
+;   jz      label1; j label2
+; block1:
+;   movl    $1, %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; block2:
+;   movl    $1, %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f4(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i32 1
+  v2 = load.i32 v0
+  v3 = icmp eq v2, v1
+  brnz v3, block1
+  jump block2
+block1:
+  v4 = iconst.i32 1
+  return v4
+block2:
+  v5 = iconst.i32 1
+  return v5
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    0(%rdi), %r8d
+;   cmpl    $1, %r8d
+;   jz      label1; j label2
+; block1:
+;   movl    $1, %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; block2:
+;   movl    $1, %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
 function %test_x_slt_0_i64(i64) -> b1 {
 block0(v0: i64):
     v1 = iconst.i64 0