x64 bugfix: prevent load-op fusion of cmp because it could be emitted multiple times. #2576

Merged · 1 commit · Jan 13, 2021
17 changes: 13 additions & 4 deletions cranelift/codegen/src/isa/x64/lower.rs
@@ -380,11 +380,18 @@ fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
 
     // TODO Try to commute the operands (and invert the condition) if one is an immediate.
     let lhs = put_input_in_reg(ctx, inputs[0]);
-    let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
+    // We force the RHS into a register, and disallow load-op fusion, because we
+    // do not have a transitive guarantee that this cmp-site will be the sole
+    // user of the value. Consider: the icmp might be the only user of a load,
+    // but there may be multiple users of the icmp (e.g. select or bint
+    // instructions) that each invoke `emit_cmp()`. If we were to allow a load
+    // to sink to the *latest* one, but other sites did not permit sinking, then
+    // we would be missing the load for other cmp-sites.
+    let rhs = put_input_in_reg(ctx, inputs[1]);
 
     // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
     // us dst - src at the machine instruction level, so invert operands.
-    ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
+    ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::reg(rhs), lhs));
 }
 
 /// A specification for a fcmp emission.
@@ -465,8 +472,10 @@ fn emit_fcmp<C: LowerCtx<I = Inst>>(
         (inputs[0], inputs[1])
     };
     let lhs = put_input_in_reg(ctx, lhs_input);
-    let rhs = input_to_reg_mem(ctx, rhs_input);
-    ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
+    // See above in `emit_cmp()`. We must only use the reg/reg form of the
+    // comparison in order to avoid issues with merged loads.
+    let rhs = put_input_in_reg(ctx, rhs_input);
+    ctx.emit(Inst::xmm_cmp_rm_r(op, RegMem::reg(rhs), lhs));
 
     let cond_result = match cond_code {
         FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
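To make the failure mode described in the comment above concrete, here is a small, self-contained Rust sketch. It is a toy model under stated assumptions, not Cranelift code: the `Rhs` type and `lower_cmp_site` helper below are invented for illustration, and only loosely mirror the real lowering helpers named in the diff.

// A toy model (not Cranelift code; all names here are made up) of the bug:
// an `icmp` whose RHS comes from a load, with two users (`bint` and `select`)
// that each lower the comparison independently. A load can be merged ("sunk")
// into at most one instruction, so fusing it into one cmp site leaves the
// other cmp site without the loaded value.

#[derive(Clone, Copy, Debug)]
enum Rhs {
    Reg(&'static str),       // RHS lives in a register every site can reuse
    FusedLoad(&'static str), // RHS load fused into this cmp (old behavior)
    Missing,                 // the load was consumed elsewhere: the bug
}

/// Lower one cmp site. `allow_fusion` models the old load-op fusion path;
/// `load_sunk` models the rule that a load can be merged only once.
fn lower_cmp_site(allow_fusion: bool, load_sunk: &mut bool) -> Rhs {
    if !allow_fusion {
        // Fixed behavior: the load is emitted once into a register,
        // and every cmp site compares against that register.
        Rhs::Reg("%rax")
    } else if !*load_sunk {
        *load_sunk = true;
        Rhs::FusedLoad("0(%rsi)")
    } else {
        Rhs::Missing
    }
}

fn main() {
    for allow_fusion in [true, false] {
        let mut load_sunk = false;
        let bint_site = lower_cmp_site(allow_fusion, &mut load_sunk);
        let select_site = lower_cmp_site(allow_fusion, &mut load_sunk);
        println!(
            "fusion={}: bint cmp rhs = {:?}, select cmp rhs = {:?}",
            allow_fusion, bint_site, select_site
        );
    }
    // fusion=true:  bint cmp rhs = FusedLoad("0(%rsi)"), select cmp rhs = Missing
    // fusion=false: bint cmp rhs = Reg("%rax"), select cmp rhs = Reg("%rax")
}

The filetest added below exercises exactly this shape: one load feeding an icmp (or fcmp) that is then used by both a bint and a select, so the comparison is re-emitted per user while the load must be materialized only once.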
49 changes: 49 additions & 0 deletions cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
@@ -0,0 +1,49 @@
test compile
target x86_64
feature "experimental_x64"

function %f0(i64, i64) -> i64, i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v1
; check: movq 0(%rsi), %rax

v3 = icmp eq v0, v2

v4 = bint.i64 v3
; nextln: cmpq %rax, %rdi
; nextln: setz %cl
; nextln: movzbq %cl, %rcx

v5 = select.i64 v3, v0, v1
; nextln: cmpq %rax, %rdi
; nextln: cmovzq %rdi, %rsi

return v4, v5
; nextln: movq %rcx, %rax
; nextln: movq %rsi, %rdx
}

function %f1(f64, i64) -> i64, f64 {
block0(v0: f64, v1: i64):
v2 = load.f64 v1
; check: movsd 0(%rdi), %xmm1

v3 = fcmp eq v0, v2

v4 = bint.i64 v3
; nextln: ucomisd %xmm1, %xmm0
; nextln: setnp %dil
; nextln: setz %sil
; nextln: andl %edi, %esi
; nextln: movzbq %sil, %rsi

v5 = select.f64 v3, v0, v0
; nextln: ucomisd %xmm1, %xmm0
; nextln: movaps %xmm0, %xmm1
; nextln: jnp $$next; movsd %xmm0, %xmm1; $$next:
; nextln: jz $$next; movsd %xmm0, %xmm1; $$next:

return v4, v5
; nextln: movq %rsi, %rax
; nextln: movaps %xmm1, %xmm0
}