Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x64: Fix false dependencies in int-to-float conversions #7098

Merged
merged 5 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,19 @@
(dst WritableXmm)
(src_size OperandSize))

;; Conversion from signed integers to floats, the `{v,}`cvtsi2s{s,d}`
;; instructions.
(CvtIntToFloat (op SseOpcode)
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved
(src1 Xmm)
(src2 GprMem)
(dst WritableXmm)
(src2_size OperandSize))
(CvtIntToFloatVex (op AvxOpcode)
(src1 Xmm)
(src2 GprMem)
(dst WritableXmm)
(src2_size OperandSize))

;; Converts an unsigned int64 to a float32/float64.
(CvtUint64ToFloatSeq (dst_size OperandSize) ;; 4 or 8
(src Gpr)
Expand Down Expand Up @@ -2095,6 +2108,18 @@
(_ Unit (emit (MInst.UnaryRmRImmVex size op src dst imm))))
dst))

(decl cvt_int_to_float (SseOpcode Xmm GprMem OperandSize) Xmm)
(rule (cvt_int_to_float op src1 src2 size)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.CvtIntToFloat op src1 src2 dst size))))
dst))

(decl cvt_int_to_float_vex (AvxOpcode Xmm GprMem OperandSize) Xmm)
(rule (cvt_int_to_float_vex op src1 src2 size)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.CvtIntToFloatVex op src1 src2 dst size))))
dst))

(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
(rule (cvt_u64_to_float_seq ty src)
(let ((size OperandSize (raw_operand_size_of_type ty))
Expand Down Expand Up @@ -4351,20 +4376,20 @@
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2pd) x))

;; Helper for creating `cvtsi2ss` instructions.
(decl x64_cvtsi2ss (Type GprMem) Xmm)
(rule (x64_cvtsi2ss ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2ss) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2ss ty x)
(decl x64_cvtsi2ss (Type Xmm GprMem) Xmm)
(rule (x64_cvtsi2ss ty x y)
(cvt_int_to_float (SseOpcode.Cvtsi2ss) x y (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2ss ty x y)
(if-let $true (use_avx))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2ss) x (raw_operand_size_of_type ty)))
(cvt_int_to_float_vex (AvxOpcode.Vcvtsi2ss) x y (raw_operand_size_of_type ty)))

;; Helper for creating `cvtsi2sd` instructions.
(decl x64_cvtsi2sd (Type GprMem) Xmm)
(rule (x64_cvtsi2sd ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2sd) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2sd ty x)
(decl x64_cvtsi2sd (Type Xmm GprMem) Xmm)
(rule (x64_cvtsi2sd ty x y)
(cvt_int_to_float (SseOpcode.Cvtsi2sd) x y (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2sd ty x y)
(if-let $true (use_avx))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2sd) x (raw_operand_size_of_type ty)))
(cvt_int_to_float_vex (AvxOpcode.Vcvtsi2sd) x y (raw_operand_size_of_type ty)))

;; Helper for creating `cvttps2dq` instructions.
(decl x64_cvttps2dq (XmmMem) Xmm)
Expand Down
83 changes: 69 additions & 14 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2872,30 +2872,21 @@ pub(crate) fn emit(
let (prefix, map, opcode) = match op {
// vmovd/vmovq are differentiated by `w`
AvxOpcode::Vmovd | AvxOpcode::Vmovq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x6E),
AvxOpcode::Vcvtsi2ss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x2A),
AvxOpcode::Vcvtsi2sd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x2A),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match src_size {
OperandSize::Size64 => true,
_ => false,
};
let mut insn = VexInstruction::new()
VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode)
.rm(src)
.reg(dst.to_real_reg().unwrap().hw_enc());
// These opcodes technically take a second operand which is the
// upper bits to preserve during the float conversion. We don't
// actually use this in this backend right now so reuse the
// destination register. This at least matches what LLVM does.
if let AvxOpcode::Vcvtsi2ss | AvxOpcode::Vcvtsi2sd = op {
insn = insn.vvvv(dst.to_real_reg().unwrap().hw_enc());
}
insn.encode(sink);
.reg(dst.to_real_reg().unwrap().hw_enc())
.encode(sink);
}

Inst::XmmRmREvex {
Expand Down Expand Up @@ -3200,8 +3191,6 @@ pub(crate) fn emit(
// Movd and movq use the same opcode; the presence of the REX prefix (set below)
// actually determines which is used.
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E),
SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
_ => panic!("unexpected opcode {:?}", op),
};
let rex = RexFlags::from(*src_size);
Expand Down Expand Up @@ -3239,6 +3228,72 @@ pub(crate) fn emit(
}
}

Inst::CvtIntToFloat {
op,
src1,
src2,
dst,
src2_size,
} => {
let dst = allocs.next(dst.to_reg().to_reg());
let src1 = allocs.next(src1.to_reg());
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(src1, dst);
let src2 = src2.clone().to_reg_mem().with_allocs(allocs);

let (prefix, opcode) = match op {
SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
_ => panic!("unexpected opcode {:?}", op),
};
let rex = RexFlags::from(*src2_size);
match src2 {
RegMem::Reg { reg: src2 } => {
emit_std_reg_reg(sink, prefix, opcode, 2, dst, src2, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, prefix, opcode, 2, dst, addr, rex, 0);
}
}
}

Inst::CvtIntToFloatVex {
op,
src1,
src2,
dst,
src2_size,
} => {
let dst = allocs.next(dst.to_reg().to_reg());
let src1 = allocs.next(src1.to_reg());
let src2 = match src2.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};

let (prefix, map, opcode) = match op {
AvxOpcode::Vcvtsi2ss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x2A),
AvxOpcode::Vcvtsi2sd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x2A),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match src2_size {
OperandSize::Size64 => true,
_ => false,
};
VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode)
.rm(src2)
.reg(dst.to_real_reg().unwrap().hw_enc())
.vvvv(src1.to_real_reg().unwrap().hw_enc())
.encode(sink);
}

Inst::CvtUint64ToFloatSeq {
dst_size,
src,
Expand Down
20 changes: 0 additions & 20 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5051,26 +5051,6 @@ fn test_x64_emit() {
"664C0F6EFF",
"movq %rdi, %xmm15",
));
insns.push((
Inst::gpr_to_xmm(
SseOpcode::Cvtsi2ss,
RegMem::reg(rdi),
OperandSize::Size32,
w_xmm15,
),
"F3440F2AFF",
"cvtsi2ss %edi, %xmm15",
));
insns.push((
Inst::gpr_to_xmm(
SseOpcode::Cvtsi2sd,
RegMem::reg(rsi),
OperandSize::Size64,
w_xmm1,
),
"F2480F2ACE",
"cvtsi2sd %rsi, %xmm1",
));

// ========================================================
// XmmRmi
Expand Down
48 changes: 46 additions & 2 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ impl Inst {
| Inst::XmmToGprImm { op, .. }
| Inst::XmmUnaryRmRImm { op, .. }
| Inst::XmmUnaryRmRUnaligned { op, .. }
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
| Inst::XmmUnaryRmR { op, .. }
| Inst::CvtIntToFloat { op, .. } => smallvec![op.available_from()],

Inst::XmmUnaryRmREvex { op, .. }
| Inst::XmmRmREvex { op, .. }
Expand All @@ -196,7 +197,8 @@ impl Inst {
| Inst::XmmMovRMImmVex { op, .. }
| Inst::XmmToGprImmVex { op, .. }
| Inst::XmmToGprVex { op, .. }
| Inst::GprToXmmVex { op, .. } => op.available_from(),
| Inst::GprToXmmVex { op, .. }
| Inst::CvtIntToFloatVex { op, .. } => op.available_from(),
}
}
}
Expand Down Expand Up @@ -1296,6 +1298,34 @@ impl PrettyPrint for Inst {
format!("{op} {src}, {dst}")
}

Inst::CvtIntToFloat {
op,
src1,
src2,
dst,
src2_size,
} => {
let dst = pretty_print_reg(*dst.to_reg(), 8, allocs);
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved
let src2 = src2.pretty_print(src2_size.to_bytes(), allocs);
let op = ljustify(op.to_string());
format!("{op} {src1}, {src2}, {dst}")
}

Inst::CvtIntToFloatVex {
op,
src1,
src2,
dst,
src2_size,
} => {
let dst = pretty_print_reg(*dst.to_reg(), 8, allocs);
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
let src2 = src2.pretty_print(src2_size.to_bytes(), allocs);
let op = ljustify(op.to_string());
format!("{op} {src1}, {src2}, {dst}")
}

Inst::CvtUint64ToFloatSeq {
src,
dst,
Expand Down Expand Up @@ -2164,6 +2194,20 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}
Inst::CvtIntToFloat {
src1, src2, dst, ..
} => {
collector.reg_use(src1.to_reg());
collector.reg_reuse_def(dst.to_writable_reg(), 0);
src2.get_operands(collector);
}
Inst::CvtIntToFloatVex {
src1, src2, dst, ..
} => {
collector.reg_def(dst.to_writable_reg());
collector.reg_use(src1.to_reg());
src2.get_operands(collector);
}
Inst::CvtUint64ToFloatSeq {
src,
dst,
Expand Down
29 changes: 21 additions & 8 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3336,23 +3336,36 @@

;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note that the `cvtsi2s{s,d}` instruction is not just an int-to-float
;; conversion instruction in isolation, it also takes the upper 64-bits of an
;; xmm register and places it into the destination. We don't actually want that
;; to happen as it could accidentally create a false dependency with a
;; previous instruction defining the register's upper 64-bits. See #7085 for
;; an instance of this.
;;
;; This means that the first operand to all of the int-to-float conversions here
;; are `(xmm_zero)` operands which is a guaranteed zero register that has no
;; dependencies on other instructions. Ideally this would be lifted out to a
;; higher level to get deduplicated between consecutive int-to-float operations
;; for example but that's not easy to do at this time.
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved

(rule 2 (lower (has_type $F32 (fcvt_from_sint a @ (value_type $I8))))
(x64_cvtsi2ss $I32 (extend_to_gpr a $I32 (ExtendKind.Sign))))
(x64_cvtsi2ss $I32 (xmm_zero $F32X4) (extend_to_gpr a $I32 (ExtendKind.Sign))))

(rule 2 (lower (has_type $F32 (fcvt_from_sint a @ (value_type $I16))))
(x64_cvtsi2ss $I32 (extend_to_gpr a $I32 (ExtendKind.Sign))))
(x64_cvtsi2ss $I32 (xmm_zero $F32X4) (extend_to_gpr a $I32 (ExtendKind.Sign))))

(rule 1 (lower (has_type $F32 (fcvt_from_sint a @ (value_type (ty_int (fits_in_64 ty))))))
(x64_cvtsi2ss ty a))
(x64_cvtsi2ss ty (xmm_zero $F32X4) a))

(rule 2 (lower (has_type $F64 (fcvt_from_sint a @ (value_type $I8))))
(x64_cvtsi2sd $I32 (extend_to_gpr a $I32 (ExtendKind.Sign))))
(x64_cvtsi2sd $I32 (xmm_zero $F64X2) (extend_to_gpr a $I32 (ExtendKind.Sign))))

(rule 2 (lower (has_type $F64 (fcvt_from_sint a @ (value_type $I16))))
(x64_cvtsi2sd $I32 (extend_to_gpr a $I32 (ExtendKind.Sign))))
(x64_cvtsi2sd $I32 (xmm_zero $F64X2) (extend_to_gpr a $I32 (ExtendKind.Sign))))

(rule 1 (lower (has_type $F64 (fcvt_from_sint a @ (value_type (ty_int (fits_in_64 ty))))))
(x64_cvtsi2sd ty a))
(x64_cvtsi2sd ty (xmm_zero $F64X2) a))

(rule 0 (lower (fcvt_from_sint a @ (value_type $I32X4)))
(x64_cvtdq2ps a))
Expand All @@ -3363,10 +3376,10 @@
;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (has_type $F32 (fcvt_from_uint val @ (value_type (fits_in_32 (ty_int ty))))))
(x64_cvtsi2ss $I64 (extend_to_gpr val $I64 (ExtendKind.Zero))))
(x64_cvtsi2ss $I64 (xmm_zero $F32X4) (extend_to_gpr val $I64 (ExtendKind.Zero))))

(rule 1 (lower (has_type $F64 (fcvt_from_uint val @ (value_type (fits_in_32 (ty_int ty))))))
(x64_cvtsi2sd $I64 (extend_to_gpr val $I64 (ExtendKind.Zero))))
(x64_cvtsi2sd $I64 (xmm_zero $F64X2) (extend_to_gpr val $I64 (ExtendKind.Zero))))

(rule (lower (has_type ty (fcvt_from_uint val @ (value_type $I64))))
(cvt_u64_to_float_seq ty val))
Expand Down
Loading