diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 7ea81927dad5..4c99f7c8f2aa 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -274,15 +274,6 @@ (f_tmp WritableReg) (rs Reg) (ty Type)) - ;;;; FMax - (FloatSelect - (op FloatSelectOP) - (rd WritableReg) - ;; a integer register - (tmp WritableReg) - (rs1 Reg) - (rs2 Reg) - (ty Type)) ;; popcnt if target doesn't support extension B ;; use iteration to implement. @@ -391,11 +382,6 @@ )) -(type FloatSelectOP (enum - (Max) - (Min) -)) - (type FloatRoundOP (enum (Nearest) (Ceil) @@ -1098,15 +1084,6 @@ (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty)))) (writable_reg_to_reg rd))) -(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg) -(rule - (gen_float_select op x y ty) - (let - ((rd WritableReg (temp_writable_reg ty)) - (tmp WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.FloatSelect op rd tmp x y ty)))) - (writable_reg_to_reg rd))) - ;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1527,6 +1504,16 @@ (decl rv_fge (Type FReg FReg) XReg) (rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) +;; Helper for emitting the `fmin` instruction. +(decl rv_fmin (Type FReg FReg) FReg) +(rule (rv_fmin $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FminS) $F32 rs1 rs2)) +(rule (rv_fmin $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FminD) $F64 rs1 rs2)) + +;; Helper for emitting the `fmax` instruction. 
+(decl rv_fmax (Type FReg FReg) FReg) +(rule (rv_fmax $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxS) $F32 rs1 rs2)) +(rule (rv_fmax $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxD) $F64 rs1 rs2)) + ;; `Zba` Extension Instructions diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 0ce56295f849..7539e3172833 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -1695,53 +1695,6 @@ impl FloatRoundOP { } } -impl FloatSelectOP { - pub(crate) fn op_name(self) -> &'static str { - match self { - FloatSelectOP::Max => "max", - FloatSelectOP::Min => "min", - } - } - - pub(crate) fn to_fpuoprrr(self, ty: Type) -> FpuOPRRR { - match self { - FloatSelectOP::Max => { - if ty == F32 { - FpuOPRRR::FmaxS - } else { - FpuOPRRR::FmaxD - } - } - FloatSelectOP::Min => { - if ty == F32 { - FpuOPRRR::FminS - } else { - FpuOPRRR::FminD - } - } - } - } - // move qnan bits into int register. - pub(crate) fn snan_bits(self, rd: Writable, ty: Type) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); - let x = if ty == F32 { 22 } else { 51 }; - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(x), - }); - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(x), - }); - insts - } -} - pub(crate) fn f32_bits(f: f32) -> u32 { u32::from_le_bytes(f.to_le_bytes()) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 130eeaa12142..2fc5749deea4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -368,7 +368,6 @@ impl Inst { | Inst::Unwind { .. } | Inst::DummyUse { .. } | Inst::FloatRound { .. } - | Inst::FloatSelect { .. } | Inst::Popcnt { .. } | Inst::Rev8 { .. } | Inst::Cltz { .. 
} @@ -2587,126 +2586,6 @@ impl Inst { sink.bind_label(label_jump_over, &mut state.ctrl_plane); } - &Inst::FloatSelect { - op, - rd, - tmp, - rs1, - rs2, - ty, - } => { - let label_nan = sink.get_label(); - let label_jump_over = sink.get_label(); - // check if rs1 is nan. - Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: CondBrTarget::Label(label_nan), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // check if rs2 is nan. - Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); - Inst::CondBr { - taken: CondBrTarget::Label(label_nan), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: tmp.to_reg(), - rs2: zero_reg(), - }, - } - .emit(&[], sink, emit_info, state); - // here rs1 and rs2 is not nan. - Inst::FpuRRR { - alu_op: op.to_fpuoprrr(ty), - frm: None, - rd: rd, - rs1: rs1, - rs2: rs2, - } - .emit(&[], sink, emit_info, state); - // special handle for +0 or -0. - { - // check is rs1 and rs2 all equal to zero. 
- let label_done = sink.get_label(); - { - // if rs1 == 0 - let mut insts = Inst::emit_if_float_not_zero( - tmp, - rs1, - ty, - CondBrTarget::Label(label_done), - CondBrTarget::Fallthrough, - ); - insts.extend(Inst::emit_if_float_not_zero( - tmp, - rs2, - ty, - CondBrTarget::Label(label_done), - CondBrTarget::Fallthrough, - )); - insts - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - Inst::FpuRR { - alu_op: FpuOPRR::move_f_to_x_op(ty), - frm: None, - rd: tmp, - rs: rs1, - } - .emit(&[], sink, emit_info, state); - Inst::FpuRR { - alu_op: FpuOPRR::move_f_to_x_op(ty), - frm: None, - rd: writable_spilltmp_reg(), - rs: rs2, - } - .emit(&[], sink, emit_info, state); - Inst::AluRRR { - alu_op: if op == FloatSelectOP::Max { - AluOPRRR::And - } else { - AluOPRRR::Or - }, - rd: tmp, - rs1: tmp.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - // move back to rd. - Inst::FpuRR { - alu_op: FpuOPRR::move_x_to_f_op(ty), - frm: None, - rd, - rs: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - // - sink.bind_label(label_done, &mut state.ctrl_plane); - } - // we have the reuslt,jump over. - Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state); - // here is nan. - sink.bind_label(label_nan, &mut state.ctrl_plane); - op.snan_bits(tmp, ty) - .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - // move to rd. 
- Inst::FpuRR { - alu_op: FpuOPRR::move_x_to_f_op(ty), - frm: None, - rd, - rs: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over, &mut state.ctrl_plane); - } &Inst::Popcnt { sum, tmp, @@ -3708,22 +3587,6 @@ impl Inst { rd: allocs.next_writable(rd), }, - Inst::FloatSelect { - op, - rd, - tmp, - rs1, - rs2, - ty, - } => Inst::FloatSelect { - op, - ty, - rs1: allocs.next(rs1), - rs2: allocs.next(rs2), - tmp: allocs.next_writable(tmp), - rd: allocs.next_writable(rd), - }, - Inst::Popcnt { sum, tmp, diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index 111259278469..d42312e60f4b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -2287,15 +2287,6 @@ fn riscv64_worst_case_instruction_size() { ty: F64, }); - candidates.push(Inst::FloatSelect { - op: FloatSelectOP::Max, - rd: writable_fa0(), - tmp: writable_a0(), - rs1: fa0(), - rs2: fa0(), - ty: F64, - }); - let mut max: (u32, MInst) = (0, Inst::Nop0); for i in candidates { let mut buffer = MachBuffer::new(); diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 4c10191eccfc..d677de0b8f68 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -55,7 +55,7 @@ pub(crate) type VecWritableReg = Vec>; pub use crate::isa::riscv64::lower::isle::generated_code::{ AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FloatRoundOP, - FloatSelectOP, FpuOPRR, FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM, + FpuOPRR, FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM, }; use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR}; @@ -609,13 +609,6 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan 
collector.reg_early_def(f_tmp); collector.reg_early_def(rd); } - &Inst::FloatSelect { - rd, tmp, rs1, rs2, .. - } => { - collector.reg_uses(&[rs1, rs2]); - collector.reg_early_def(tmp); - collector.reg_early_def(rd); - } &Inst::Popcnt { sum, step, rs, tmp, .. } => { @@ -1109,29 +1102,6 @@ impl Inst { ty ) } - &Inst::FloatSelect { - op, - rd, - tmp, - rs1, - rs2, - ty, - } => { - let rs1 = format_reg(rs1, allocs); - let rs2 = format_reg(rs2, allocs); - let tmp = format_reg(tmp.to_reg(), allocs); - let rd = format_reg(rd.to_reg(), allocs); - format!( - "f{}.{} {},{},{}##tmp={} ty={}", - op.op_name(), - if ty == F32 { "s" } else { "d" }, - rd, - rs1, - rs2, - tmp, - ty - ) - } &Inst::AtomicStore { src, ty, p } => { let src = format_reg(src, allocs); let p = format_reg(p, allocs); diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 53a2d65f6a05..5106ee4fa2d4 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1478,8 +1478,16 @@ ;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RISC-V's `fmin` instruction returns the non-NaN input if exactly one of the +;; inputs is a NaN. We handle this by manually checking if either input is a +;; NaN and selecting based on that result. (rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y))) - (gen_float_select (FloatSelectOP.Min) x y ty)) + (let (;; Check if both inputs are not NaN. + (is_ordered FCmp (emit_fcmp (FloatCC.Ordered) ty x y)) + ;; `fadd` returns a NaN if any of the inputs is a NaN. + (nan FReg (rv_fadd ty x y)) + (min FReg (rv_fmin ty x y))) + (gen_select_freg is_ordered min nan))) ;; vfmin does almost the right thing, but it does not handle NaN's correctly. 
;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the @@ -1496,8 +1504,17 @@ ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RISC-V's `fmax` instruction returns the non-NaN input if exactly one of the +;; inputs is a NaN. We handle this by manually checking if either input is a +;; NaN and selecting based on that result. (rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y))) - (gen_float_select (FloatSelectOP.Max) x y ty)) + (let (;; Check if both inputs are not NaN. + (is_ordered FCmp (emit_fcmp (FloatCC.Ordered) ty x y)) + ;; `fadd` returns a NaN if any of the inputs is a NaN. + (nan FReg (rv_fadd ty x y)) + (max FReg (rv_fmax ty x y))) + (gen_select_freg is_ordered max nan))) + ;; vfmax does almost the right thing, but it does not handle NaN's correctly. 
-; block0: -; fmv.d fa5,fa0 -; fmin.d fa0,fa5,fa1##tmp=a4 ty=f64 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; fmv.d fa5, fa0 -; feq.d a4, fa5, fa5 -; beqz a4, 0x3c -; feq.d a4, fa1, fa1 -; beqz a4, 0x34 -; fmin.d fa0, fa5, fa1 -; fclass.d a4, fa5 -; andi a4, a4, 0x18 -; beqz a4, 0x34 -; fclass.d a4, fa1 -; andi a4, a4, 0x18 -; beqz a4, 0x28 -; fmv.x.d a4, fa5 -; fmv.x.d t6, fa1 -; or a4, a4, t6 -; fmv.d.x fa0, a4 -; j 0x14 -; addi a4, zero, -1 -; srli a4, a4, 0x33 -; slli a4, a4, 0x33 -; fmv.d.x fa0, a4 -; ret - -function %f11(f32, f32) -> f32 { -block0(v0: f32, v1: f32): - v2 = fmax v0, v1 - return v2 -} - -; VCode: -; block0: -; fmv.d fa5,fa0 -; fmax.s fa0,fa5,fa1##tmp=a4 ty=f32 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; fmv.d fa5, fa0 -; feq.s a4, fa5, fa5 -; beqz a4, 0x3c -; feq.s a4, fa1, fa1 -; beqz a4, 0x34 -; fmax.s fa0, fa5, fa1 -; fclass.s a4, fa5 -; andi a4, a4, 0x18 -; beqz a4, 0x34 -; fclass.s a4, fa1 -; andi a4, a4, 0x18 -; beqz a4, 0x28 -; fmv.x.w a4, fa5 -; fmv.x.w t6, fa1 -; and a4, a4, t6 -; fmv.w.x fa0, a4 -; j 0x14 -; addi a4, zero, -1 -; srli a4, a4, 0x16 -; slli a4, a4, 0x16 -; fmv.w.x fa0, a4 -; ret - -function %f12(f64, f64) -> f64 { -block0(v0: f64, v1: f64): - v2 = fmax v0, v1 - return v2 -} - -; VCode: -; block0: -; fmv.d fa5,fa0 -; fmax.d fa0,fa5,fa1##tmp=a4 ty=f64 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; fmv.d fa5, fa0 -; feq.d a4, fa5, fa5 -; beqz a4, 0x3c -; feq.d a4, fa1, fa1 -; beqz a4, 0x34 -; fmax.d fa0, fa5, fa1 -; fclass.d a4, fa5 -; andi a4, a4, 0x18 -; beqz a4, 0x34 -; fclass.d a4, fa1 -; andi a4, a4, 0x18 -; beqz a4, 0x28 -; fmv.x.d a4, fa5 -; fmv.x.d t6, fa1 -; and a4, a4, t6 -; fmv.d.x fa0, a4 -; j 0x14 -; addi a4, zero, -1 -; srli a4, a4, 0x33 -; slli a4, a4, 0x33 -; fmv.d.x fa0, a4 -; ret function %f13(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/isa/riscv64/fmax.clif b/cranelift/filetests/filetests/isa/riscv64/fmax.clif new file mode 100644 index 
000000000000..04fcdc2622fb --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/fmax.clif @@ -0,0 +1,62 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %fmax_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} + +; VCode: +; block0: +; feq.s a3,fa0,fa0 +; feq.s a5,fa1,fa1 +; and a1,a3,a5 +; fadd.s fa3,fa0,fa1 +; fmax.s fa5,fa0,fa1 +; select fa0,fa5,fa3##condition=(a1 ne zero) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; feq.s a3, fa0, fa0 +; feq.s a5, fa1, fa1 +; and a1, a3, a5 +; fadd.s fa3, fa0, fa1 +; fmax.s fa5, fa0, fa1 +; beqz a1, 0xc +; fmv.d fa0, fa5 +; j 8 +; fmv.d fa0, fa3 +; ret + +function %fmax_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} + +; VCode: +; block0: +; feq.d a3,fa0,fa0 +; feq.d a5,fa1,fa1 +; and a1,a3,a5 +; fadd.d fa3,fa0,fa1 +; fmax.d fa5,fa0,fa1 +; select fa0,fa5,fa3##condition=(a1 ne zero) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; feq.d a3, fa0, fa0 +; feq.d a5, fa1, fa1 +; and a1, a3, a5 +; fadd.d fa3, fa0, fa1 +; fmax.d fa5, fa0, fa1 +; beqz a1, 0xc +; fmv.d fa0, fa5 +; j 8 +; fmv.d fa0, fa3 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/fmin.clif b/cranelift/filetests/filetests/isa/riscv64/fmin.clif new file mode 100644 index 000000000000..eb68c53f96d0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/fmin.clif @@ -0,0 +1,62 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %fmin_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} + +; VCode: +; block0: +; feq.s a3,fa0,fa0 +; feq.s a5,fa1,fa1 +; and a1,a3,a5 +; fadd.s fa3,fa0,fa1 +; fmin.s fa5,fa0,fa1 +; select fa0,fa5,fa3##condition=(a1 ne zero) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; feq.s a3, fa0, fa0 +; feq.s a5, fa1, fa1 +; and a1, a3, a5 +; fadd.s fa3, fa0, fa1 +; fmin.s fa5, fa0, fa1 +; beqz a1, 0xc +; fmv.d fa0, fa5 +; j 8 +; fmv.d fa0, 
fa3 +; ret + +function %fmin_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} + +; VCode: +; block0: +; feq.d a3,fa0,fa0 +; feq.d a5,fa1,fa1 +; and a1,a3,a5 +; fadd.d fa3,fa0,fa1 +; fmin.d fa5,fa0,fa1 +; select fa0,fa5,fa3##condition=(a1 ne zero) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; feq.d a3, fa0, fa0 +; feq.d a5, fa1, fa1 +; and a1, a3, a5 +; fadd.d fa3, fa0, fa1 +; fmin.d fa5, fa0, fa1 +; beqz a1, 0xc +; fmv.d fa0, fa5 +; j 8 +; fmv.d fa0, fa3 +; ret +