From 72aaff50607293ecfd0258f740a5b675db5d1e33 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 23 Oct 2023 15:18:47 -0500 Subject: [PATCH] riscv64: Refactor FRM and fcvt-to-int management (#7327) * riscv64: Specify rounding modes in instructions This commit updates how floating-point instructions specify their float rounding mode (FRM). Previously instructions stored `Option<FRM>` and this would mostly be `None`. All floating-point instructions in RISC-V have a 3-bit `rm` field, and most encode the FRM into this field but some have a required encoding of this field. For example `fsgnj.s` uses the `rm` field to differentiate between `fsgnj`, `fsgnjx`, and `fsgnjn`. Instructions like `fadd` however use this field for a rounding mode. All FPU instructions now store `FRM` directly. Instruction helpers like `fadd` require this to be specified explicitly. Instruction helpers like those for `fsgnj` do not take this as an argument and hardcode the field as necessary. This means that all lowerings of floating point instructions, where relevant, now specify a rounding mode. Previously the default rounding mode was to use the `fcsr` register, meaning that the rounding mode would be determined dynamically at runtime depending on the status of this register. Cranelift semantics, however, are derivative of WebAssembly semantics which specify round-to-nearest ties-to-even. This PR additionally fixes this discrepancy by using `FRM::RNE` in all existing instructions instead of `FRM::Fcsr`. * riscv64: Refactor float-to-int conversions This commit removes the `FcvtToInt` macro-instruction in the riscv64 backend in favor of decomposing it into individual operations for `fcvt_to_{s,u}int*` instructions. This additionally provides a slightly different lowering for the `*_sat` operations which doesn't use branches. The non-saturating operations continue to have a number of branches and their code has changed slightly due to how immediates are loaded. 
Overall everything is in ISLE now instead of split a bit. * riscv64: Clean up some dead code in the backend Don't put `#![allow(dead_code)]` at the root, instead place it on some smaller items. * Fix emission tests * Add regression tests and bless output Closes #5992 Closes #5993 * Enable i8/i16 saturating float-to-int in fuzzgen * Better `fcvt_*_bound` implementations * Fix typo in match orderings * Fix tests on x64 Where float-to-int isn't implemented for i8/i16 --- .../codegen/meta/src/shared/immediates.rs | 1 + cranelift/codegen/src/isa/riscv64/inst.isle | 208 ++++--- .../codegen/src/isa/riscv64/inst/args.rs | 106 +--- .../codegen/src/isa/riscv64/inst/emit.rs | 251 +------- .../src/isa/riscv64/inst/emit_tests.rs | 340 +++-------- .../codegen/src/isa/riscv64/inst/imms.rs | 4 - cranelift/codegen/src/isa/riscv64/inst/mod.rs | 117 ++-- .../codegen/src/isa/riscv64/inst/regs.rs | 19 +- .../codegen/src/isa/riscv64/inst/vector.rs | 7 - cranelift/codegen/src/isa/riscv64/lower.isle | 133 ++++- .../codegen/src/isa/riscv64/lower/isle.rs | 61 +- .../filetests/isa/riscv64/fcvt-small.clif | 186 +++--- .../filetests/isa/riscv64/float.clif | 551 ++++++++++-------- .../filetests/filetests/isa/riscv64/fmax.clif | 8 +- .../filetests/filetests/isa/riscv64/fmin.clif | 8 +- .../filetests/isa/riscv64/prologue.clif | 248 ++++---- .../filetests/isa/riscv64/return-call.clif | 4 +- .../filetests/runtests/conversion-i8-i16.clif | 234 ++++++++ .../filetests/runtests/conversion.clif | 256 +++++++- .../filetests/runtests/issue-5992.clif | 36 ++ .../filetests/runtests/issue-5993.clif | 19 + cranelift/fuzzgen/src/function_generator.rs | 2 +- 22 files changed, 1504 insertions(+), 1295 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/conversion-i8-i16.clif create mode 100644 cranelift/filetests/filetests/runtests/issue-5992.clif create mode 100644 cranelift/filetests/filetests/runtests/issue-5993.clif diff --git a/cranelift/codegen/meta/src/shared/immediates.rs 
b/cranelift/codegen/meta/src/shared/immediates.rs index 9f908c93da48..5584b5564bb9 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ -181,6 +181,7 @@ impl Immediates { trapcode_values.insert("heap_oob", "HeapOutOfBounds"); trapcode_values.insert("int_ovf", "IntegerOverflow"); trapcode_values.insert("int_divz", "IntegerDivisionByZero"); + trapcode_values.insert("bad_toint", "BadConversionToInteger"); new_enum( "code", "ir::TrapCode", diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 20f501105ea1..8627f2a293fa 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -22,7 +22,7 @@ ;; An ALU operation with one register sources and a register destination. (FpuRR (alu_op FpuOPRR) - (frm OptionFloatRoundingMode) + (frm FRM) (rd WritableReg) (rs Reg)) @@ -37,7 +37,7 @@ ;; An ALU operation with two register sources and a register destination. (FpuRRR (alu_op FpuOPRRR) - (frm OptionFloatRoundingMode) + (frm FRM) (rd WritableReg) (rs1 Reg) (rs2 Reg)) @@ -45,7 +45,7 @@ ;; An ALU operation with three register sources and a register destination. (FpuRRRR (alu_op FpuOPRRRR) - (frm OptionFloatRoundingMode) + (frm FRM) (rd WritableReg) (rs1 Reg) (rs2 Reg) @@ -239,14 +239,6 @@ (addr Reg) (v Reg) (ty Type)) - (FcvtToInt - (is_sat bool) - (rd WritableReg) - (tmp WritableReg) ;; a float register to load bounds. 
- (rs Reg) - (is_signed bool) - (in_type Type) - (out_type Type)) (RawData (data VecU8)) @@ -831,7 +823,6 @@ (type Imm20 (primitive Imm20)) (type Imm3 (primitive Imm3)) (type CondBrTarget (primitive CondBrTarget)) -(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode)) (type VecU8 (primitive VecU8)) (type AMO (primitive AMO)) (type VecMachLabel extern (enum)) @@ -1360,104 +1351,156 @@ ;; TODO: Enable these instructions only when we have the F or D extensions ;; Helper for emitting the `fadd` instruction. -(decl rv_fadd (Type FReg FReg) FReg) -(rule (rv_fadd $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddS) $F32 rs1 rs2)) -(rule (rv_fadd $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FaddD) $F64 rs1 rs2)) +(decl rv_fadd (Type FRM FReg FReg) FReg) +(rule (rv_fadd $F32 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FaddS) $F32 frm rs1 rs2)) +(rule (rv_fadd $F64 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FaddD) $F64 frm rs1 rs2)) ;; Helper for emitting the `fsub` instruction. -(decl rv_fsub (Type FReg FReg) FReg) -(rule (rv_fsub $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubS) $F32 rs1 rs2)) -(rule (rv_fsub $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsubD) $F64 rs1 rs2)) +(decl rv_fsub (Type FRM FReg FReg) FReg) +(rule (rv_fsub $F32 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FsubS) $F32 frm rs1 rs2)) +(rule (rv_fsub $F64 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FsubD) $F64 frm rs1 rs2)) ;; Helper for emitting the `fmul` instruction. -(decl rv_fmul (Type FReg FReg) FReg) -(rule (rv_fmul $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulS) $F32 rs1 rs2)) -(rule (rv_fmul $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmulD) $F64 rs1 rs2)) +(decl rv_fmul (Type FRM FReg FReg) FReg) +(rule (rv_fmul $F32 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FmulS) $F32 frm rs1 rs2)) +(rule (rv_fmul $F64 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FmulD) $F64 frm rs1 rs2)) ;; Helper for emitting the `fdiv` instruction. 
-(decl rv_fdiv (Type FReg FReg) FReg) -(rule (rv_fdiv $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivS) $F32 rs1 rs2)) -(rule (rv_fdiv $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FdivD) $F64 rs1 rs2)) +(decl rv_fdiv (Type FRM FReg FReg) FReg) +(rule (rv_fdiv $F32 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FdivS) $F32 frm rs1 rs2)) +(rule (rv_fdiv $F64 frm rs1 rs2) (fpu_rrr (FpuOPRRR.FdivD) $F64 frm rs1 rs2)) ;; Helper for emitting the `fsqrt` instruction. -(decl rv_fsqrt (Type FReg) FReg) -(rule (rv_fsqrt $F32 rs1) (fpu_rr (FpuOPRR.FsqrtS) $F32 rs1)) -(rule (rv_fsqrt $F64 rs1) (fpu_rr (FpuOPRR.FsqrtD) $F64 rs1)) +(decl rv_fsqrt (Type FRM FReg) FReg) +(rule (rv_fsqrt $F32 frm rs1) (fpu_rr (FpuOPRR.FsqrtS) $F32 frm rs1)) +(rule (rv_fsqrt $F64 frm rs1) (fpu_rr (FpuOPRR.FsqrtD) $F64 frm rs1)) ;; Helper for emitting the `fmadd` instruction. -(decl rv_fmadd (Type FReg FReg FReg) FReg) -(rule (rv_fmadd $F32 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 rs1 rs2 rs3)) -(rule (rv_fmadd $F64 rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 rs1 rs2 rs3)) +(decl rv_fmadd (Type FRM FReg FReg FReg) FReg) +(rule (rv_fmadd $F32 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 frm rs1 rs2 rs3)) +(rule (rv_fmadd $F64 frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 frm rs1 rs2 rs3)) ;; Helper for emitting the `fmv.x.w` instruction. (decl rv_fmvxw (FReg) XReg) -(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 r)) +(rule (rv_fmvxw r) (fpu_rr (FpuOPRR.FmvXW) $I32 (FRM.RNE) r)) ;; Helper for emitting the `fmv.x.d` instruction. (decl rv_fmvxd (FReg) XReg) -(rule (rv_fmvxd r) (fpu_rr (FpuOPRR.FmvXD) $I64 r)) +(rule (rv_fmvxd r) (fpu_rr (FpuOPRR.FmvXD) $I64 (FRM.RNE) r)) ;; Helper for emitting the `fmv.w.x` instruction. (decl rv_fmvwx (XReg) FReg) -(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvWX) $F32 r)) +(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvWX) $F32 (FRM.RNE) r)) ;; Helper for emitting the `fmv.d.x` instruction. 
(decl rv_fmvdx (XReg) FReg) -(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvDX) $F64 r)) +(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvDX) $F64 (FRM.RNE) r)) ;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. (decl rv_fcvtds (FReg) FReg) -(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F32 rs1)) +(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F32 (FRM.RNE) rs1)) ;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. -(decl rv_fcvtsd (FReg) FReg) -(rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F64 rs1)) +(decl rv_fcvtsd (FRM FReg) FReg) +(rule (rv_fcvtsd frm rs1) (fpu_rr (FpuOPRR.FcvtSD) $F64 frm rs1)) ;; Helper for emitting the `fcvt.s.w` instruction. -(decl rv_fcvtsw (XReg) FReg) -(rule (rv_fcvtsw rs1) (fpu_rr (FpuOPRR.FcvtSw) $F32 rs1)) +(decl rv_fcvtsw (FRM XReg) FReg) +(rule (rv_fcvtsw frm rs1) (fpu_rr (FpuOPRR.FcvtSw) $F32 frm rs1)) ;; Helper for emitting the `fcvt.s.wu` instruction. -(decl rv_fcvtswu (XReg) FReg) -(rule (rv_fcvtswu rs1) (fpu_rr (FpuOPRR.FcvtSwU) $F32 rs1)) +(decl rv_fcvtswu (FRM XReg) FReg) +(rule (rv_fcvtswu frm rs1) (fpu_rr (FpuOPRR.FcvtSwU) $F32 frm rs1)) ;; Helper for emitting the `fcvt.d.w` instruction. (decl rv_fcvtdw (XReg) FReg) -(rule (rv_fcvtdw rs1) (fpu_rr (FpuOPRR.FcvtDW) $F32 rs1)) +(rule (rv_fcvtdw rs1) (fpu_rr (FpuOPRR.FcvtDW) $F32 (FRM.RNE) rs1)) ;; Helper for emitting the `fcvt.d.wu` instruction. (decl rv_fcvtdwu (XReg) FReg) -(rule (rv_fcvtdwu rs1) (fpu_rr (FpuOPRR.FcvtDWU) $F32 rs1)) +(rule (rv_fcvtdwu rs1) (fpu_rr (FpuOPRR.FcvtDWU) $F32 (FRM.RNE) rs1)) ;; Helper for emitting the `fcvt.s.l` instruction. -(decl rv_fcvtsl (XReg) FReg) -(rule (rv_fcvtsl rs1) (fpu_rr (FpuOPRR.FcvtSL) $F32 rs1)) +(decl rv_fcvtsl (FRM XReg) FReg) +(rule (rv_fcvtsl frm rs1) (fpu_rr (FpuOPRR.FcvtSL) $F32 frm rs1)) ;; Helper for emitting the `fcvt.s.lu` instruction. 
-(decl rv_fcvtslu (XReg) FReg) -(rule (rv_fcvtslu rs1) (fpu_rr (FpuOPRR.FcvtSLU) $F32 rs1)) +(decl rv_fcvtslu (FRM XReg) FReg) +(rule (rv_fcvtslu frm rs1) (fpu_rr (FpuOPRR.FcvtSLU) $F32 frm rs1)) ;; Helper for emitting the `fcvt.d.l` instruction. -(decl rv_fcvtdl (XReg) FReg) -(rule (rv_fcvtdl rs1) (fpu_rr (FpuOPRR.FcvtDL) $F32 rs1)) +(decl rv_fcvtdl (FRM XReg) FReg) +(rule (rv_fcvtdl frm rs1) (fpu_rr (FpuOPRR.FcvtDL) $F32 frm rs1)) ;; Helper for emitting the `fcvt.d.lu` instruction. -(decl rv_fcvtdlu (XReg) FReg) -(rule (rv_fcvtdlu rs1) (fpu_rr (FpuOPRR.FcvtDLu) $F32 rs1)) +(decl rv_fcvtdlu (FRM XReg) FReg) +(rule (rv_fcvtdlu frm rs1) (fpu_rr (FpuOPRR.FcvtDLu) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.w.s` instruction. +(decl rv_fcvtws (FRM FReg) XReg) +(rule (rv_fcvtws frm rs1) (fpu_rr (FpuOPRR.FcvtWS) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.l.s` instruction. +(decl rv_fcvtls (FRM FReg) XReg) +(rule (rv_fcvtls frm rs1) (fpu_rr (FpuOPRR.FcvtLS) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.wu.s` instruction. +(decl rv_fcvtwus (FRM FReg) XReg) +(rule (rv_fcvtwus frm rs1) (fpu_rr (FpuOPRR.FcvtWuS) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.lu.s` instruction. +(decl rv_fcvtlus (FRM FReg) XReg) +(rule (rv_fcvtlus frm rs1) (fpu_rr (FpuOPRR.FcvtLuS) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.w.d` instruction. +(decl rv_fcvtwd (FRM FReg) XReg) +(rule (rv_fcvtwd frm rs1) (fpu_rr (FpuOPRR.FcvtWD) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.l.d` instruction. +(decl rv_fcvtld (FRM FReg) XReg) +(rule (rv_fcvtld frm rs1) (fpu_rr (FpuOPRR.FcvtLD) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.wu.d` instruction. +(decl rv_fcvtwud (FRM FReg) XReg) +(rule (rv_fcvtwud frm rs1) (fpu_rr (FpuOPRR.FcvtWuD) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.lu.d` instruction. +(decl rv_fcvtlud (FRM FReg) XReg) +(rule (rv_fcvtlud frm rs1) (fpu_rr (FpuOPRR.FcvtLuD) $I64 frm rs1)) + +;; Helper for emitting the `fcvt.w.*` instructions. 
+(decl rv_fcvtw (Type FRM FReg) XReg) +(rule (rv_fcvtw $F32 frm rs1) (rv_fcvtws frm rs1)) +(rule (rv_fcvtw $F64 frm rs1) (rv_fcvtwd frm rs1)) + +;; Helper for emitting the `fcvt.l.*` instructions. +(decl rv_fcvtl (Type FRM FReg) XReg) +(rule (rv_fcvtl $F32 frm rs1) (rv_fcvtls frm rs1)) +(rule (rv_fcvtl $F64 frm rs1) (rv_fcvtld frm rs1)) + +;; Helper for emitting the `fcvt.wu.*` instructions. +(decl rv_fcvtwu (Type FRM FReg) XReg) +(rule (rv_fcvtwu $F32 frm rs1) (rv_fcvtwus frm rs1)) +(rule (rv_fcvtwu $F64 frm rs1) (rv_fcvtwud frm rs1)) + +;; Helper for emitting the `fcvt.lu.*` instructions. +(decl rv_fcvtlu (Type FRM FReg) XReg) +(rule (rv_fcvtlu $F32 frm rs1) (rv_fcvtlus frm rs1)) +(rule (rv_fcvtlu $F64 frm rs1) (rv_fcvtlud frm rs1)) ;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. ;; The output of this instruction is `rs1` with the sign bit from `rs2` ;; This implements the `copysign` operation (decl rv_fsgnj (Type FReg FReg) FReg) -(rule (rv_fsgnj $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjS) $F32 rs1 rs2)) -(rule (rv_fsgnj $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjD) $F64 rs1 rs2)) +(rule (rv_fsgnj $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjS) $F32 (FRM.RNE) rs1 rs2)) +(rule (rv_fsgnj $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjD) $F64 (FRM.RNE) rs1 rs2)) ;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. ;; The output of this instruction is `rs1` with the negated sign bit from `rs2` ;; When `rs1 == rs2` this implements the `neg` operation (decl rv_fsgnjn (Type FReg FReg) FReg) -(rule (rv_fsgnjn $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 rs1 rs2)) -(rule (rv_fsgnjn $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 rs1 rs2)) +(rule (rv_fsgnjn $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 (FRM.RTZ) rs1 rs2)) +(rule (rv_fsgnjn $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 (FRM.RTZ) rs1 rs2)) ;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. 
;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` @@ -1468,8 +1511,8 @@ ;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. ;; When `rs1 == rs2` this implements `fabs` (decl rv_fsgnjx (Type FReg FReg) FReg) -(rule (rv_fsgnjx $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 rs1 rs2)) -(rule (rv_fsgnjx $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 rs1 rs2)) +(rule (rv_fsgnjx $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 (FRM.RDN) rs1 rs2)) +(rule (rv_fsgnjx $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 (FRM.RDN) rs1 rs2)) ;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. ;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` @@ -1478,18 +1521,18 @@ ;; Helper for emitting the `feq` ("Float Equal") instruction. (decl rv_feq (Type FReg FReg) XReg) -(rule (rv_feq $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqS) $I64 rs1 rs2)) -(rule (rv_feq $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqD) $I64 rs1 rs2)) +(rule (rv_feq $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqS) $I64 (FRM.RDN) rs1 rs2)) +(rule (rv_feq $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FeqD) $I64 (FRM.RDN) rs1 rs2)) ;; Helper for emitting the `flt` ("Float Less Than") instruction. (decl rv_flt (Type FReg FReg) XReg) -(rule (rv_flt $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FltS) $I64 rs1 rs2)) -(rule (rv_flt $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FltD) $I64 rs1 rs2)) +(rule (rv_flt $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FltS) $I64 (FRM.RTZ) rs1 rs2)) +(rule (rv_flt $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FltD) $I64 (FRM.RTZ) rs1 rs2)) ;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. 
(decl rv_fle (Type FReg FReg) XReg) -(rule (rv_fle $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FleS) $I64 rs1 rs2)) -(rule (rv_fle $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FleD) $I64 rs1 rs2)) +(rule (rv_fle $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FleS) $I64 (FRM.RNE) rs1 rs2)) +(rule (rv_fle $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FleD) $I64 (FRM.RNE) rs1 rs2)) ;; Helper for emitting the `fgt` ("Float Greater Than") instruction. ;; Note: The arguments are reversed @@ -1503,13 +1546,13 @@ ;; Helper for emitting the `fmin` instruction. (decl rv_fmin (Type FReg FReg) FReg) -(rule (rv_fmin $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FminS) $F32 rs1 rs2)) -(rule (rv_fmin $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FminD) $F64 rs1 rs2)) +(rule (rv_fmin $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FminS) $F32 (FRM.RNE) rs1 rs2)) +(rule (rv_fmin $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FminD) $F64 (FRM.RNE) rs1 rs2)) ;; Helper for emitting the `fmax` instruction. (decl rv_fmax (Type FReg FReg) FReg) -(rule (rv_fmax $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxS) $F32 rs1 rs2)) -(rule (rv_fmax $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxD) $F64 rs1 rs2)) +(rule (rv_fmax $F32 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxS) $F32 (FRM.RTZ) rs1 rs2)) +(rule (rv_fmax $F64 rs1 rs2) (fpu_rrr (FpuOPRRR.FmaxD) $F64 (FRM.RTZ) rs1 rs2)) ;; `Zba` Extension Instructions @@ -1937,14 +1980,11 @@ (rule (canonical_nan_u64 $F32) 0x7fc00000) (rule (canonical_nan_u64 $F64) 0x7ff8000000000000) -(decl gen_default_frm () OptionFloatRoundingMode) -(extern constructor gen_default_frm gen_default_frm) - ;; Helper for emitting `MInst.FpuRR` instructions. -(decl fpu_rr (FpuOPRR Type Reg) Reg) -(rule (fpu_rr op ty src) +(decl fpu_rr (FpuOPRR Type FRM Reg) Reg) +(rule (fpu_rr op ty frm src) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuRR op (gen_default_frm) dst src)))) + (_ Unit (emit (MInst.FpuRR op frm dst src)))) dst)) ;; Helper for emitting `MInst.AluRRR` instructions. @@ -1955,17 +1995,17 @@ dst)) ;; Helper for emitting `MInst.AluRRR` instructions. 
-(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg) -(rule (fpu_rrr op ty src1 src2) +(decl fpu_rrr (FpuOPRRR Type FRM Reg Reg) Reg) +(rule (fpu_rrr op ty frm src1 src2) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2)))) + (_ Unit (emit (MInst.FpuRRR op frm dst src1 src2)))) dst)) ;; Helper for emitting `MInst.FpuRRRR` instructions. -(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg) -(rule (fpu_rrrr op ty src1 src2 src3) +(decl fpu_rrrr (FpuOPRRRR Type FRM Reg Reg Reg) Reg) +(rule (fpu_rrrr op ty frm src1 src2 src3) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuRRRR op (gen_default_frm) dst src1 src2 src3)))) + (_ Unit (emit (MInst.FpuRRRR op frm dst src1 src2 src3)))) dst)) @@ -2479,16 +2519,6 @@ (_ Unit (emit (MInst.ElfTlsGetAddr dst name)))) dst)) -;;;; -(decl gen_fcvt_int (bool FReg bool Type Type) XReg) -(rule - (gen_fcvt_int is_sat rs is_signed in_type out_type) - (let - ((result WritableReg (temp_writable_reg out_type)) - (tmp WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type)))) - (writable_reg_to_reg result))) - ;;; some float binary operation ;;; 1. need move into x reister. ;;; 2. do the operation. diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 7539e3172833..e791d31a7723 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -1,7 +1,5 @@ //! Riscv64 ISA definitions: instruction arguments. -// Some variants are never constructed, but we still want them as options in the future. 
-#![allow(dead_code)] use super::*; use crate::ir::condcodes::CondCode; @@ -251,17 +249,8 @@ impl BranchFunct3 { BranchFunct3::Geu => 0b111, } } - pub(crate) fn op_name(self) -> &'static str { - match self { - BranchFunct3::Eq => "eq", - BranchFunct3::Ne => "ne", - BranchFunct3::Lt => "lt", - BranchFunct3::Ge => "ge", - BranchFunct3::Ltu => "ltu", - BranchFunct3::Geu => "geu", - } - } } + impl IntegerCompare { pub(crate) fn op_code(self) -> u32 { 0b1100011 @@ -345,10 +334,6 @@ impl FpuOPRRRR { } } - pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { - rounding_mode.unwrap_or_default().as_u32() - } - pub(crate) fn op_code(self) -> u32 { match self { FpuOPRRRR::FmaddS => 0b1000011, @@ -417,15 +402,6 @@ impl FpuOPRR { } } - // move from f register to x register. - pub(crate) fn move_f_to_x_op(ty: Type) -> Self { - match ty { - F32 => Self::FmvXW, - F64 => Self::FmvXD, - _ => unreachable!("ty:{:?}", ty), - } - } - pub(crate) fn float_convert_2_int_op(from: Type, is_type_signed: bool, to: Type) -> Self { let type_32 = to.bits() <= 32; match from { @@ -554,38 +530,6 @@ impl FpuOPRR { FpuOPRR::FsqrtD => 0b0101101, } } - - pub(crate) fn funct3(self, rounding_mode: Option) -> u32 { - let rounding_mode = rounding_mode.unwrap_or_default().as_u32(); - match self { - FpuOPRR::FsqrtS => rounding_mode, - FpuOPRR::FcvtWS => rounding_mode, - FpuOPRR::FcvtWuS => rounding_mode, - FpuOPRR::FmvXW => 0b000, - FpuOPRR::FclassS => 0b001, - FpuOPRR::FcvtSw => rounding_mode, - FpuOPRR::FcvtSwU => rounding_mode, - FpuOPRR::FmvWX => 0b000, - FpuOPRR::FcvtLS => rounding_mode, - FpuOPRR::FcvtLuS => rounding_mode, - FpuOPRR::FcvtSL => rounding_mode, - FpuOPRR::FcvtSLU => rounding_mode, - FpuOPRR::FcvtLD => rounding_mode, - FpuOPRR::FcvtLuD => rounding_mode, - FpuOPRR::FmvXD => 0b000, - FpuOPRR::FcvtDL => rounding_mode, - FpuOPRR::FcvtDLu => rounding_mode, - FpuOPRR::FmvDX => 0b000, - FpuOPRR::FcvtSD => rounding_mode, - FpuOPRR::FcvtDS => rounding_mode, - FpuOPRR::FclassD => 0b001, - 
FpuOPRR::FcvtWD => rounding_mode, - FpuOPRR::FcvtWuD => rounding_mode, - FpuOPRR::FcvtDW => rounding_mode, - FpuOPRR::FcvtDWU => 0b000, - FpuOPRR::FsqrtD => rounding_mode, - } - } } impl FpuOPRRR { @@ -618,41 +562,6 @@ impl FpuOPRRR { } } - pub fn funct3(self, rounding_mode: Option) -> u32 { - let rounding_mode = rounding_mode.unwrap_or_default(); - let rounding_mode = rounding_mode.as_u32(); - match self { - Self::FaddS => rounding_mode, - Self::FsubS => rounding_mode, - Self::FmulS => rounding_mode, - Self::FdivS => rounding_mode, - - Self::FsgnjS => 0b000, - Self::FsgnjnS => 0b001, - Self::FsgnjxS => 0b010, - Self::FminS => 0b000, - Self::FmaxS => 0b001, - - Self::FeqS => 0b010, - Self::FltS => 0b001, - Self::FleS => 0b000, - - Self::FaddD => rounding_mode, - Self::FsubD => rounding_mode, - Self::FmulD => rounding_mode, - Self::FdivD => rounding_mode, - - Self::FsgnjD => 0b000, - Self::FsgnjnD => 0b001, - Self::FsgnjxD => 0b010, - Self::FminD => 0b000, - Self::FmaxD => 0b001, - Self::FeqD => 0b010, - Self::FltD => 0b001, - Self::FleD => 0b000, - } - } - pub fn op_code(self) -> u32 { match self { Self::FaddS @@ -1239,6 +1148,7 @@ impl FRM { impl FFlagsException { #[inline] + #[allow(dead_code)] pub(crate) fn mask(self) -> u32 { match self { FFlagsException::NV => 1 << 4, @@ -1363,6 +1273,7 @@ impl StoreOP { } } +#[allow(dead_code)] impl FClassResult { pub(crate) const fn bit(self) -> u32 { match self { @@ -1695,15 +1606,8 @@ impl FloatRoundOP { } } -pub(crate) fn f32_bits(f: f32) -> u32 { - u32::from_le_bytes(f.to_le_bytes()) -} -pub(crate) fn f64_bits(f: f64) -> u64 { - u64::from_le_bytes(f.to_le_bytes()) -} - /// -pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) { +pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f32, f32) { match (signed, out_bits) { (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), @@ -1717,7 
+1621,7 @@ pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) { } } -pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) { +pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f64, f64) { match (signed, out_bits) { (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index e85c466e4359..cdc7619f8ddb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -68,10 +68,6 @@ impl EmitState { self.stack_map.take() } - fn clear_post_insn(&mut self) { - self.stack_map = None; - } - fn cur_srcloc(&self) -> RelSourceLoc { self.cur_srcloc } @@ -159,7 +155,7 @@ impl Inst { } else { FpuOPRRR::FeqD }, - frm: None, + frm: FRM::RDN, rd: rd, rs1: rs, rs2: rs, @@ -173,49 +169,12 @@ impl Inst { } else { FpuOPRRR::FsgnjxD }, - frm: None, + frm: FRM::RDN, rd: rd, rs1: rs, rs2: rs, } } - /// If a float is zero. - pub(crate) fn emit_if_float_not_zero( - tmp: Writable, - rs: Reg, - ty: Type, - taken: CondBrTarget, - not_taken: CondBrTarget, - ) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - let class_op = if ty == F32 { - FpuOPRR::FclassS - } else { - FpuOPRR::FclassD - }; - insts.push(Inst::FpuRR { - alu_op: class_op, - frm: None, - rd: tmp, - rs: rs, - }); - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_i16(FClassResult::is_zero_bits() as i16), - }); - insts.push(Inst::CondBr { - taken, - not_taken, - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: tmp.to_reg(), - rs2: zero_reg(), - }, - }); - insts - } /// Returns Some(VState) if this insturction is expecting a specific vector state /// before emission. @@ -259,7 +218,6 @@ impl Inst { | Inst::Atomic { .. } | Inst::Select { .. 
} | Inst::AtomicCas { .. } - | Inst::FcvtToInt { .. } | Inst::RawData { .. } | Inst::AtomicStore { .. } | Inst::AtomicLoad { .. } @@ -968,7 +926,7 @@ impl Inst { } => { let x = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3(frm) << 12 + | frm.as_u32() << 12 | reg_to_gpr_num(rs) << 15 | alu_op.rs2_funct5() << 20 | alu_op.funct7() << 25; @@ -988,7 +946,7 @@ impl Inst { } => { let x = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3(frm) << 12 + | frm.as_u32() << 12 | reg_to_gpr_num(rs1) << 15 | reg_to_gpr_num(rs2) << 20 | alu_op.funct2() << 25 @@ -1005,7 +963,7 @@ impl Inst { } => { let x: u32 = alu_op.op_code() | reg_to_gpr_num(rd.to_reg()) << 7 - | (alu_op.funct3(frm)) << 12 + | frm.as_u32() << 12 | reg_to_gpr_num(rs1) << 15 | reg_to_gpr_num(rs2) << 20 | alu_op.funct7() << 25; @@ -1337,7 +1295,7 @@ impl Inst { } else { FpuOPRRR::FsgnjD }, - frm: None, + frm: FRM::RNE, rd: rd, rs1: rm, rs2: rm, @@ -1951,175 +1909,6 @@ impl Inst { .emit(&[], sink, emit_info, state); } - &Inst::FcvtToInt { - is_sat, - rd, - rs, - is_signed, - in_type, - out_type, - tmp, - } => { - let label_nan = sink.get_label(); - let label_jump_over = sink.get_label(); - // get if nan. - Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); - // jump to nan. 
- Inst::CondBr { - taken: CondBrTarget::Label(label_nan), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::Equal, - rs2: zero_reg(), - rs1: rd.to_reg(), - }, - } - .emit(&[], sink, emit_info, state); - - if !is_sat { - let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); - let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); - if in_type == F32 { - Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| { - writable_spilltmp_reg() - }) - } else { - Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| { - writable_spilltmp_reg() - }) - } - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - let le_op = if in_type == F32 { - FpuOPRRR::FleS - } else { - FpuOPRRR::FleD - }; - - // rd := rs <= tmp - Inst::FpuRRR { - alu_op: le_op, - frm: None, - rd, - rs1: rs, - rs2: tmp.to_reg(), - } - .emit(&[], sink, emit_info, state); - Inst::TrapIf { - cc: IntCC::NotEqual, - rs1: rd.to_reg(), - rs2: zero_reg(), - trap_code: TrapCode::IntegerOverflow, - } - .emit(&[], sink, emit_info, state); - - if in_type == F32 { - Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| { - writable_spilltmp_reg() - }) - } else { - Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| { - writable_spilltmp_reg() - }) - } - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - - // rd := rs >= tmp - Inst::FpuRRR { - alu_op: le_op, - frm: None, - rd, - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(&[], sink, emit_info, state); - - Inst::TrapIf { - cc: IntCC::NotEqual, - rs1: rd.to_reg(), - rs2: zero_reg(), - trap_code: TrapCode::IntegerOverflow, - } - .emit(&[], sink, emit_info, state); - } - // convert to int normally. - Inst::FpuRR { - frm: Some(FRM::RTZ), - alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), - rd, - rs, - } - .emit(&[], sink, emit_info, state); - if out_type.bits() < 32 && is_signed { - // load value part mask. 
- Inst::load_constant_u32( - writable_spilltmp_reg(), - if 16 == out_type.bits() { - (u16::MAX >> 1) as u64 - } else { - // I8 - (u8::MAX >> 1) as u64 - }, - ) - .into_iter() - .for_each(|x| x.emit(&[], sink, emit_info, state)); - // keep value part. - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg(), - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - // extact sign bit. - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(31), - } - .emit(&[], sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(if 16 == out_type.bits() { - 15 - } else { - // I8 - 7 - }), - } - .emit(&[], sink, emit_info, state); - // make result,sign bit and value part. - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: spilltmp_reg(), - } - .emit(&[], sink, emit_info, state); - } - - // I already have the result,jump over. - Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state); - // here is nan , move 0 into rd register - sink.bind_label(label_nan, &mut state.ctrl_plane); - if is_sat { - Inst::load_imm12(rd, Imm12::ZERO).emit(&[], sink, emit_info, state); - } else { - // here is ud2. - Inst::Udf { - trap_code: TrapCode::BadConversionToInteger, - } - .emit(&[], sink, emit_info, state); - } - // bind jump_over - sink.bind_label(label_jump_over, &mut state.ctrl_plane); - } - &Inst::LoadExtName { rd, ref name, @@ -2368,7 +2157,7 @@ impl Inst { // branch if f_tmp < rd Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: if ty == F32 { FpuOPRRR::FltS } else { @@ -2394,7 +2183,7 @@ impl Inst { //convert to int. 
Inst::FpuRR { alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), - frm: Some(op.to_frm()), + frm: op.to_frm(), rd: int_tmp, rs: rs, } @@ -2406,7 +2195,7 @@ impl Inst { } else { FpuOPRR::FcvtDL }, - frm: Some(op.to_frm()), + frm: op.to_frm(), rd, rs: int_tmp.to_reg(), } @@ -2418,7 +2207,7 @@ impl Inst { } else { FpuOPRRR::FsgnjD }, - frm: None, + frm: FRM::RNE, rd, rs1: rd.to_reg(), rs2: rs, @@ -2434,7 +2223,7 @@ impl Inst { } else { FpuOPRRR::FaddD }, - frm: None, + frm: FRM::RNE, rd: rd, rs1: rs, rs2: rs, @@ -3296,24 +3085,6 @@ impl Inst { dst: allocs.next_writable(dst), }, - Inst::FcvtToInt { - is_sat, - rd, - rs, - is_signed, - in_type, - out_type, - tmp, - } => Inst::FcvtToInt { - is_sat, - is_signed, - in_type, - out_type, - rs: allocs.next(rs), - tmp: allocs.next_writable(tmp), - rd: allocs.next_writable(rd), - }, - Inst::LoadExtName { rd, name, offset } => Inst::LoadExtName { rd: allocs.next_writable(rd), name, diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index d42312e60f4b..d0975502acf4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -5,6 +5,10 @@ use crate::settings; use alloc::vec::Vec; use std::borrow::Cow; +fn fa7() -> Reg { + f_reg(17) +} + #[test] fn test_riscv64_binemit() { struct TestUnit { @@ -1156,7 +1160,7 @@ fn test_riscv64_binemit() { // insns.push(TestUnit::new( Inst::FpuRRR { - frm: Some(FRM::RNE), + frm: FRM::RNE, alu_op: FpuOPRRR::FaddS, rd: writable_fa0(), rs1: fa0(), @@ -1167,7 +1171,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: Some(FRM::RTZ), + frm: FRM::RTZ, alu_op: FpuOPRRR::FsubS, rd: writable_fa0(), rs1: fa0(), @@ -1178,7 +1182,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: Some(FRM::RUP), + frm: FRM::RUP, alu_op: FpuOPRRR::FmulS, rd: writable_fa0(), rs1: fa0(), @@ -1189,18 +1193,18 @@ fn 
test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRR::FdivS, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), }, - "fdiv.s fa0,fa0,fa1", + "fdiv.s fa0,fa0,fa1,fcsr", 0x18b57553, )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FsgnjS, rd: writable_fa0(), rs1: fa0(), @@ -1211,7 +1215,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FsgnjnS, rd: writable_fa0(), rs1: fa0(), @@ -1223,7 +1227,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RDN, alu_op: FpuOPRRR::FsgnjxS, rd: writable_fa0(), rs1: fa0(), @@ -1234,7 +1238,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FminS, rd: writable_fa0(), rs1: fa0(), @@ -1246,7 +1250,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FmaxS, rd: writable_fa0(), rs1: fa0(), @@ -1257,7 +1261,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RDN, alu_op: FpuOPRRR::FeqS, rd: writable_a0(), rs1: fa0(), @@ -1268,7 +1272,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FltS, rd: writable_a0(), rs1: fa0(), @@ -1279,7 +1283,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FleS, rd: writable_a0(), rs1: fa0(), @@ -1292,51 +1296,51 @@ fn test_riscv64_binemit() { // insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRR::FaddD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), }, - "fadd.d fa0,fa0,fa1", + "fadd.d fa0,fa0,fa1,fcsr", 0x2b57553, )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRR::FsubD, 
rd: writable_fa0(), rs1: fa0(), rs2: fa1(), }, - "fsub.d fa0,fa0,fa1", + "fsub.d fa0,fa0,fa1,fcsr", 0xab57553, )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRR::FmulD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), }, - "fmul.d fa0,fa0,fa1", + "fmul.d fa0,fa0,fa1,fcsr", 0x12b57553, )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRR::FdivD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), }, - "fdiv.d fa0,fa0,fa1", + "fdiv.d fa0,fa0,fa1,fcsr", 0x1ab57553, )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FsgnjD, rd: writable_fa0(), rs1: fa0(), @@ -1347,7 +1351,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FsgnjnD, rd: writable_fa0(), rs1: fa0(), @@ -1359,7 +1363,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RDN, alu_op: FpuOPRRR::FsgnjxD, rd: writable_fa0(), rs1: fa0(), @@ -1370,7 +1374,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FminD, rd: writable_fa0(), rs1: fa0(), @@ -1382,7 +1386,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FmaxD, rd: writable_fa0(), rs1: fa0(), @@ -1393,7 +1397,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RDN, alu_op: FpuOPRRR::FeqD, rd: writable_a0(), rs1: fa0(), @@ -1404,7 +1408,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRRR::FltD, rd: writable_a0(), rs1: fa0(), @@ -1415,7 +1419,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRRR::FleD, rd: writable_a0(), rs1: fa0(), @@ -1428,7 +1432,7 @@ fn test_riscv64_binemit() { // 
insns.push(TestUnit::new( Inst::FpuRR { - frm: Some(FRM::RNE), + frm: FRM::RNE, alu_op: FpuOPRR::FsqrtS, rd: writable_fa0(), rs: fa1(), @@ -1438,28 +1442,28 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtWS, rd: writable_a0(), rs: fa1(), }, - "fcvt.w.s a0,fa1", + "fcvt.w.s a0,fa1,fcsr", 0xc005f553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtWuS, rd: writable_a0(), rs: fa1(), }, - "fcvt.wu.s a0,fa1", + "fcvt.wu.s a0,fa1,fcsr", 0xc015f553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::FmvXW, rd: writable_a0(), rs: fa1(), @@ -1469,7 +1473,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRR::FclassS, rd: writable_a0(), rs: fa1(), @@ -1480,28 +1484,28 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtSw, rd: writable_fa0(), rs: a0(), }, - "fcvt.s.w fa0,a0", + "fcvt.s.w fa0,a0,fcsr", 0xd0057553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtSwU, rd: writable_fa0(), rs: a0(), }, - "fcvt.s.wu fa0,a0", + "fcvt.s.wu fa0,a0,fcsr", 0xd0157553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::FmvWX, rd: writable_fa0(), rs: a0(), @@ -1511,81 +1515,81 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtLS, rd: writable_a0(), rs: fa0(), }, - "fcvt.l.s a0,fa0", + "fcvt.l.s a0,fa0,fcsr", 0xc0257553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtLuS, rd: writable_a0(), rs: fa0(), }, - "fcvt.lu.s a0,fa0", + "fcvt.lu.s a0,fa0,fcsr", 0xc0357553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtSL, rd: 
writable_fa0(), rs: a0(), }, - "fcvt.s.l fa0,a0", + "fcvt.s.l fa0,a0,fcsr", 0xd0257553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtSLU, rd: writable_fa0(), rs: a0(), }, - "fcvt.s.lu fa0,a0", + "fcvt.s.lu fa0,a0,fcsr", 0xd0357553, )); // insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FsqrtD, rd: writable_fa0(), rs: fa1(), }, - "fsqrt.d fa0,fa1", + "fsqrt.d fa0,fa1,fcsr", 0x5a05f553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtWD, rd: writable_a0(), rs: fa1(), }, - "fcvt.w.d a0,fa1", + "fcvt.w.d a0,fa1,fcsr", 0xc205f553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtWuD, rd: writable_a0(), rs: fa1(), }, - "fcvt.wu.d a0,fa1", + "fcvt.wu.d a0,fa1,fcsr", 0xc215f553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::FmvXD, rd: writable_a0(), rs: fa1(), @@ -1595,7 +1599,7 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RTZ, alu_op: FpuOPRR::FclassD, rd: writable_a0(), rs: fa1(), @@ -1606,17 +1610,17 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtSD, rd: writable_fa0(), rs: fa0(), }, - "fcvt.s.d fa0,fa0", + "fcvt.s.d fa0,fa0,fcsr", 0x40157553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::FcvtDWU, rd: writable_fa0(), rs: a0(), @@ -1627,7 +1631,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::FmvDX, rd: writable_fa0(), rs: a0(), @@ -1637,49 +1641,49 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtLD, rd: writable_a0(), rs: fa0(), }, - "fcvt.l.d a0,fa0", + "fcvt.l.d a0,fa0,fcsr", 0xc2257553, )); insns.push(TestUnit::new( 
Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtLuD, rd: writable_a0(), rs: fa0(), }, - "fcvt.lu.d a0,fa0", + "fcvt.lu.d a0,fa0,fcsr", 0xc2357553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtDL, rd: writable_fa0(), rs: a0(), }, - "fcvt.d.l fa0,a0", + "fcvt.d.l fa0,a0,fcsr", 0xd2257553, )); insns.push(TestUnit::new( Inst::FpuRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRR::FcvtDLu, rd: writable_fa0(), rs: a0(), }, - "fcvt.d.lu fa0,a0", + "fcvt.d.lu fa0,a0,fcsr", 0xd2357553, )); ////////////////////// insns.push(TestUnit::new( Inst::FpuRRRR { - frm: Some(FRM::RNE), + frm: FRM::RNE, alu_op: FpuOPRRRR::FmaddS, rd: writable_fa0(), rs1: fa0(), @@ -1691,56 +1695,56 @@ fn test_riscv64_binemit() { )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FmsubS, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fmsub.s fa0,fa0,fa1,fa7", + "fmsub.s fa0,fa0,fa1,fa7,fcsr", 0x88b57547, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FnmsubS, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fnmsub.s fa0,fa0,fa1,fa7", + "fnmsub.s fa0,fa0,fa1,fa7,fcsr", 0x88b5754b, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FnmaddS, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fnmadd.s fa0,fa0,fa1,fa7", + "fnmadd.s fa0,fa0,fa1,fa7,fcsr", 0x88b5754f, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FmaddD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fmadd.d fa0,fa0,fa1,fa7", + "fmadd.d fa0,fa0,fa1,fa7,fcsr", 0x8ab57543, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FmsubD, rd: writable_fa0(), @@ -1748,31 +1752,31 @@ fn test_riscv64_binemit() { rs2: fa1(), rs3: fa7(), }, - "fmsub.d fa0,fa0,fa1,fa7", + "fmsub.d fa0,fa0,fa1,fa7,fcsr", 
0x8ab57547, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FnmsubD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fnmsub.d fa0,fa0,fa1,fa7", + "fnmsub.d fa0,fa0,fa1,fa7,fcsr", 0x8ab5754b, )); insns.push(TestUnit::new( Inst::FpuRRRR { - frm: None, + frm: FRM::Fcsr, alu_op: FpuOPRRRR::FnmaddD, rd: writable_fa0(), rs1: fa0(), rs2: fa1(), rs3: fa7(), }, - "fnmadd.d fa0,fa0,fa1,fa7", + "fnmadd.d fa0,fa0,fa1,fa7,fcsr", 0x8ab5754f, )); @@ -2039,7 +2043,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { alu_op: FpuOPRRR::FsgnjS, - frm: None, + frm: FRM::RNE, rd: writable_fa0(), rs1: fa1(), rs2: fa1(), @@ -2050,7 +2054,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { alu_op: FpuOPRRR::FsgnjD, - frm: None, + frm: FRM::RNE, rd: writable_fa0(), rs1: fa1(), rs2: fa1(), @@ -2062,7 +2066,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { alu_op: FpuOPRRR::FsgnjnS, - frm: None, + frm: FRM::RTZ, rd: writable_fa0(), rs1: fa1(), rs2: fa1(), @@ -2073,7 +2077,7 @@ fn test_riscv64_binemit() { insns.push(TestUnit::new( Inst::FpuRRR { alu_op: FpuOPRRR::FsgnjnD, - frm: None, + frm: FRM::RTZ, rd: writable_fa0(), rs1: fa1(), rs2: fa1(), @@ -2110,111 +2114,6 @@ fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { (flags, isa_flags) } -#[derive(Debug)] -pub(crate) struct DebugRTypeInst { - op_code: u32, - rd: u32, - funct3: u32, - rs1: u32, - rs2: u32, - funct7: u32, -} - -impl DebugRTypeInst { - pub(crate) fn from_bs(x: &[u8]) -> Option { - if x.len() != 4 { - return None; - } - let a = [x[0], x[1], x[2], x[3]]; - Some(Self::from_u32(u32::from_le_bytes(a))) - } - - pub(crate) fn from_u32(x: u32) -> Self { - let op_code = x & 0b111_1111; - let x = x >> 7; - let rd = x & 0b1_1111; - let x = x >> 5; - let funct3 = x & 0b111; - let x = x >> 3; - let rs1 = x & 0b1_1111; - let x = x >> 5; - let rs2 = x & 0b1_1111; - let x = x >> 5; - let 
funct7 = x & 0b111_1111; - Self { - op_code, - rd, - funct3, - rs1, - rs2, - funct7, - } - } -} - -#[derive(Debug)] -pub(crate) struct DebugITypeInst { - op_code: u32, - rd: u32, - funct3: u32, - rs: u32, - imm12: u32, - shamt5: u32, - shamt6: u32, - funct7: u32, - funct6: u32, -} - -impl DebugITypeInst { - pub(crate) fn from_bs(x: &[u8]) -> Self { - let a = [x[0], x[1], x[2], x[3]]; - Self::from_u32(u32::from_le_bytes(a)) - } - pub(crate) fn from_u32(x: u32) -> Self { - let op_code = x & 0b111_1111; - let x = x >> 7; - let rd = x & 0b1_1111; - let x = x >> 5; - let funct3 = x & 0b111; - let x = x >> 3; - let rs = x & 0b1_1111; - let x = x >> 5; - let imm12 = x & 0b1111_1111_1111; - let shamt5 = imm12 & 0b1_1111; - let shamt6 = imm12 & 0b11_1111; - let funct7 = imm12 >> 5; - let funct6 = funct7 >> 1; - Self { - op_code, - rd, - funct3, - rs, - imm12, - shamt5, - shamt6, - funct7, - funct6, - } - } - fn print_b(self) { - println!("opcode:{:b}", self.op_code); - println!("rd:{}", self.rd); - println!("funct3:{:b}", self.funct3); - println!("rs:{}", self.rs); - println!("shamt5:{:b}", self.shamt5); - println!("shamt6:{:b}", self.shamt6); - println!("funct6:{:b}", self.funct6); - println!("funct7:{:b}", self.funct7); - } -} - -#[test] -fn xxx() { - let x = 1240847763; - let x = DebugITypeInst::from_u32(x); - x.print_b(); -} - #[test] fn riscv64_worst_case_instruction_size() { let (flags, isa_flags) = make_test_flags(); @@ -2223,61 +2122,6 @@ fn riscv64_worst_case_instruction_size() { //there are all candidates potential generate a lot of bytes. 
let mut candidates: Vec = vec![]; - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F64, - out_type: I8, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F64, - out_type: I16, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F32, - out_type: I8, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F32, - out_type: I16, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F64, - out_type: I8, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FcvtToInt { - rd: writable_a0(), - rs: fa0(), - is_signed: true, - in_type: F64, - out_type: I16, - is_sat: false, - tmp: writable_a1(), - }); - candidates.push(Inst::FloatRound { op: FloatRoundOP::Trunc, int_tmp: writable_a0(), diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs index f1126fd914cb..259599b53086 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -199,10 +199,6 @@ impl Imm6 { value.try_into().ok().and_then(Imm6::maybe_from_i16) } - pub fn maybe_from_i64(value: i64) -> Option { - value.try_into().ok().and_then(Imm6::maybe_from_i16) - } - pub fn maybe_from_imm12(value: Imm12) -> Option { Imm6::maybe_from_i16(value.as_i16()) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 5b588943c022..875b53561020 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1,9 +1,5 @@ //! This module defines riscv64-specific machine instruction types. 
-// Some variants are not constructed, but we still want them as options in the future. -#![allow(dead_code)] -#![allow(non_camel_case_types)] - use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}; use crate::binemit::{Addend, CodeOffset, Reloc}; pub use crate::ir::condcodes::IntCC; @@ -44,12 +40,8 @@ mod emit_tests; use std::fmt::{Display, Formatter}; -pub(crate) type OptionReg = Option; -pub(crate) type OptionImm12 = Option; -pub(crate) type OptionUimm5 = Option; -pub(crate) type OptionFloatRoundingMode = Option; pub(crate) type VecU8 = Vec; -pub(crate) type VecWritableReg = Vec>; + //============================================================================= // Instructions (top level): definition @@ -59,10 +51,6 @@ pub use crate::isa::riscv64::lower::isle::generated_code::{ }; use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR}; -type BoxCallInfo = Box; -type BoxCallIndInfo = Box; -type BoxReturnCallInfo = Box; - /// Additional information for (direct) Call instructions, left out of line to lower the size of /// the Inst enum. #[derive(Clone, Debug)] @@ -259,7 +247,7 @@ impl Inst { let tmp = alloc_tmp(I64); insts.extend(Self::load_constant_u32(tmp, const_data as u64)); insts.push(Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::move_x_to_f_op(F32), rd, rs: tmp.to_reg(), @@ -277,7 +265,7 @@ impl Inst { let tmp = alloc_tmp(I64); insts.extend(Self::load_constant_u64(tmp, const_data)); insts.push(Inst::FpuRR { - frm: None, + frm: FRM::RNE, alu_op: FpuOPRR::move_x_to_f_op(F64), rd, rs: tmp.to_reg(), @@ -556,11 +544,6 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_early_def(dst); } - &Inst::FcvtToInt { rd, rs, tmp, .. } => { - collector.reg_use(rs); - collector.reg_early_def(tmp); - collector.reg_early_def(rd); - } &Inst::RawData { .. } => {} &Inst::AtomicStore { src, p, .. 
} => { collector.reg_use(src); @@ -1044,12 +1027,8 @@ impl Inst { x }; - fn format_frm(rounding_mode: Option) -> String { - if let Some(r) = rounding_mode { - format!(",{}", r.to_static_str(),) - } else { - "".into() - } + fn format_frm(rounding_mode: FRM) -> String { + format!(",{}", rounding_mode.to_static_str()) } let mut empty_allocs = AllocationConsumer::default(); @@ -1203,29 +1182,6 @@ impl Inst { step ) } - &Inst::FcvtToInt { - is_sat, - rd, - rs, - is_signed, - in_type, - out_type, - tmp, - } => { - let rs = format_reg(rs, allocs); - let tmp = format_reg(tmp.to_reg(), allocs); - let rd = format_reg(rd.to_reg(), allocs); - format!( - "fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}", - if is_signed { "s" } else { "u" }, - if is_sat { "_sat" } else { "" }, - out_type, - rd, - rs, - in_type, - tmp - ) - } &Inst::AtomicCas { offset, t0, @@ -1317,7 +1273,18 @@ impl Inst { } => { let rs = format_reg(rs, allocs); let rd = format_reg(rd.to_reg(), allocs); - format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm)) + let frm = match alu_op { + FpuOPRR::FmvXW + | FpuOPRR::FmvWX + | FpuOPRR::FmvXD + | FpuOPRR::FmvDX + | FpuOPRR::FclassS + | FpuOPRR::FclassD + | FpuOPRR::FcvtDW + | FpuOPRR::FcvtDWU => String::new(), + _ => format_frm(frm), + }; + format!("{} {rd},{rs}{frm}", alu_op.op_name()) } &Inst::FpuRRR { alu_op, @@ -1332,35 +1299,32 @@ impl Inst { let rs1_is_rs2 = rs1 == rs2; if rs1_is_rs2 && alu_op.is_copy_sign() { // this is move instruction. 
- format!( - "fmv.{} {},{}", - if alu_op.is_32() { "s" } else { "d" }, - rd, - rs1 - ) + format!("fmv.{} {rd},{rs1}", if alu_op.is_32() { "s" } else { "d" }) } else if rs1_is_rs2 && alu_op.is_copy_neg_sign() { - format!( - "fneg.{} {},{}", - if alu_op.is_32() { "s" } else { "d" }, - rd, - rs1 - ) + format!("fneg.{} {rd},{rs1}", if alu_op.is_32() { "s" } else { "d" }) } else if rs1_is_rs2 && alu_op.is_copy_xor_sign() { - format!( - "fabs.{} {},{}", - if alu_op.is_32() { "s" } else { "d" }, - rd, - rs1 - ) + format!("fabs.{} {rd},{rs1}", if alu_op.is_32() { "s" } else { "d" }) } else { - format!( - "{} {},{},{}{}", - alu_op.op_name(), - rd, - rs1, - rs2, - format_frm(frm) - ) + let frm = match alu_op { + FpuOPRRR::FsgnjS + | FpuOPRRR::FsgnjnS + | FpuOPRRR::FsgnjxS + | FpuOPRRR::FsgnjD + | FpuOPRRR::FsgnjnD + | FpuOPRRR::FsgnjxD + | FpuOPRRR::FminS + | FpuOPRRR::FminD + | FpuOPRRR::FmaxS + | FpuOPRRR::FmaxD + | FpuOPRRR::FeqS + | FpuOPRRR::FeqD + | FpuOPRRR::FltS + | FpuOPRRR::FltD + | FpuOPRRR::FleS + | FpuOPRRR::FleD => String::new(), + _ => format_frm(frm), + }; + format!("{} {rd},{rs1},{rs2}{frm}", alu_op.op_name()) } } &Inst::FpuRRRR { @@ -1969,6 +1933,7 @@ impl MachInstLabelUse for LabelUse { } impl LabelUse { + #[allow(dead_code)] // in case it's needed in the future fn offset_in_range(self, offset: i64) -> bool { let min = -(self.max_neg_range() as i64); let max = self.max_pos_range() as i64; diff --git a/cranelift/codegen/src/isa/riscv64/inst/regs.rs b/cranelift/codegen/src/isa/riscv64/inst/regs.rs index e78acee06b6c..d17e19b37744 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/regs.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/regs.rs @@ -18,38 +18,46 @@ pub fn a0() -> Reg { // second argument of function call #[inline] +#[allow(dead_code)] pub fn a1() -> Reg { x_reg(11) } // third argument of function call #[inline] +#[allow(dead_code)] pub fn a2() -> Reg { x_reg(12) } #[inline] +#[allow(dead_code)] pub fn writable_a0() -> Writable { 
Writable::from_reg(a0()) } #[inline] +#[allow(dead_code)] pub fn writable_a1() -> Writable { Writable::from_reg(a1()) } #[inline] +#[allow(dead_code)] pub fn writable_a2() -> Writable { Writable::from_reg(a2()) } #[inline] +#[allow(dead_code)] pub fn fa0() -> Reg { f_reg(10) } #[inline] +#[allow(dead_code)] pub fn writable_fa0() -> Writable { Writable::from_reg(fa0()) } #[inline] +#[allow(dead_code)] pub fn writable_fa1() -> Writable { Writable::from_reg(fa1()) } @@ -58,11 +66,6 @@ pub fn fa1() -> Reg { f_reg(11) } -#[inline] -pub fn fa7() -> Reg { - f_reg(17) -} - /// Get a reference to the zero-register. #[inline] pub fn zero_reg() -> Reg { @@ -167,12 +170,6 @@ pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec> { regs } -#[inline] -pub fn v_reg(enc: usize) -> Reg { - let p_reg = PReg::new(enc, RegClass::Vector); - let v_reg = VReg::new(p_reg.index(), p_reg.class()); - Reg::from(v_reg) -} pub const fn pv_reg(enc: usize) -> PReg { PReg::new(enc, RegClass::Vector) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 6ab2016f1020..4d5ac4f611ff 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,5 +1,4 @@ use crate::isa::riscv64::inst::AllocationConsumer; -use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::VecAluOpRRRR; use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAvl, @@ -1085,12 +1084,6 @@ impl VecAMode { } } - pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { - match self { - VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state), - } - } - /// `mop` field, described in Table 7 of Section 7.2. 
Vector Load/Store Addressing Modes /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes pub fn mop(&self) -> u32 { diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index de00086d8e6c..0d3a271f4df6 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1500,7 +1500,7 @@ ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fma x y z))) - (rv_fmadd ty x y z)) + (rv_fmadd ty (FRM.RNE) x y z)) ;; (fma x y z) computes x * y + z ;; vfmacc computes vd[i] = +(vs1[i] * vs2[i]) + vd[i] @@ -1539,7 +1539,7 @@ ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x))) - (rv_fsqrt ty x)) + (rv_fsqrt ty (FRM.RNE) x)) (rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x))) (rv_vfsqrt_v x (unmasked) ty)) @@ -1651,7 +1651,7 @@ ;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; (rule (lower (fdemote x)) - (rv_fcvtsd x)) + (rv_fcvtsd (FRM.RNE) x)) ;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;; @@ -1670,7 +1670,7 @@ ;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y))) - (rv_fadd ty x y)) + (rv_fadd ty (FRM.RNE) x y)) (rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y))) (rv_vfadd_vv x y (unmasked) ty)) @@ -1684,7 +1684,7 @@ ;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y))) - (rv_fsub ty x y)) + (rv_fsub ty (FRM.RNE) x y)) (rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y))) (rv_vfsub_vv x y (unmasked) ty)) @@ -1697,7 +1697,7 @@ ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y))) - (rv_fmul ty x y)) + (rv_fmul 
ty (FRM.RNE) x y)) (rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y))) (rv_vfmul_vv x y (unmasked) ty)) @@ -1711,7 +1711,7 @@ ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y))) - (rv_fdiv ty x y)) + (rv_fdiv ty (FRM.RNE) x y)) (rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y))) (rv_vfdiv_vv x y (unmasked) ty)) @@ -1731,7 +1731,7 @@ (let (;; Check if both inputs are not nan. (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) ;; `fadd` returns a nan if any of the inputs is a NaN. - (nan FReg (rv_fadd ty x y)) + (nan FReg (rv_fadd ty (FRM.RNE) x y)) (min FReg (rv_fmin ty x y))) (gen_select_freg is_ordered min nan))) @@ -1757,7 +1757,7 @@ (let (;; Check if both inputs are not nan. (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) ;; `fadd` returns a NaN if any of the inputs is a NaN. - (nan FReg (rv_fadd ty x y)) + (nan FReg (rv_fadd ty (FRM.RNE) x y)) (max FReg (rv_fmax ty x y))) (gen_select_freg is_ordered max nan))) @@ -2231,17 +2231,81 @@ (load_ext_name name 0)) ;;;;; Rules for `fcvt_to_uint`;;;;;;;;; -(rule - (lower (has_type to (fcvt_to_uint v @ (value_type from)))) - (gen_fcvt_int $false v $false from to)) + +;; RISC-V float-to-integer conversion does not trap, but Cranelift semantics are +;; to trap. This manually performs checks for NaN and out-of-bounds values and +;; traps in such cases. +;; +;; TODO: could this perhaps be more optimal through inspection of the `fcsr`? +;; Unsure whether that needs to be preserved across function calls and/or would +;; cause other problems. Also unsure whether it's actually more performant. 
+(rule (lower (has_type ity (fcvt_to_uint v @ (value_type fty)))) + (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) + (min FReg (imm fty (fcvt_umin_bound fty $false))) + (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) + (max FReg (imm fty (fcvt_umax_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) + (lower_inbounds_fcvt_to_uint ity fty v))) + +(decl lower_inbounds_fcvt_to_uint (Type Type FReg) XReg) +(rule 0 (lower_inbounds_fcvt_to_uint (fits_in_32 _) fty v) + (rv_fcvtwu fty (FRM.RTZ) v)) +(rule 1 (lower_inbounds_fcvt_to_uint $I64 fty v) + (rv_fcvtlu fty (FRM.RTZ) v)) ;;;;; Rules for `fcvt_to_sint`;;;;;;;;; -(rule 0 (lower (has_type to (fcvt_to_sint v @ (value_type (ty_scalar_float from))))) - (gen_fcvt_int $false v $true from to)) + +;; NB: see above with `fcvt_to_uint` as this is similar +(rule (lower (has_type ity (fcvt_to_sint v @ (value_type fty)))) + (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) + (min FReg (imm fty (fcvt_smin_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) + (max FReg (imm fty (fcvt_smax_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) + (lower_inbounds_fcvt_to_sint ity fty v))) + +(decl lower_inbounds_fcvt_to_sint (Type Type FReg) XReg) +(rule 0 (lower_inbounds_fcvt_to_sint (fits_in_32 _) fty v) + (rv_fcvtw fty (FRM.RTZ) v)) +(rule 1 (lower_inbounds_fcvt_to_sint $I64 fty v) + (rv_fcvtl fty (FRM.RTZ) v)) ;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; + (rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_scalar_float from))))) - (gen_fcvt_int $true v $true from to)) + (handle_fcvt_to_int_nan from v (lower_fcvt_to_sint_sat from to v))) + +;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the +;; float is clamped before the conversion. 
+(decl lower_fcvt_to_sint_sat (Type Type FReg) XReg) +(rule 0 (lower_fcvt_to_sint_sat ty (fits_in_16 out_ty) v) + (let ((max FReg (imm ty (fcvt_smax_bound ty out_ty $true))) + (min FReg (imm ty (fcvt_smin_bound ty out_ty $true))) + (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) + (rv_fcvtw ty (FRM.RTZ) clamped))) +(rule 1 (lower_fcvt_to_sint_sat ty $I32 v) (rv_fcvtw ty (FRM.RTZ) v)) +(rule 1 (lower_fcvt_to_sint_sat ty $I64 v) (rv_fcvtl ty (FRM.RTZ) v)) + +(decl fcvt_smax_bound (Type Type bool) u64) +(extern constructor fcvt_smax_bound fcvt_smax_bound) +(decl fcvt_smin_bound (Type Type bool) u64) +(extern constructor fcvt_smin_bound fcvt_smin_bound) + +;; RISC-V float-to-int conversions generate the same output for NaN and +Inf, +;; but Cranelift semantics are to produce 0 for NaN instead. This helper +;; translates these semantics by taking the float being converted (with the type +;; specified) and the native RISC-V output as an `XReg`. The returned `XReg` +;; will be zeroed out if the float is NaN. +;; +;; This is done by comparing the float to itself, generating 0 if it's NaN. This +;; bit is then negated to become either all-ones or all-zeros which is then +;; and-ed against the native output. That'll produce all zeros if the input is +;; NaN or the native output otherwise. 
+(decl handle_fcvt_to_int_nan (Type FReg XReg) XReg) +(rule (handle_fcvt_to_int_nan ty freg xreg) + (let ((is_not_nan XReg (rv_feq ty freg freg)) + (not_nan_mask XReg (rv_neg is_not_nan))) + (rv_and xreg not_nan_mask))) (rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_sint_sat v @ (value_type from_ty)))) (if-let zero (i8_to_imm5 0)) @@ -2250,8 +2314,25 @@ (rv_vmerge_vim cvt zero is_nan from_ty))) ;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; + (rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_scalar_float from))))) - (gen_fcvt_int $true v $false from to)) + (handle_fcvt_to_int_nan from v (lower_fcvt_to_uint_sat from to v))) + +;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the +;; float is clamped before the conversion. +(decl lower_fcvt_to_uint_sat (Type Type FReg) XReg) +(rule 0 (lower_fcvt_to_uint_sat ty (fits_in_16 out_ty) v) + (let ((max FReg (imm ty (fcvt_umax_bound ty out_ty $true))) + (min FReg (rv_fmvdx (zero_reg))) + (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) + (rv_fcvtwu ty (FRM.RTZ) clamped))) +(rule 1 (lower_fcvt_to_uint_sat ty $I32 v) (rv_fcvtwu ty (FRM.RTZ) v)) +(rule 1 (lower_fcvt_to_uint_sat ty $I64 v) (rv_fcvtlu ty (FRM.RTZ) v)) + +(decl fcvt_umax_bound (Type Type bool) u64) +(extern constructor fcvt_umax_bound fcvt_umax_bound) +(decl fcvt_umin_bound (Type bool) u64) +(extern constructor fcvt_umin_bound fcvt_umin_bound) (rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_uint_sat v @ (value_type from_ty)))) (if-let zero (i8_to_imm5 0)) @@ -2261,44 +2342,44 @@ ;;;;; Rules for `fcvt_from_sint`;;;;;;;;; (rule 0 (lower (has_type $F32 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtsl (sext v))) + (rv_fcvtsl (FRM.RNE) (sext v))) (rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I32)))) - (rv_fcvtsw v)) + (rv_fcvtsw (FRM.RNE) v)) (rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I64)))) - (rv_fcvtsl v)) + (rv_fcvtsl (FRM.RNE) v)) (rule 0 (lower 
(has_type $F64 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtdl (sext v))) + (rv_fcvtdl (FRM.RNE) (sext v))) (rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I32)))) (rv_fcvtdw v)) (rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I64)))) - (rv_fcvtdl v)) + (rv_fcvtdl (FRM.RNE) v)) (rule 2 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_sint v @ (value_type from_ty)))) (rv_vfcvt_f_x_v v (unmasked) from_ty)) ;;;;; Rules for `fcvt_from_uint`;;;;;;;;; (rule 0 (lower (has_type $F32 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtslu (zext v))) + (rv_fcvtslu (FRM.RNE) (zext v))) (rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I32)))) - (rv_fcvtswu v)) + (rv_fcvtswu (FRM.RNE) v)) (rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I64)))) - (rv_fcvtslu v)) + (rv_fcvtslu (FRM.RNE) v)) (rule 0 (lower (has_type $F64 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtdlu (zext v))) + (rv_fcvtdlu (FRM.RNE) (zext v))) (rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I32)))) (rv_fcvtdwu v)) (rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I64)))) - (rv_fcvtdlu v)) + (rv_fcvtdlu (FRM.RNE) v)) (rule 2 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_uint v @ (value_type from_ty)))) (rv_vfcvt_f_xu_v v (unmasked) from_ty)) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 232e35e08140..ab5c6ddf1299 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -303,10 +303,6 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> Imm12::maybe_from_i64(val).unwrap() } - fn gen_default_frm(&mut self) -> OptionFloatRoundingMode { - None - } - fn frm_bits(&mut self, frm: &FRM) -> UImm5 { UImm5::maybe_from_u8(frm.bits()).unwrap() } @@ -541,6 +537,63 @@ impl generated_code::Context for RV64IsleContext<'_, 
'_, MInst, Riscv64Backend> fn bseti_imm(&mut self, i: u64) -> Option { self.binvi_imm(i) } + + fn fcvt_smin_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + match (int, float) { + // Saturating cases for larger integers are handled using the + // `fcvt.{w,d}.{s,d}` instruction directly, that automatically + // saturates up/down to the correct limit. + // + // NB: i32/i64 don't use this function because the native RISC-V + // instruction does everything we already need, so only cases for + // i8/i16 are listed here. + (I8, F32) if saturating => f32::from(i8::MIN).to_bits().into(), + (I8, F64) if saturating => f64::from(i8::MIN).to_bits(), + (I16, F32) if saturating => f32::from(i16::MIN).to_bits().into(), + (I16, F64) if saturating => f64::from(i16::MIN).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).0.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).0.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_smax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + // NB: see `fcvt_smin_bound` for some more comments + match (int, float) { + (I8, F32) if saturating => f32::from(i8::MAX).to_bits().into(), + (I8, F64) if saturating => f64::from(i8::MAX).to_bits(), + (I16, F32) if saturating => f32::from(i16::MAX).to_bits().into(), + (I16, F64) if saturating => f64::from(i16::MAX).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).1.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).1.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_umax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + // NB: see `fcvt_smin_bound` for some more comments + match (int, float) { + (I8, F32) if saturating => f32::from(u8::MAX).to_bits().into(), + (I8, F64) if saturating => f64::from(u8::MAX).to_bits(), + (I16, F32) if saturating => f32::from(u16::MAX).to_bits().into(), + (I16, F64) if 
saturating => f64::from(u16::MAX).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(false, int.bits()).1.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(false, int.bits()).1.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_umin_bound(&mut self, float: Type, saturating: bool) -> u64 { + assert!(!saturating); + match float { + F32 => (-1.0f32).to_bits().into(), + F64 => (-1.0f64).to_bits(), + _ => unimplemented!(), + } + } } /// The main entry point for lowering with ISLE. diff --git a/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif b/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif index 26745e25a37c..24ec8d5ea02f 100644 --- a/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif +++ b/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif @@ -11,13 +11,13 @@ block0(v0: i8): ; VCode: ; block0: ; andi a2,a0,255 -; fcvt.s.lu fa0,a2 +; fcvt.s.lu fa0,a2,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; andi a2, a0, 0xff -; fcvt.s.lu fa0, a2 +; fcvt.s.lu fa0, a2, rne ; ret function u0:0(i8) -> f64 { @@ -29,13 +29,13 @@ block0(v0: i8): ; VCode: ; block0: ; andi a2,a0,255 -; fcvt.d.lu fa0,a2 +; fcvt.d.lu fa0,a2,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; andi a2, a0, 0xff -; fcvt.d.lu fa0, a2 +; fcvt.d.lu fa0, a2, rne ; ret function u0:0(i16) -> f32 { @@ -48,14 +48,14 @@ block0(v0: i16): ; block0: ; slli a2,a0,48 ; srli a4,a2,48 -; fcvt.s.lu fa0,a4 +; fcvt.s.lu fa0,a4,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; slli a2, a0, 0x30 ; srli a4, a2, 0x30 -; fcvt.s.lu fa0, a4 +; fcvt.s.lu fa0, a4, rne ; ret function u0:0(i16) -> f64 { @@ -68,14 +68,14 @@ block0(v0: i16): ; block0: ; slli a2,a0,48 ; srli a4,a2,48 -; fcvt.d.lu fa0,a4 +; fcvt.d.lu fa0,a4,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; slli a2, a0, 0x30 ; srli a4, a2, 0x30 -; fcvt.d.lu fa0, a4 +; fcvt.d.lu fa0, a4, rne ; ret function u0:0(f32) -> i8 { @@ -86,29 +86,35 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_uint.i8 
a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,-264192 +; fmv.w.x fa1,a5 +; fle.s a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,276480 +; fmv.w.x fa2,a0 +; fle.s a4,fa2,fa0 +; trap_if int_ovf##(a4 ne zero) +; fcvt.wu.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x00, 0x00, 0x80, 0xbf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbf800 +; fmv.w.x fa1, a5 +; fle.s a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x43800 -; fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 -; beqz a0, 8 +; lui a0, 0x43800 +; fmv.w.x fa2, a0 +; fle.s a4, fa2, fa0 +; beqz a4, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.s a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function u0:0(f64) -> i8 { @@ -119,34 +125,39 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint.i8 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,3071 +; slli a1,a5,40 +; fmv.d.x fa3,a1 +; fle.d a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,1031 +; slli a4,a2,40 +; fmv.d.x fa1,a4 +; fle.d a2,fa1,fa0 +; trap_if int_ovf##(a2 ne zero) +; fcvt.wu.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0xbf -; fmv.d.x fa3, t6 -; fle.d a0, fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbff +; slli a1, a5, 0x28 +; fmv.d.x fa3, a1 +; fle.d a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x70, 0x40 -; fmv.d.x fa3, t6 
-; fle.d a0, fa3, fa0 -; beqz a0, 8 +; lui a2, 0x407 +; slli a4, a2, 0x28 +; fmv.d.x fa1, a4 +; fle.d a2, fa1, fa0 +; beqz a2, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function u0:0(f32) -> i16 { @@ -157,29 +168,35 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_uint.i16 a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,-264192 +; fmv.w.x fa1,a5 +; fle.s a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,292864 +; fmv.w.x fa2,a0 +; fle.s a4,fa2,fa0 +; trap_if int_ovf##(a4 ne zero) +; fcvt.wu.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x00, 0x00, 0x80, 0xbf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbf800 +; fmv.w.x fa1, a5 +; fle.s a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x47800 -; fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 -; beqz a0, 8 +; lui a0, 0x47800 +; fmv.w.x fa2, a0 +; fle.s a4, fa2, fa0 +; beqz a4, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.s a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function u0:0(f64) -> i16 { @@ -190,33 +207,38 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint.i16 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,3071 +; slli a1,a5,40 +; fmv.d.x fa3,a1 +; fle.d a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,1039 +; slli a4,a2,40 +; fmv.d.x fa1,a4 +; fle.d a2,fa1,fa0 +; trap_if int_ovf##(a2 ne zero) +; fcvt.wu.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0xbf -; fmv.d.x fa3, t6 -; fle.d a0, 
fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbff +; slli a1, a5, 0x28 +; fmv.d.x fa3, a1 +; fle.d a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0x40 -; fmv.d.x fa3, t6 -; fle.d a0, fa3, fa0 -; beqz a0, 8 +; lui a2, 0x40f +; slli a4, a2, 0x28 +; fmv.d.x fa1, a4 +; fle.d a2, fa1, fa0 +; beqz a2, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/float.clif b/cranelift/filetests/filetests/isa/riscv64/float.clif index fc14272e316e..dae86033df6c 100644 --- a/cranelift/filetests/filetests/isa/riscv64/float.clif +++ b/cranelift/filetests/filetests/isa/riscv64/float.clif @@ -10,12 +10,12 @@ block0(v0: f32, v1: f32): ; VCode: ; block0: -; fadd.s fa0,fa0,fa1 +; fadd.s fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fadd.s fa0, fa0, fa1 +; fadd.s fa0, fa0, fa1, rne ; ret function %f2(f64, f64) -> f64 { @@ -26,12 +26,12 @@ block0(v0: f64, v1: f64): ; VCode: ; block0: -; fadd.d fa0,fa0,fa1 +; fadd.d fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fadd.d fa0, fa0, fa1 +; fadd.d fa0, fa0, fa1, rne ; ret function %f3(f32, f32) -> f32 { @@ -42,12 +42,12 @@ block0(v0: f32, v1: f32): ; VCode: ; block0: -; fsub.s fa0,fa0,fa1 +; fsub.s fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fsub.s fa0, fa0, fa1 +; fsub.s fa0, fa0, fa1, rne ; ret function %f4(f64, f64) -> f64 { @@ -58,12 +58,12 @@ block0(v0: f64, v1: f64): ; VCode: ; block0: -; fsub.d fa0,fa0,fa1 +; fsub.d fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fsub.d fa0, fa0, fa1 +; fsub.d fa0, fa0, fa1, rne ; ret function %f5(f32, f32) -> f32 { @@ -74,12 +74,12 @@ block0(v0: f32, v1: f32): ; VCode: ; block0: -; fmul.s 
fa0,fa0,fa1 +; fmul.s fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fmul.s fa0, fa0, fa1 +; fmul.s fa0, fa0, fa1, rne ; ret function %f6(f64, f64) -> f64 { @@ -90,12 +90,12 @@ block0(v0: f64, v1: f64): ; VCode: ; block0: -; fmul.d fa0,fa0,fa1 +; fmul.d fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fmul.d fa0, fa0, fa1 +; fmul.d fa0, fa0, fa1, rne ; ret function %f7(f32, f32) -> f32 { @@ -106,12 +106,12 @@ block0(v0: f32, v1: f32): ; VCode: ; block0: -; fdiv.s fa0,fa0,fa1 +; fdiv.s fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fdiv.s fa0, fa0, fa1 +; fdiv.s fa0, fa0, fa1, rne ; ret function %f8(f64, f64) -> f64 { @@ -122,15 +122,14 @@ block0(v0: f64, v1: f64): ; VCode: ; block0: -; fdiv.d fa0,fa0,fa1 +; fdiv.d fa0,fa0,fa1,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fdiv.d fa0, fa0, fa1 +; fdiv.d fa0, fa0, fa1, rne ; ret - function %f13(f32) -> f32 { block0(v0: f32): v1 = sqrt v0 @@ -139,12 +138,12 @@ block0(v0: f32): ; VCode: ; block0: -; fsqrt.s fa0,fa0 +; fsqrt.s fa0,fa0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fsqrt.s fa0, fa0 +; fsqrt.s fa0, fa0, rne ; ret function %f15(f64) -> f64 { @@ -155,12 +154,12 @@ block0(v0: f64): ; VCode: ; block0: -; fsqrt.d fa0,fa0 +; fsqrt.d fa0,fa0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fsqrt.d fa0, fa0 +; fsqrt.d fa0, fa0, rne ; ret function %f16(f32) -> f32 { @@ -235,12 +234,12 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt.d.s fa0,fa0 +; fcvt.d.s fa0,fa0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x53, 0x75, 0x05, 0x42 +; fcvt.d.s fa0, fa0 ; ret function %f21(f64) -> f32 { @@ -251,12 +250,12 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt.s.d fa0,fa0 +; fcvt.s.d fa0,fa0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.s.d fa0, fa0 +; fcvt.s.d fa0, fa0, rne ; ret function %f22(f32) -> f32 { @@ -285,7 +284,7 @@ block0(v0: f32): ; fcvt.s.l fa0, a3, rup ; fsgnj.s fa0, fa0, fa5 ; j 0x10 -; fadd.s fa0, fa5, fa5 +; 
fadd.s fa0, fa5, fa5, rne ; j 8 ; fmv.s fa0, fa5 ; ret @@ -320,7 +319,7 @@ block0(v0: f64): ; fcvt.d.l fa0, a3, rup ; fsgnj.d fa0, fa0, fa5 ; j 0x10 -; fadd.d fa0, fa5, fa5 +; fadd.d fa0, fa5, fa5, rne ; j 8 ; fmv.d fa0, fa5 ; ret @@ -351,7 +350,7 @@ block0(v0: f32): ; fcvt.s.l fa0, a3, rdn ; fsgnj.s fa0, fa0, fa5 ; j 0x10 -; fadd.s fa0, fa5, fa5 +; fadd.s fa0, fa5, fa5, rne ; j 8 ; fmv.s fa0, fa5 ; ret @@ -386,7 +385,7 @@ block0(v0: f64): ; fcvt.d.l fa0, a3, rdn ; fsgnj.d fa0, fa0, fa5 ; j 0x10 -; fadd.d fa0, fa5, fa5 +; fadd.d fa0, fa5, fa5, rne ; j 8 ; fmv.d fa0, fa5 ; ret @@ -417,7 +416,7 @@ block0(v0: f32): ; fcvt.s.l fa0, a3, rtz ; fsgnj.s fa0, fa0, fa5 ; j 0x10 -; fadd.s fa0, fa5, fa5 +; fadd.s fa0, fa5, fa5, rne ; j 8 ; fmv.s fa0, fa5 ; ret @@ -452,7 +451,7 @@ block0(v0: f64): ; fcvt.d.l fa0, a3, rtz ; fsgnj.d fa0, fa0, fa5 ; j 0x10 -; fadd.d fa0, fa5, fa5 +; fadd.d fa0, fa5, fa5, rne ; j 8 ; fmv.d fa0, fa5 ; ret @@ -483,7 +482,7 @@ block0(v0: f32): ; fcvt.s.l fa0, a3, rne ; fsgnj.s fa0, fa0, fa5 ; j 0x10 -; fadd.s fa0, fa5, fa5 +; fadd.s fa0, fa5, fa5, rne ; j 8 ; fmv.s fa0, fa5 ; ret @@ -518,7 +517,7 @@ block0(v0: f64): ; fcvt.d.l fa0, a3, rne ; fsgnj.d fa0, fa0, fa5 ; j 0x10 -; fadd.d fa0, fa5, fa5 +; fadd.d fa0, fa5, fa5, rne ; j 8 ; fmv.d fa0, fa5 ; ret @@ -531,12 +530,12 @@ block0(v0: f32, v1: f32, v2: f32): ; VCode: ; block0: -; fmadd.s fa0,fa0,fa1,fa2 +; fmadd.s fa0,fa0,fa1,fa2,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fmadd.s fa0, fa0, fa1, fa2 +; fmadd.s fa0, fa0, fa1, fa2, rne ; ret function %f30(f64, f64, f64) -> f64 { @@ -547,12 +546,12 @@ block0(v0: f64, v1: f64, v2: f64): ; VCode: ; block0: -; fmadd.d fa0,fa0,fa1,fa2 +; fmadd.d fa0,fa0,fa1,fa2,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fmadd.d fa0, fa0, fa1, fa2 +; fmadd.d fa0, fa0, fa1, fa2, rne ; ret function %f31(f32, f32) -> f32 { @@ -595,29 +594,35 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_uint.i32 a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if 
bad_toint##(a2 eq zero) +; lui a5,-264192 +; fmv.w.x fa1,a5 +; fle.s a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,325632 +; fmv.w.x fa2,a0 +; fle.s a4,fa2,fa0 +; trap_if int_ovf##(a4 ne zero) +; fcvt.wu.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x00, 0x00, 0x80, 0xbf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbf800 +; fmv.w.x fa1, a5 +; fle.s a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x4f800 -; fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 -; beqz a0, 8 +; lui a0, 0x4f800 +; fmv.w.x fa2, a0 +; fle.s a4, fa2, fa0 +; beqz a4, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.s a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f34(f32) -> i32 { @@ -628,29 +633,37 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_sint.i32 a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,-200704 +; addi a1,a5,1 +; fmv.w.x fa3,a1 +; fle.s a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,323584 +; fmv.w.x fa4,a2 +; fle.s a0,fa4,fa0 +; trap_if int_ovf##(a0 ne zero) +; fcvt.w.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x01, 0x00, 0x00, 0xcf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xcf000 +; addi a1, a5, 1 +; fmv.w.x fa3, a1 +; fle.s a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x4f000 -; fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 +; lui a2, 0x4f000 +; fmv.w.x fa4, a2 +; fle.s a0, fa4, fa0 ; beqz a0, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.w.s a0, fa0, rtz -; j 8 -; .byte 0x00, 
0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f35(f32) -> i64 { @@ -661,29 +674,35 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_uint.i64 a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,-264192 +; fmv.w.x fa1,a5 +; fle.s a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,391168 +; fmv.w.x fa2,a0 +; fle.s a4,fa2,fa0 +; trap_if int_ovf##(a4 ne zero) +; fcvt.lu.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x00, 0x00, 0x80, 0xbf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbf800 +; fmv.w.x fa1, a5 +; fle.s a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x5f800 -; fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 -; beqz a0, 8 +; lui a0, 0x5f800 +; fmv.w.x fa2, a0 +; fle.s a4, fa2, fa0 +; beqz a4, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.lu.s a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f36(f32) -> i64 { @@ -694,29 +713,37 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_sint.i64 a0,fa0##in_ty=f32 tmp=fa3 +; feq.s a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,-135168 +; addi a1,a5,1 +; fmv.w.x fa3,a1 +; fle.s a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,389120 +; fmv.w.x fa4,a2 +; fle.s a0,fa4,fa0 +; trap_if int_ovf##(a0 ne zero) +; fcvt.l.s a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0x40 -; auipc t6, 0 -; lw t6, 0xc(t6) -; j 8 -; .byte 0x01, 0x00, 0x00, 0xdf -; fmv.w.x fa3, t6 -; fle.s a0, fa0, fa3 -; beqz a0, 8 +; feq.s a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xdf000 +; addi a1, a5, 1 +; fmv.w.x fa3, a1 +; fle.s a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; lui t6, 0x5f000 -; 
fmv.w.x fa3, t6 -; fle.s a0, fa3, fa0 +; lui a2, 0x5f000 +; fmv.w.x fa4, a2 +; fle.s a0, fa4, fa0 ; beqz a0, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.l.s a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f37(f64) -> i32 { @@ -727,34 +754,39 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint.i32 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,3071 +; slli a1,a5,40 +; fmv.d.x fa3,a1 +; fle.d a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,1055 +; slli a4,a2,40 +; fmv.d.x fa1,a4 +; fle.d a2,fa1,fa0 +; trap_if int_ovf##(a2 ne zero) +; fcvt.wu.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0xbf -; fmv.d.x fa3, t6 -; fle.d a0, fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbff +; slli a1, a5, 0x28 +; fmv.d.x fa3, a1 +; fle.d a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0x41 -; fmv.d.x fa3, t6 -; fle.d a0, fa3, fa0 -; beqz a0, 8 +; lui a2, 0x41f +; slli a4, a2, 0x28 +; fmv.d.x fa1, a4 +; fle.d a2, fa1, fa0 +; beqz a2, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.wu.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f38(f64) -> i32 { @@ -765,35 +797,42 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_sint.i32 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; ld a5,[const(0)] +; fmv.d.x fa1,a5 +; fle.d a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,527 +; slli a2,a0,41 +; fmv.d.x fa4,a2 +; fle.d a0,fa4,fa0 +; trap_if int_ovf##(a0 ne zero) +; fcvt.w.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; 
beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x20, 0x00 -; .byte 0x00, 0x00, 0xe0, 0xc1 -; fmv.d.x fa3, t6 -; fle.d a0, fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; auipc a5, 0 +; ld a5, 0x3c(a5) +; fmv.d.x fa1, a5 +; fle.d a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xe0, 0x41 -; fmv.d.x fa3, t6 -; fle.d a0, fa3, fa0 +; lui a0, 0x20f +; slli a2, a0, 0x29 +; fmv.d.x fa4, a2 +; fle.d a0, fa4, fa0 ; beqz a0, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.w.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x20, 0x00 +; .byte 0x00, 0x00, 0xe0, 0xc1 function %f39(f64) -> i64 { block0(v0: f64): @@ -803,34 +842,39 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint.i64 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; lui a5,3071 +; slli a1,a5,40 +; fmv.d.x fa3,a1 +; fle.d a5,fa0,fa3 +; trap_if int_ovf##(a5 ne zero) +; lui a2,1087 +; slli a4,a2,40 +; fmv.d.x fa1,a4 +; fle.d a2,fa1,fa0 +; trap_if int_ovf##(a2 ne zero) +; fcvt.lu.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0xbf -; fmv.d.x fa3, t6 -; fle.d a0, fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; lui a5, 0xbff +; slli a1, a5, 0x28 +; fmv.d.x fa3, a1 +; fle.d a5, fa0, fa3 +; beqz a5, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xf0, 0x43 -; fmv.d.x fa3, t6 -; fle.d a0, fa3, fa0 -; beqz a0, 8 +; lui a2, 0x43f +; slli a4, a2, 0x28 +; fmv.d.x fa1, a4 +; fle.d 
a2, fa1, fa0 +; beqz a2, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.lu.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret function %f40(f64) -> i64 { @@ -841,35 +885,42 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_sint.i64 a0,fa0##in_ty=f64 tmp=fa3 +; feq.d a2,fa0,fa0 +; trap_if bad_toint##(a2 eq zero) +; ld a5,[const(0)] +; fmv.d.x fa1,a5 +; fle.d a3,fa0,fa1 +; trap_if int_ovf##(a3 ne zero) +; lui a0,543 +; slli a2,a0,41 +; fmv.d.x fa4,a2 +; fle.d a0,fa4,fa0 +; trap_if int_ovf##(a0 ne zero) +; fcvt.l.d a0,fa0,rtz ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0x54 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x01, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xe0, 0xc3 -; fmv.d.x fa3, t6 -; fle.d a0, fa0, fa3 -; beqz a0, 8 +; feq.d a2, fa0, fa0 +; bnez a2, 8 +; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint +; auipc a5, 0 +; ld a5, 0x3c(a5) +; fmv.d.x fa1, a5 +; fle.d a3, fa0, fa1 +; beqz a3, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0xe0, 0x43 -; fmv.d.x fa3, t6 -; fle.d a0, fa3, fa0 +; lui a0, 0x21f +; slli a2, a0, 0x29 +; fmv.d.x fa4, a2 +; fle.d a0, fa4, fa0 ; beqz a0, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: int_ovf ; fcvt.l.d a0, fa0, rtz -; j 8 -; .byte 0x00, 0x00, 0x00, 0x00 ; trap: bad_toint ; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x01, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xe0, 0xc3 function %f41(i32) -> f32 { block0(v0: i32): @@ -879,12 +930,12 @@ block0(v0: i32): ; VCode: ; block0: -; fcvt.s.wu fa0,a0 +; fcvt.s.wu fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.s.wu fa0, a0 +; fcvt.s.wu fa0, a0, rne ; ret function %f42(i32) -> f32 { @@ -895,12 +946,12 @@ block0(v0: i32): ; VCode: ; block0: -; fcvt.s.w fa0,a0 +; fcvt.s.w fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.s.w fa0, a0 +; fcvt.s.w fa0, a0, rne ; ret function %f43(i64) -> 
f32 { @@ -911,12 +962,12 @@ block0(v0: i64): ; VCode: ; block0: -; fcvt.s.lu fa0,a0 +; fcvt.s.lu fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.s.lu fa0, a0 +; fcvt.s.lu fa0, a0, rne ; ret function %f44(i64) -> f32 { @@ -927,12 +978,12 @@ block0(v0: i64): ; VCode: ; block0: -; fcvt.s.l fa0,a0 +; fcvt.s.l fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.s.l fa0, a0 +; fcvt.s.l fa0, a0, rne ; ret function %f45(i32) -> f64 { @@ -964,7 +1015,7 @@ block0(v0: i32): ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x53, 0x75, 0x05, 0xd2 +; fcvt.d.w fa0, a0 ; ret function %f47(i64) -> f64 { @@ -975,12 +1026,12 @@ block0(v0: i64): ; VCode: ; block0: -; fcvt.d.lu fa0,a0 +; fcvt.d.lu fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.d.lu fa0, a0 +; fcvt.d.lu fa0, a0, rne ; ret function %f48(i64) -> f64 { @@ -991,12 +1042,12 @@ block0(v0: i64): ; VCode: ; block0: -; fcvt.d.l fa0,a0 +; fcvt.d.l fa0,a0,rne ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; fcvt.d.l fa0, a0 +; fcvt.d.l fa0, a0, rne ; ret function %f49(f32) -> i32 { @@ -1007,16 +1058,18 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f32 tmp=fa3 +; fcvt.wu.s a2,fa0,rtz +; feq.s a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.wu.s a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.wu.s a2, fa0, rtz +; feq.s a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f50(f32) -> i32 { @@ -1027,16 +1080,18 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f32 tmp=fa3 +; fcvt.w.s a2,fa0,rtz +; feq.s a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.w.s a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.w.s a2, fa0, rtz +; feq.s a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f51(f32) -> i64 { @@ -1047,16 +1102,18 @@ block0(v0: f32): ; 
VCode: ; block0: -; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f32 tmp=fa3 +; fcvt.lu.s a2,fa0,rtz +; feq.s a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.lu.s a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.lu.s a2, fa0, rtz +; feq.s a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f52(f32) -> i64 { @@ -1067,16 +1124,18 @@ block0(v0: f32): ; VCode: ; block0: -; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f32 tmp=fa3 +; fcvt.l.s a2,fa0,rtz +; feq.s a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.s a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.l.s a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.l.s a2, fa0, rtz +; feq.s a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f53(f64) -> i32 { @@ -1087,16 +1146,18 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f64 tmp=fa3 +; fcvt.wu.d a2,fa0,rtz +; feq.d a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.wu.d a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.wu.d a2, fa0, rtz +; feq.d a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f54(f64) -> i32 { @@ -1107,16 +1168,18 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f64 tmp=fa3 +; fcvt.w.d a2,fa0,rtz +; feq.d a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.w.d a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.w.d a2, fa0, rtz +; feq.d a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f55(f64) -> i64 { @@ -1127,16 +1190,18 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f64 tmp=fa3 +; fcvt.lu.d a2,fa0,rtz +; feq.d a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.lu.d a0, fa0, rtz -; j 8 -; 
mv a0, zero +; fcvt.lu.d a2, fa0, rtz +; feq.d a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret function %f56(f64) -> i64 { @@ -1147,15 +1212,17 @@ block0(v0: f64): ; VCode: ; block0: -; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f64 tmp=fa3 +; fcvt.l.d a2,fa0,rtz +; feq.d a4,fa0,fa0 +; sub a0,zero,a4 +; and a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; feq.d a0, fa0, fa0 -; beqz a0, 0xc -; fcvt.l.d a0, fa0, rtz -; j 8 -; mv a0, zero +; fcvt.l.d a2, fa0, rtz +; feq.d a4, fa0, fa0 +; neg a0, a4 +; and a0, a2, a0 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/fmax.clif b/cranelift/filetests/filetests/isa/riscv64/fmax.clif index 04fcdc2622fb..c7939f36b57e 100644 --- a/cranelift/filetests/filetests/isa/riscv64/fmax.clif +++ b/cranelift/filetests/filetests/isa/riscv64/fmax.clif @@ -13,7 +13,7 @@ block0(v0: f32, v1: f32): ; feq.s a3,fa0,fa0 ; feq.s a5,fa1,fa1 ; and a1,a3,a5 -; fadd.s fa3,fa0,fa1 +; fadd.s fa3,fa0,fa1,rne ; fmax.s fa5,fa0,fa1 ; select fa0,fa5,fa3##condition=(a1 ne zero) ; ret @@ -23,7 +23,7 @@ block0(v0: f32, v1: f32): ; feq.s a3, fa0, fa0 ; feq.s a5, fa1, fa1 ; and a1, a3, a5 -; fadd.s fa3, fa0, fa1 +; fadd.s fa3, fa0, fa1, rne ; fmax.s fa5, fa0, fa1 ; beqz a1, 0xc ; fmv.d fa0, fa5 @@ -42,7 +42,7 @@ block0(v0: f64, v1: f64): ; feq.d a3,fa0,fa0 ; feq.d a5,fa1,fa1 ; and a1,a3,a5 -; fadd.d fa3,fa0,fa1 +; fadd.d fa3,fa0,fa1,rne ; fmax.d fa5,fa0,fa1 ; select fa0,fa5,fa3##condition=(a1 ne zero) ; ret @@ -52,7 +52,7 @@ block0(v0: f64, v1: f64): ; feq.d a3, fa0, fa0 ; feq.d a5, fa1, fa1 ; and a1, a3, a5 -; fadd.d fa3, fa0, fa1 +; fadd.d fa3, fa0, fa1, rne ; fmax.d fa5, fa0, fa1 ; beqz a1, 0xc ; fmv.d fa0, fa5 diff --git a/cranelift/filetests/filetests/isa/riscv64/fmin.clif b/cranelift/filetests/filetests/isa/riscv64/fmin.clif index eb68c53f96d0..cdc3b15bbdf2 100644 --- a/cranelift/filetests/filetests/isa/riscv64/fmin.clif +++ b/cranelift/filetests/filetests/isa/riscv64/fmin.clif @@ -13,7 +13,7 @@ block0(v0: f32, v1: f32): ; feq.s a3,fa0,fa0 
; feq.s a5,fa1,fa1 ; and a1,a3,a5 -; fadd.s fa3,fa0,fa1 +; fadd.s fa3,fa0,fa1,rne ; fmin.s fa5,fa0,fa1 ; select fa0,fa5,fa3##condition=(a1 ne zero) ; ret @@ -23,7 +23,7 @@ block0(v0: f32, v1: f32): ; feq.s a3, fa0, fa0 ; feq.s a5, fa1, fa1 ; and a1, a3, a5 -; fadd.s fa3, fa0, fa1 +; fadd.s fa3, fa0, fa1, rne ; fmin.s fa5, fa0, fa1 ; beqz a1, 0xc ; fmv.d fa0, fa5 @@ -42,7 +42,7 @@ block0(v0: f64, v1: f64): ; feq.d a3,fa0,fa0 ; feq.d a5,fa1,fa1 ; and a1,a3,a5 -; fadd.d fa3,fa0,fa1 +; fadd.d fa3,fa0,fa1,rne ; fmin.d fa5,fa0,fa1 ; select fa0,fa5,fa3##condition=(a1 ne zero) ; ret @@ -52,7 +52,7 @@ block0(v0: f64, v1: f64): ; feq.d a3, fa0, fa0 ; feq.d a5, fa1, fa1 ; and a1, a3, a5 -; fadd.d fa3, fa0, fa1 +; fadd.d fa3, fa0, fa1, rne ; fmin.d fa5, fa0, fa1 ; beqz a1, 0xc ; fmv.d fa0, fa5 diff --git a/cranelift/filetests/filetests/isa/riscv64/prologue.clif b/cranelift/filetests/filetests/isa/riscv64/prologue.clif index a3691a234f0a..9a611af5d50e 100644 --- a/cranelift/filetests/filetests/isa/riscv64/prologue.clif +++ b/cranelift/filetests/filetests/isa/riscv64/prologue.clif @@ -93,68 +93,68 @@ block0(v0: f64): ; fsd fs11,-88(sp) ; addi sp,sp,-96 ; block0: -; fadd.d fa3,fa0,fa0 -; fadd.d fa4,fa0,fa0 -; fadd.d fa5,fa0,fa0 -; fadd.d fa1,fa0,fa0 -; fadd.d fa2,fa0,fa0 -; fadd.d ft9,fa0,fa0 -; fadd.d ft10,fa0,fa0 -; fadd.d ft11,fa0,fa0 -; fadd.d fs0,fa0,fa0 -; fadd.d fs1,fa0,fa0 -; fadd.d fs2,fa0,fa0 -; fadd.d fs3,fa0,fa0 -; fadd.d fs4,fa0,fa0 -; fadd.d fs5,fa0,fa0 -; fadd.d fs6,fa0,fa0 -; fadd.d fs7,fa0,fa0 -; fadd.d fs8,fa0,fa0 -; fadd.d fs9,fa0,fa0 -; fadd.d fs10,fa0,fa0 -; fadd.d fs11,fa0,fa0 -; fadd.d ft0,fa0,fa0 -; fadd.d ft1,fa0,fa0 -; fadd.d ft2,fa0,fa0 -; fadd.d ft3,fa0,fa0 -; fadd.d ft4,fa0,fa0 -; fadd.d ft5,fa0,fa0 -; fadd.d ft6,fa0,fa0 -; fadd.d ft7,fa0,fa0 -; fadd.d fa6,fa0,fa0 -; fadd.d fa7,fa0,fa0 -; fadd.d ft8,fa0,fa0 -; fadd.d fa3,fa0,fa3 -; fadd.d fa4,fa4,fa5 -; fadd.d fa5,fa1,fa2 -; fadd.d fa0,ft9,ft10 -; fadd.d fa1,ft11,fs0 -; fadd.d fa2,fs1,fs2 -; fadd.d 
ft9,fs3,fs4 -; fadd.d ft10,fs5,fs6 -; fadd.d ft11,fs7,fs8 -; fadd.d fs0,fs9,fs10 -; fadd.d fs1,fs11,ft0 -; fadd.d fs2,ft1,ft2 -; fadd.d fs3,ft3,ft4 -; fadd.d fs4,ft5,ft6 -; fadd.d fs5,ft7,fa6 -; fadd.d fs6,fa7,ft8 -; fadd.d fa3,fa3,fa4 -; fadd.d fa4,fa5,fa0 -; fadd.d fa5,fa1,fa2 -; fadd.d fa0,ft9,ft10 -; fadd.d fa1,ft11,fs0 -; fadd.d fa2,fs1,fs2 -; fadd.d ft9,fs3,fs4 -; fadd.d ft10,fs5,fs6 -; fadd.d fa3,fa3,fa4 -; fadd.d fa4,fa5,fa0 -; fadd.d fa5,fa1,fa2 -; fadd.d fa0,ft9,ft10 -; fadd.d fa3,fa3,fa4 -; fadd.d fa4,fa5,fa0 -; fadd.d fa0,fa3,fa4 +; fadd.d fa3,fa0,fa0,rne +; fadd.d fa4,fa0,fa0,rne +; fadd.d fa5,fa0,fa0,rne +; fadd.d fa1,fa0,fa0,rne +; fadd.d fa2,fa0,fa0,rne +; fadd.d ft9,fa0,fa0,rne +; fadd.d ft10,fa0,fa0,rne +; fadd.d ft11,fa0,fa0,rne +; fadd.d fs0,fa0,fa0,rne +; fadd.d fs1,fa0,fa0,rne +; fadd.d fs2,fa0,fa0,rne +; fadd.d fs3,fa0,fa0,rne +; fadd.d fs4,fa0,fa0,rne +; fadd.d fs5,fa0,fa0,rne +; fadd.d fs6,fa0,fa0,rne +; fadd.d fs7,fa0,fa0,rne +; fadd.d fs8,fa0,fa0,rne +; fadd.d fs9,fa0,fa0,rne +; fadd.d fs10,fa0,fa0,rne +; fadd.d fs11,fa0,fa0,rne +; fadd.d ft0,fa0,fa0,rne +; fadd.d ft1,fa0,fa0,rne +; fadd.d ft2,fa0,fa0,rne +; fadd.d ft3,fa0,fa0,rne +; fadd.d ft4,fa0,fa0,rne +; fadd.d ft5,fa0,fa0,rne +; fadd.d ft6,fa0,fa0,rne +; fadd.d ft7,fa0,fa0,rne +; fadd.d fa6,fa0,fa0,rne +; fadd.d fa7,fa0,fa0,rne +; fadd.d ft8,fa0,fa0,rne +; fadd.d fa3,fa0,fa3,rne +; fadd.d fa4,fa4,fa5,rne +; fadd.d fa5,fa1,fa2,rne +; fadd.d fa0,ft9,ft10,rne +; fadd.d fa1,ft11,fs0,rne +; fadd.d fa2,fs1,fs2,rne +; fadd.d ft9,fs3,fs4,rne +; fadd.d ft10,fs5,fs6,rne +; fadd.d ft11,fs7,fs8,rne +; fadd.d fs0,fs9,fs10,rne +; fadd.d fs1,fs11,ft0,rne +; fadd.d fs2,ft1,ft2,rne +; fadd.d fs3,ft3,ft4,rne +; fadd.d fs4,ft5,ft6,rne +; fadd.d fs5,ft7,fa6,rne +; fadd.d fs6,fa7,ft8,rne +; fadd.d fa3,fa3,fa4,rne +; fadd.d fa4,fa5,fa0,rne +; fadd.d fa5,fa1,fa2,rne +; fadd.d fa0,ft9,ft10,rne +; fadd.d fa1,ft11,fs0,rne +; fadd.d fa2,fs1,fs2,rne +; fadd.d ft9,fs3,fs4,rne +; fadd.d ft10,fs5,fs6,rne +; 
fadd.d fa3,fa3,fa4,rne +; fadd.d fa4,fa5,fa0,rne +; fadd.d fa5,fa1,fa2,rne +; fadd.d fa0,ft9,ft10,rne +; fadd.d fa3,fa3,fa4,rne +; fadd.d fa4,fa5,fa0,rne +; fadd.d fa0,fa3,fa4,rne ; addi sp,sp,96 ; fld fs0,-8(sp) ; fld fs2,-16(sp) @@ -191,68 +191,68 @@ block0(v0: f64): ; fsd fs11, -0x58(sp) ; addi sp, sp, -0x60 ; block1: ; offset 0x40 -; fadd.d fa3, fa0, fa0 -; fadd.d fa4, fa0, fa0 -; fadd.d fa5, fa0, fa0 -; fadd.d fa1, fa0, fa0 -; fadd.d fa2, fa0, fa0 -; fadd.d ft9, fa0, fa0 -; fadd.d ft10, fa0, fa0 -; fadd.d ft11, fa0, fa0 -; fadd.d fs0, fa0, fa0 -; fadd.d fs1, fa0, fa0 -; fadd.d fs2, fa0, fa0 -; fadd.d fs3, fa0, fa0 -; fadd.d fs4, fa0, fa0 -; fadd.d fs5, fa0, fa0 -; fadd.d fs6, fa0, fa0 -; fadd.d fs7, fa0, fa0 -; fadd.d fs8, fa0, fa0 -; fadd.d fs9, fa0, fa0 -; fadd.d fs10, fa0, fa0 -; fadd.d fs11, fa0, fa0 -; fadd.d ft0, fa0, fa0 -; fadd.d ft1, fa0, fa0 -; fadd.d ft2, fa0, fa0 -; fadd.d ft3, fa0, fa0 -; fadd.d ft4, fa0, fa0 -; fadd.d ft5, fa0, fa0 -; fadd.d ft6, fa0, fa0 -; fadd.d ft7, fa0, fa0 -; fadd.d fa6, fa0, fa0 -; fadd.d fa7, fa0, fa0 -; fadd.d ft8, fa0, fa0 -; fadd.d fa3, fa0, fa3 -; fadd.d fa4, fa4, fa5 -; fadd.d fa5, fa1, fa2 -; fadd.d fa0, ft9, ft10 -; fadd.d fa1, ft11, fs0 -; fadd.d fa2, fs1, fs2 -; fadd.d ft9, fs3, fs4 -; fadd.d ft10, fs5, fs6 -; fadd.d ft11, fs7, fs8 -; fadd.d fs0, fs9, fs10 -; fadd.d fs1, fs11, ft0 -; fadd.d fs2, ft1, ft2 -; fadd.d fs3, ft3, ft4 -; fadd.d fs4, ft5, ft6 -; fadd.d fs5, ft7, fa6 -; fadd.d fs6, fa7, ft8 -; fadd.d fa3, fa3, fa4 -; fadd.d fa4, fa5, fa0 -; fadd.d fa5, fa1, fa2 -; fadd.d fa0, ft9, ft10 -; fadd.d fa1, ft11, fs0 -; fadd.d fa2, fs1, fs2 -; fadd.d ft9, fs3, fs4 -; fadd.d ft10, fs5, fs6 -; fadd.d fa3, fa3, fa4 -; fadd.d fa4, fa5, fa0 -; fadd.d fa5, fa1, fa2 -; fadd.d fa0, ft9, ft10 -; fadd.d fa3, fa3, fa4 -; fadd.d fa4, fa5, fa0 -; fadd.d fa0, fa3, fa4 +; fadd.d fa3, fa0, fa0, rne +; fadd.d fa4, fa0, fa0, rne +; fadd.d fa5, fa0, fa0, rne +; fadd.d fa1, fa0, fa0, rne +; fadd.d fa2, fa0, fa0, rne +; fadd.d ft9, 
fa0, fa0, rne +; fadd.d ft10, fa0, fa0, rne +; fadd.d ft11, fa0, fa0, rne +; fadd.d fs0, fa0, fa0, rne +; fadd.d fs1, fa0, fa0, rne +; fadd.d fs2, fa0, fa0, rne +; fadd.d fs3, fa0, fa0, rne +; fadd.d fs4, fa0, fa0, rne +; fadd.d fs5, fa0, fa0, rne +; fadd.d fs6, fa0, fa0, rne +; fadd.d fs7, fa0, fa0, rne +; fadd.d fs8, fa0, fa0, rne +; fadd.d fs9, fa0, fa0, rne +; fadd.d fs10, fa0, fa0, rne +; fadd.d fs11, fa0, fa0, rne +; fadd.d ft0, fa0, fa0, rne +; fadd.d ft1, fa0, fa0, rne +; fadd.d ft2, fa0, fa0, rne +; fadd.d ft3, fa0, fa0, rne +; fadd.d ft4, fa0, fa0, rne +; fadd.d ft5, fa0, fa0, rne +; fadd.d ft6, fa0, fa0, rne +; fadd.d ft7, fa0, fa0, rne +; fadd.d fa6, fa0, fa0, rne +; fadd.d fa7, fa0, fa0, rne +; fadd.d ft8, fa0, fa0, rne +; fadd.d fa3, fa0, fa3, rne +; fadd.d fa4, fa4, fa5, rne +; fadd.d fa5, fa1, fa2, rne +; fadd.d fa0, ft9, ft10, rne +; fadd.d fa1, ft11, fs0, rne +; fadd.d fa2, fs1, fs2, rne +; fadd.d ft9, fs3, fs4, rne +; fadd.d ft10, fs5, fs6, rne +; fadd.d ft11, fs7, fs8, rne +; fadd.d fs0, fs9, fs10, rne +; fadd.d fs1, fs11, ft0, rne +; fadd.d fs2, ft1, ft2, rne +; fadd.d fs3, ft3, ft4, rne +; fadd.d fs4, ft5, ft6, rne +; fadd.d fs5, ft7, fa6, rne +; fadd.d fs6, fa7, ft8, rne +; fadd.d fa3, fa3, fa4, rne +; fadd.d fa4, fa5, fa0, rne +; fadd.d fa5, fa1, fa2, rne +; fadd.d fa0, ft9, ft10, rne +; fadd.d fa1, ft11, fs0, rne +; fadd.d fa2, fs1, fs2, rne +; fadd.d ft9, fs3, fs4, rne +; fadd.d ft10, fs5, fs6, rne +; fadd.d fa3, fa3, fa4, rne +; fadd.d fa4, fa5, fa0, rne +; fadd.d fa5, fa1, fa2, rne +; fadd.d fa0, ft9, ft10, rne +; fadd.d fa3, fa3, fa4, rne +; fadd.d fa4, fa5, fa0, rne +; fadd.d fa0, fa3, fa4, rne ; addi sp, sp, 0x60 ; fld fs0, -8(sp) ; fld fs2, -0x10(sp) diff --git a/cranelift/filetests/filetests/isa/riscv64/return-call.clif b/cranelift/filetests/filetests/isa/riscv64/return-call.clif index ba742c2388f3..2ea34afb40d8 100644 --- a/cranelift/filetests/filetests/isa/riscv64/return-call.clif +++ 
b/cranelift/filetests/filetests/isa/riscv64/return-call.clif @@ -99,7 +99,7 @@ block0(v0: f64): ; lui a3,1027 ; slli a5,a3,40 ; fmv.d.x fa1,a5 -; fadd.d ft0,ft0,fa1 +; fadd.d ft0,ft0,fa1,rne ; ret ; ; Disassembled: @@ -107,7 +107,7 @@ block0(v0: f64): ; lui a3, 0x403 ; slli a5, a3, 0x28 ; fmv.d.x fa1, a5 -; fadd.d ft0, ft0, fa1 +; fadd.d ft0, ft0, fa1, rne ; ret function %call_f64(f64) -> f64 tail { diff --git a/cranelift/filetests/filetests/runtests/conversion-i8-i16.clif b/cranelift/filetests/filetests/runtests/conversion-i8-i16.clif new file mode 100644 index 000000000000..dde4635be504 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/conversion-i8-i16.clif @@ -0,0 +1,234 @@ +test interpret +test run +target aarch64 +target s390x +target riscv64 has_c has_zcb +target riscv64 + +function %f32_to_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint.i8 v0 + return v1 +} +; run: %f32_to_i8(0x0.0) == 0 +; run: %f32_to_i8(0x1.0) == 1 +; run: %f32_to_i8(0x8.1) == 8 +; run: %f32_to_i8(-0x8.1) == -8 +; run: %f32_to_i8(-0x80.1) == 0x80 +; run: %f32_to_i8(0x7f.1) == 0x7f +; run: %f32_to_i8(-0x0.1) == 0 + +function %f64_to_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint.i8 v0 + return v1 +} +; run: %f64_to_i8(0x0.0) == 0 +; run: %f64_to_i8(0x1.0) == 1 +; run: %f64_to_i8(0x8.1) == 8 +; run: %f64_to_i8(-0x8.1) == -8 +; run: %f64_to_i8(-0x80.1) == 0x80 +; run: %f64_to_i8(0x7f.1) == 0x7f +; run: %f64_to_i8(-0x0.1) == 0 + +function %f32_to_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint.i16 v0 + return v1 +} +; run: %f32_to_i16(0x0.0) == 0 +; run: %f32_to_i16(0x1.0) == 1 +; run: %f32_to_i16(0x8.1) == 8 +; run: %f32_to_i16(-0x8.1) == -8 +; run: %f32_to_i16(-0x8000.1) == 0x8000 +; run: %f32_to_i16(0x7fff.1) == 0x7fff +; run: %f32_to_i16(-0x0.1) == 0 + +function %f64_to_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint.i16 v0 + return v1 +} +; run: %f64_to_i16(0x0.0) == 0 +; run: %f64_to_i16(0x1.0) == 1 +; run: %f64_to_i16(0x8.1) == 8 +; run: 
%f64_to_i16(-0x8.1) == -8 +; run: %f64_to_i16(-0x8000.1) == 0x8000 +; run: %f64_to_i16(0x7fff.1) == 0x7fff +; run: %f64_to_i16(-0x0.1) == 0 + +function %f32_to_u8(f32) -> i8 { +block0(v0:f32): + v1 = fcvt_to_uint.i8 v0 + return v1 +} +; run: %f32_to_u8(0x0.0) == 0 +; run: %f32_to_u8(0x1.0) == 1 +; run: %f32_to_u8(0x4.2) == 4 +; run: %f32_to_u8(0x4.6) == 4 +; run: %f32_to_u8(-0x0.1) == 0 + +function %f64_to_u8(f64) -> i8 { +block0(v0:f64): + v1 = fcvt_to_uint.i8 v0 + return v1 +} +; run: %f64_to_u8(0x0.0) == 0 +; run: %f64_to_u8(0x1.0) == 1 +; run: %f64_to_u8(0x4.2) == 4 +; run: %f64_to_u8(0x4.6) == 4 +; run: %f64_to_u8(-0x0.1) == 0 + +function %f32_to_u16(f32) -> i16 { +block0(v0:f32): + v1 = fcvt_to_uint.i16 v0 + return v1 +} +; run: %f32_to_u16(0x0.0) == 0 +; run: %f32_to_u16(0x1.0) == 1 +; run: %f32_to_u16(0x4.2) == 4 +; run: %f32_to_u16(0x4.6) == 4 +; run: %f32_to_u16(-0x0.1) == 0 + +function %f64_to_u16(f64) -> i16 { +block0(v0:f64): + v1 = fcvt_to_uint.i16 v0 + return v1 +} +; run: %f64_to_u16(0x0.0) == 0 +; run: %f64_to_u16(0x1.0) == 1 +; run: %f64_to_u16(0x4.2) == 4 +; run: %f64_to_u16(0x4.6) == 4 +; run: %f64_to_u16(-0x0.1) == 0 + +function %f32_to_i8_sat(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} +; run: %f32_to_i8_sat(0x0.0) == 0 +; run: %f32_to_i8_sat(0x1.0) == 1 +; run: %f32_to_i8_sat(0x8.1) == 8 +; run: %f32_to_i8_sat(-0x1.0) == -1 +; run: %f32_to_i8_sat(0x1.fffffep127) == 0x7f +; run: %f32_to_i8_sat(-0x1.fffffep127) == 0x80 +; run: %f32_to_i8_sat(+NaN) == 0 +; run: %f32_to_i8_sat(-NaN) == 0 +; run: %f32_to_i8_sat(+Inf) == 0x7f +; run: %f32_to_i8_sat(-Inf) == 0x80 + +function %f64_to_i8_sat(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} +; run: %f64_to_i8_sat(0x0.0) == 0 +; run: %f64_to_i8_sat(0x1.0) == 1 +; run: %f64_to_i8_sat(0x8.1) == 8 +; run: %f64_to_i8_sat(-0x1.0) == -1 +; run: %f64_to_i8_sat(0x1.fffffep127) == 0x7f +; run: %f64_to_i8_sat(-0x1.fffffep127) == 0x80 +; run: 
%f64_to_i8_sat(+NaN) == 0 +; run: %f64_to_i8_sat(-NaN) == 0 +; run: %f64_to_i8_sat(+Inf) == 0x7f +; run: %f64_to_i8_sat(-Inf) == 0x80 + +function %f32_to_i16_sat(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} +; run: %f32_to_i16_sat(0x0.0) == 0 +; run: %f32_to_i16_sat(0x1.0) == 1 +; run: %f32_to_i16_sat(0x8.1) == 8 +; run: %f32_to_i16_sat(-0x1.0) == -1 +; run: %f32_to_i16_sat(0x1.fffffep127) == 0x7fff +; run: %f32_to_i16_sat(-0x1.fffffep127) == 0x8000 +; run: %f32_to_i16_sat(+NaN) == 0 +; run: %f32_to_i16_sat(-NaN) == 0 +; run: %f32_to_i16_sat(+Inf) == 0x7fff +; run: %f32_to_i16_sat(-Inf) == 0x8000 + +function %f64_to_i16_sat(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} +; run: %f64_to_i16_sat(0x0.0) == 0 +; run: %f64_to_i16_sat(0x1.0) == 1 +; run: %f64_to_i16_sat(0x8.1) == 8 +; run: %f64_to_i16_sat(-0x1.0) == -1 +; run: %f64_to_i16_sat(0x1.fffffep127) == 0x7fff +; run: %f64_to_i16_sat(-0x1.fffffep127) == 0x8000 +; run: %f64_to_i16_sat(+NaN) == 0 +; run: %f64_to_i16_sat(-NaN) == 0 +; run: %f64_to_i16_sat(+Inf) == 0x7fff +; run: %f64_to_i16_sat(-Inf) == 0x8000 + +function %f32_to_u8_sat(f32) -> i8 { +block0(v0:f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %f32_to_u8_sat(0x0.0) == 0 +; run: %f32_to_u8_sat(0x1.0) == 1 +; run: %f32_to_u8_sat(0x4.2) == 4 +; run: %f32_to_u8_sat(0x4.6) == 4 +; run: %f32_to_u8_sat(-0x1.0) == 0 +; run: %f32_to_u8_sat(0x1.fffffep127) == 0xff +; run: %f32_to_u8_sat(-0x1.fffffep127) == 0 +; run: %f32_to_u8_sat(+NaN) == 0 +; run: %f32_to_u8_sat(-NaN) == 0 +; run: %f32_to_u8_sat(+Inf) == 0xff +; run: %f32_to_u8_sat(-Inf) == 0 + +function %f64_to_u8_sat(f64) -> i8 { +block0(v0:f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %f64_to_u8_sat(0x0.0) == 0 +; run: %f64_to_u8_sat(0x1.0) == 1 +; run: %f64_to_u8_sat(0x4.2) == 4 +; run: %f64_to_u8_sat(0x4.6) == 4 +; run: %f64_to_u8_sat(-0x1.0) == 0 +; run: %f64_to_u8_sat(0x1.fffffep127) == 0xff +; run: 
%f64_to_u8_sat(-0x1.fffffep127) == 0 +; run: %f64_to_u8_sat(+NaN) == 0 +; run: %f64_to_u8_sat(-NaN) == 0 +; run: %f64_to_u8_sat(+Inf) == 0xff +; run: %f64_to_u8_sat(-Inf) == 0 + +function %f32_to_u16_sat(f32) -> i16 { +block0(v0:f32): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %f32_to_u16_sat(0x0.0) == 0 +; run: %f32_to_u16_sat(0x1.0) == 1 +; run: %f32_to_u16_sat(0x4.2) == 4 +; run: %f32_to_u16_sat(0x4.6) == 4 +; run: %f32_to_u16_sat(-0x1.0) == 0 +; run: %f32_to_u16_sat(0x1.fffffep127) == 0xffff +; run: %f32_to_u16_sat(-0x1.fffffep127) == 0 +; run: %f32_to_u16_sat(+NaN) == 0 +; run: %f32_to_u16_sat(-NaN) == 0 +; run: %f32_to_u16_sat(+Inf) == 0xffff +; run: %f32_to_u16_sat(-Inf) == 0 + +function %f64_to_u16_sat(f64) -> i16 { +block0(v0:f64): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %f64_to_u16_sat(0x0.0) == 0 +; run: %f64_to_u16_sat(0x1.0) == 1 +; run: %f64_to_u16_sat(0x4.2) == 4 +; run: %f64_to_u16_sat(0x4.6) == 4 +; run: %f64_to_u16_sat(-0x1.0) == 0 +; run: %f64_to_u16_sat(0x1.fffffep127) == 0xffff +; run: %f64_to_u16_sat(-0x1.fffffep127) == 0 +; run: %f64_to_u16_sat(+NaN) == 0 +; run: %f64_to_u16_sat(-NaN) == 0 +; run: %f64_to_u16_sat(+Inf) == 0xffff +; run: %f64_to_u16_sat(-Inf) == 0 diff --git a/cranelift/filetests/filetests/runtests/conversion.clif b/cranelift/filetests/filetests/runtests/conversion.clif index 1040e7e1040f..5b4a554e080f 100644 --- a/cranelift/filetests/filetests/runtests/conversion.clif +++ b/cranelift/filetests/filetests/runtests/conversion.clif @@ -7,52 +7,248 @@ target x86_64 has_avx target riscv64 has_c has_zcb target riscv64 -function %fcvt_to_sint(f32) -> i32 { +function %f32_to_i32(f32) -> i32 { block0(v0: f32): v1 = fcvt_to_sint.i32 v0 return v1 } -; run: %fcvt_to_sint(0x0.0) == 0 -; run: %fcvt_to_sint(0x1.0) == 1 -; run: %fcvt_to_sint(0x1.d6f346p26) == 123456792 -; run: %fcvt_to_sint(0x8.1) == 8 +; run: %f32_to_i32(0x0.0) == 0 +; run: %f32_to_i32(0x1.0) == 1 +; run: %f32_to_i32(0x1.d6f346p26) == 123456792 +; 
run: %f32_to_i32(0x8.1) == 8 +; run: %f32_to_i32(-0x8.1) == -8 +; run: %f32_to_i32(-0x0.1) == 0 +; run: %f32_to_i32(-0x1.0p31) == 0x8000_0000 -function %fcvt_to_uint(f32) -> i32 { +function %f64_to_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} +; run: %f64_to_i32(0x0.0) == 0 +; run: %f64_to_i32(0x1.0) == 1 +; run: %f64_to_i32(0x1.d6f346p26) == 123456792 +; run: %f64_to_i32(0x8.1) == 8 +; run: %f64_to_i32(-0x8.1) == -8 +; run: %f64_to_i32(-0x0.1) == 0 +; run: %f64_to_i32(-0x1.0p31) == 0x8000_0000 + +function %f32_to_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} +; run: %f32_to_i64(0x0.0) == 0 +; run: %f32_to_i64(0x1.0) == 1 +; run: %f32_to_i64(0x1.d6f346p26) == 123456792 +; run: %f32_to_i64(0x8.1) == 8 +; run: %f32_to_i64(-0x8.1) == -8 +; run: %f32_to_i64(-0x0.1) == 0 + +function %f64_to_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} +; run: %f64_to_i64(0x0.0) == 0 +; run: %f64_to_i64(0x1.0) == 1 +; run: %f64_to_i64(0x1.d6f346p26) == 123456792 +; run: %f64_to_i64(0x8.1) == 8 +; run: %f64_to_i64(-0x8.1) == -8 +; run: %f64_to_i64(-0x0.1) == 0 + +function %f32_to_u32(f32) -> i32 { block0(v0:f32): v1 = fcvt_to_uint.i32 v0 return v1 } -; run: %fcvt_to_uint(0x0.0) == 0 -; run: %fcvt_to_uint(0x1.0) == 1 -; run: %fcvt_to_uint(0x4.2) == 4 -; run: %fcvt_to_uint(0x4.6) == 4 -; run: %fcvt_to_uint(0x1.d6f346p26) == 123456792 -; run: %fcvt_to_uint(0xB2D05E00.0) == 3000000000 +; run: %f32_to_u32(0x0.0) == 0 +; run: %f32_to_u32(0x1.0) == 1 +; run: %f32_to_u32(0x4.2) == 4 +; run: %f32_to_u32(0x4.6) == 4 +; run: %f32_to_u32(0x1.d6f346p26) == 123456792 +; run: %f32_to_u32(0xB2D05E00.0) == 3000000000 +; run: %f32_to_u32(-0x0.1) == 0 + +function %f64_to_u32(f64) -> i32 { +block0(v0:f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} +; run: %f64_to_u32(0x0.0) == 0 +; run: %f64_to_u32(0x1.0) == 1 +; run: %f64_to_u32(0x4.2) == 4 +; run: %f64_to_u32(0x4.6) == 4 +; run: %f64_to_u32(0x1.d6f346p26) == 
123456792 +; run: %f64_to_u32(0xB2D05E00.0) == 3000000000 +; run: %f64_to_u32(-0x0.1) == 0 + +function %f32_to_u64(f32) -> i64 { +block0(v0:f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} +; run: %f32_to_u64(0x0.0) == 0 +; run: %f32_to_u64(0x1.0) == 1 +; run: %f32_to_u64(0x4.2) == 4 +; run: %f32_to_u64(0x4.6) == 4 +; run: %f32_to_u64(0x1.d6f346p26) == 123456792 +; run: %f32_to_u64(0xB2D05E00.0) == 3000000000 +; run: %f32_to_u64(-0x0.1) == 0 + +function %f64_to_u64(f64) -> i64 { +block0(v0:f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} +; run: %f64_to_u64(0x0.0) == 0 +; run: %f64_to_u64(0x1.0) == 1 +; run: %f64_to_u64(0x4.2) == 4 +; run: %f64_to_u64(0x4.6) == 4 +; run: %f64_to_u64(0x1.d6f346p26) == 123456792 +; run: %f64_to_u64(0xB2D05E00.0) == 3000000000 +; run: %f64_to_u64(-0x0.1) == 0 -function %fcvt_to_sint_sat(f32) -> i32 { +function %f32_to_i32_sat(f32) -> i32 { block0(v0: f32): v1 = fcvt_to_sint_sat.i32 v0 return v1 } -; run: %fcvt_to_sint_sat(0x0.0) == 0 -; run: %fcvt_to_sint_sat(0x1.0) == 1 -; run: %fcvt_to_sint_sat(0x1.d6f346p26) == 123456792 -; run: %fcvt_to_sint_sat(0x8.1) == 8 -; run: %fcvt_to_sint_sat(-0x1.0) == -1 -; run: %fcvt_to_sint_sat(0x1.fffffep127) == 2147483647 -; run: %fcvt_to_sint_sat(-0x1.fffffep127) == -2147483648 +; run: %f32_to_i32_sat(0x0.0) == 0 +; run: %f32_to_i32_sat(0x1.0) == 1 +; run: %f32_to_i32_sat(0x1.d6f346p26) == 123456792 +; run: %f32_to_i32_sat(0x8.1) == 8 +; run: %f32_to_i32_sat(-0x1.0) == -1 +; run: %f32_to_i32_sat(0x1.fffffep127) == 2147483647 +; run: %f32_to_i32_sat(-0x1.fffffep127) == -2147483648 +; run: %f32_to_i32_sat(+NaN) == 0 +; run: %f32_to_i32_sat(-NaN) == 0 +; run: %f32_to_i32_sat(+Inf) == 2147483647 +; run: %f32_to_i32_sat(-Inf) == -2147483648 -function %fcvt_to_uint_sat(f32) -> i32 { +function %f64_to_i32_sat(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} +; run: %f64_to_i32_sat(0x0.0) == 0 +; run: %f64_to_i32_sat(0x1.0) == 1 +; run: %f64_to_i32_sat(0x1.d6f346p26) == 123456792 
+; run: %f64_to_i32_sat(0x8.1) == 8 +; run: %f64_to_i32_sat(-0x1.0) == -1 +; run: %f64_to_i32_sat(0x1.fffffep127) == 2147483647 +; run: %f64_to_i32_sat(-0x1.fffffep127) == -2147483648 +; run: %f64_to_i32_sat(+NaN) == 0 +; run: %f64_to_i32_sat(-NaN) == 0 +; run: %f64_to_i32_sat(+Inf) == 2147483647 +; run: %f64_to_i32_sat(-Inf) == -2147483648 + +function %f32_to_i64_sat(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} +; run: %f32_to_i64_sat(0x0.0) == 0 +; run: %f32_to_i64_sat(0x1.0) == 1 +; run: %f32_to_i64_sat(0x1.d6f346p26) == 123456792 +; run: %f32_to_i64_sat(0x8.1) == 8 +; run: %f32_to_i64_sat(-0x1.0) == -1 +; run: %f32_to_i64_sat(0x1.fffffep127) == 0x7fffffff_ffffffff +; run: %f32_to_i64_sat(-0x1.fffffep127) == 0x80000000_00000000 +; run: %f32_to_i64_sat(+NaN) == 0 +; run: %f32_to_i64_sat(-NaN) == 0 +; run: %f32_to_i64_sat(+Inf) == 0x7fffffff_ffffffff +; run: %f32_to_i64_sat(-Inf) == 0x80000000_00000000 + +function %f64_to_i64_sat(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} +; run: %f64_to_i64_sat(0x0.0) == 0 +; run: %f64_to_i64_sat(0x1.0) == 1 +; run: %f64_to_i64_sat(0x1.d6f346p26) == 123456792 +; run: %f64_to_i64_sat(0x8.1) == 8 +; run: %f64_to_i64_sat(-0x1.0) == -1 +; run: %f64_to_i64_sat(0x1.fffffep127) == 0x7fffffff_ffffffff +; run: %f64_to_i64_sat(-0x1.fffffep127) == 0x80000000_00000000 +; run: %f64_to_i64_sat(+NaN) == 0 +; run: %f64_to_i64_sat(-NaN) == 0 +; run: %f64_to_i64_sat(+Inf) == 0x7fffffff_ffffffff +; run: %f64_to_i64_sat(-Inf) == 0x80000000_00000000 + +function %f32_to_u32_sat(f32) -> i32 { block0(v0:f32): v1 = fcvt_to_uint_sat.i32 v0 return v1 } -; run: %fcvt_to_uint_sat(0x0.0) == 0 -; run: %fcvt_to_uint_sat(0x1.0) == 1 -; run: %fcvt_to_uint_sat(0x4.2) == 4 -; run: %fcvt_to_uint_sat(0x4.6) == 4 -; run: %fcvt_to_uint_sat(0x1.d6f346p26) == 123456792 -; run: %fcvt_to_uint_sat(0xB2D05E00.0) == 3000000000 -; run: %fcvt_to_uint_sat(-0x1.0) == 0 -; run: %fcvt_to_uint_sat(0x1.fffffep127) == 
4294967295 -; run: %fcvt_to_uint_sat(-0x1.fffffep127) == 0 +; run: %f32_to_u32_sat(0x0.0) == 0 +; run: %f32_to_u32_sat(0x1.0) == 1 +; run: %f32_to_u32_sat(0x4.2) == 4 +; run: %f32_to_u32_sat(0x4.6) == 4 +; run: %f32_to_u32_sat(0x1.d6f346p26) == 123456792 +; run: %f32_to_u32_sat(0xB2D05E00.0) == 3000000000 +; run: %f32_to_u32_sat(-0x1.0) == 0 +; run: %f32_to_u32_sat(0x1.fffffep127) == 4294967295 +; run: %f32_to_u32_sat(-0x1.fffffep127) == 0 +; run: %f32_to_u32_sat(+NaN) == 0 +; run: %f32_to_u32_sat(-NaN) == 0 +; run: %f32_to_u32_sat(+Inf) == 0xffffffff +; run: %f32_to_u32_sat(-Inf) == 0 + +function %f64_to_u32_sat(f64) -> i32 { +block0(v0:f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} +; run: %f64_to_u32_sat(0x0.0) == 0 +; run: %f64_to_u32_sat(0x1.0) == 1 +; run: %f64_to_u32_sat(0x4.2) == 4 +; run: %f64_to_u32_sat(0x4.6) == 4 +; run: %f64_to_u32_sat(0x1.d6f346p26) == 123456792 +; run: %f64_to_u32_sat(0xB2D05E00.0) == 3000000000 +; run: %f64_to_u32_sat(-0x1.0) == 0 +; run: %f64_to_u32_sat(0x1.fffffep127) == 4294967295 +; run: %f64_to_u32_sat(-0x1.fffffep127) == 0 +; run: %f64_to_u32_sat(+NaN) == 0 +; run: %f64_to_u32_sat(-NaN) == 0 +; run: %f64_to_u32_sat(+Inf) == 0xffffffff +; run: %f64_to_u32_sat(-Inf) == 0 + +function %f32_to_u64_sat(f32) -> i64 { +block0(v0:f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} +; run: %f32_to_u64_sat(0x0.0) == 0 +; run: %f32_to_u64_sat(0x1.0) == 1 +; run: %f32_to_u64_sat(0x4.2) == 4 +; run: %f32_to_u64_sat(0x4.6) == 4 +; run: %f32_to_u64_sat(0x1.d6f346p26) == 123456792 +; run: %f32_to_u64_sat(0xB2D05E00.0) == 3000000000 +; run: %f32_to_u64_sat(-0x1.0) == 0 +; run: %f32_to_u64_sat(0x1.fffffep127) == 0xffffffff_ffffffff +; run: %f32_to_u64_sat(-0x1.fffffep127) == 0 +; run: %f32_to_u64_sat(+NaN) == 0 +; run: %f32_to_u64_sat(-NaN) == 0 +; run: %f32_to_u64_sat(+Inf) == 0xffffffff_ffffffff +; run: %f32_to_u64_sat(-Inf) == 0 + +function %f64_to_u64_sat(f64) -> i64 { +block0(v0:f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} +; 
run: %f64_to_u64_sat(0x0.0) == 0 +; run: %f64_to_u64_sat(0x1.0) == 1 +; run: %f64_to_u64_sat(0x4.2) == 4 +; run: %f64_to_u64_sat(0x4.6) == 4 +; run: %f64_to_u64_sat(0x1.d6f346p26) == 123456792 +; run: %f64_to_u64_sat(0xB2D05E00.0) == 3000000000 +; run: %f64_to_u64_sat(-0x1.0) == 0 +; run: %f64_to_u64_sat(0x1.fffffep127) == 0xffffffff_ffffffff +; run: %f64_to_u64_sat(-0x1.fffffep127) == 0 +; run: %f64_to_u64_sat(+NaN) == 0 +; run: %f64_to_u64_sat(-NaN) == 0 +; run: %f64_to_u64_sat(+Inf) == 0xffffffff_ffffffff +; run: %f64_to_u64_sat(-Inf) == 0 diff --git a/cranelift/filetests/filetests/runtests/issue-5992.clif b/cranelift/filetests/filetests/runtests/issue-5992.clif new file mode 100644 index 000000000000..a1d9e3692639 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/issue-5992.clif @@ -0,0 +1,36 @@ +test interpret +test run +target riscv64 +target aarch64 +target s390x + +function %a_f32(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %a_f32(0x1.949400p21) == -1 + + +function %b_f32(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %b_f32(0x1.949400p21) == -1 + + +function %a_f64(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %a_f64(0x1.949400p21) == -1 + + +function %b_f64(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %b_f64(0x1.949400p21) == -1 diff --git a/cranelift/filetests/filetests/runtests/issue-5993.clif b/cranelift/filetests/filetests/runtests/issue-5993.clif new file mode 100644 index 000000000000..b19a865b246c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/issue-5993.clif @@ -0,0 +1,19 @@ +test interpret +test run +target riscv64 +target aarch64 +target s390x + +function %a(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} +; run: %a(-0x1.811d818400000p30) == -128 + +function %b(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} +; 
run: %b(-0x1.811d818400000p30) == -32768 diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 4b0e7a0719a5..651b784b2b53 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -757,7 +757,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - ( Opcode::FcvtToUintSat | Opcode::FcvtToSintSat, &[F32 | F64], - &[I8 | I16 | I128] + &[I128] ), // https://github.com/bytecodealliance/wasmtime/issues/5528 (