Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64) Ported the existing implementations of the following opcodes to ISLE on AArch64: - `Fcopysign` - Also introduced missing support for `fcopysign` on vector values, as per the docs. - This introduces the vector encoding for the `SLI` machine instruction. - `FcvtToUint` - `FcvtToSint` - `FcvtFromUint` - `FcvtFromSint` - `FcvtToUintSat` - `FcvtToSintSat` Copyright (c) 2022 Arm Limited * Document helpers and abstract conversion checks
bytecodealliance · Aug 24, 2022 · 94bcbe8 · 94bcbe8
1 parent 7e3c481
commit 94bcbe8
Show file tree

Hide file tree

Showing 12 changed files with 863 additions and 548 deletions.
diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -619,6 +619,14 @@
         (size VectorSize)
         (imm u8))
 
+       ;; Destructive vector shift by immediate.
+       ;; `rd` is modified in place rather than freshly defined, so it must
+       ;; already hold a value when this instruction is emitted (see
+       ;; `fcopy_sign`, which moves its first operand into `rd` beforehand).
+       (VecShiftImmMod
+        (op VecShiftImmModOp)
+        (rd WritableReg)
+        (rn Reg)
+        (size VectorSize)
+        (imm u8))
+
        ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
        ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
        (VecExtract
@@ -1315,6 +1323,13 @@
     (Sshr)
 ))
 
+;; Destructive shift-by-immediate operation on each lane of a vector.
+;; Used as the `op` field of `MInst.VecShiftImmMod`.
+(type VecShiftImmModOp
+  (enum
+    ;; Shift left and insert (the `sli` instruction)
+    (Sli)
+))
+
 ;; Atomic read-modify-write operations with acquire-release semantics
 (type AtomicRMWOp
   (enum
@@ -1386,6 +1401,48 @@
 (decl u64_into_imm_logic (Type u64) ImmLogic)
 (extern constructor u64_into_imm_logic u64_into_imm_logic)
 
+;; Calculate the minimum floating-point bound for a trapping conversion
+;; from a floating-point type to an integer type (see `fpu_to_int_cvt`).
+;; Accepts whether the output is signed, the size of the input
+;; floating point type in bits, and the size of the output integer type
+;; in bits. Returns a register holding the bound.
+(decl min_fp_value (bool u8 u8) Reg)
+(extern constructor min_fp_value min_fp_value)
+
+;; Calculate the maximum floating-point bound for a trapping conversion
+;; from a floating-point type to an integer type (see `fpu_to_int_cvt`).
+;; Accepts whether the output is signed, the size of the input
+;; floating point type in bits, and the size of the output integer type
+;; in bits. Returns a register holding the bound.
+(decl max_fp_value (bool u8 u8) Reg)
+(extern constructor max_fp_value max_fp_value)
+
+;; Calculate the minimum acceptable floating-point value for a saturating
+;; conversion from a floating-point type to an integer type (see
+;; `fpu_to_int_cvt_sat`, which clamps its input against this value).
+;; Accepts whether the output is signed, the size of the input
+;; floating point type in bits, and the size of the output integer type
+;; in bits. Returns a register holding the value.
+(decl min_fp_value_sat (bool u8 u8) Reg)
+(extern constructor min_fp_value_sat min_fp_value_sat)
+
+;; Calculate the maximum acceptable floating-point value for a saturating
+;; conversion from a floating-point type to an integer type (see
+;; `fpu_to_int_cvt_sat`, which clamps its input against this value).
+;; Accepts whether the output is signed, the size of the input
+;; floating point type in bits, and the size of the output integer type
+;; in bits. Returns a register holding the value.
+(decl max_fp_value_sat (bool u8 u8) Reg)
+(extern constructor max_fp_value_sat max_fp_value_sat)
+
+;; Constructs an FPUOpRI.Ushr* (shift right by immediate) operation.
+;; Arguments, in order: the size in bits of the value (or lane), then
+;; the amount to shift by.
+(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
+(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)
+
+;; Constructs an FPUOpRI.Sli* (shift left and insert) operation.
+;; Arguments, in order: the size in bits of the value (or lane), then
+;; the amount to shift by.
+(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
+(extern constructor fpu_op_ri_sli fpu_op_ri_sli)
+
 (decl imm12_from_negated_u64 (Imm12) u64)
 (extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
 
@@ -1533,6 +1590,12 @@
             (_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
         dst))
 
+;; Helper for emitting `MInst.FpuRRI` instructions.
+;; The result is written to a fresh `$F64` temporary, which is returned.
+(decl fpu_rri (FPUOpRI Reg) Reg)
+(rule (fpu_rri op src)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRRI op dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.FpuRRR` instructions.
 (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
 (rule (fpu_rrr op src1 src2 size)
@@ -2611,3 +2674,147 @@
       ;; to clobber LR.
       (let ((_ Unit (emit (MInst.Xpaclri))))
            (mov_preg (preg_link))))
+
+;; Helper for getting the maximum shift amount for a type.
+;; Only `$F64` and `$F32` have rules; the result is the type's width in
+;; bits minus one (63 and 31 respectively).
+
+(decl max_shift (Type) u8)
+(rule (max_shift $F64) 63)
+(rule (max_shift $F32) 31)
+
+;; Helper for generating `fcopysign` instruction sequences.
+;; Arguments are `x`, `y` and the value type. Both rules follow the same
+;; three steps:
+;;  1. copy `x` into a fresh destination register;
+;;  2. shift `y` right by `max_shift`, bringing the top bit of the
+;;     value (or of each lane) down to bit 0;
+;;  3. shift-left-and-insert (`sli`) that temporary into the destination
+;;     by the same amount. `sli` modifies its destination, which is why
+;;     step 1 has to happen first.
+
+(decl fcopy_sign (Reg Reg Type) Reg)
+;; Scalar case: a `$F64` temporary and a 64-bit move are used for every
+;; type matched by `ty_scalar_float`.
+(rule (fcopy_sign x y (ty_scalar_float ty))
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuMove64 dst x)))
+            (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
+            (_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
+       dst))
+;; Vector case: the same sequence with a 128-bit move and per-lane shifts
+;; sized by `lane_type` / `vector_size`.
+(rule (fcopy_sign x y ty @ (multi_lane _ _))
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.FpuMove128 dst x)))
+            (tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
+            (_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
+       dst))
+
+;; Helpers for generating `MInst.FpuToInt` instructions.
+
+;; Compares `src` with itself and traps with
+;; `trap_code_bad_conversion_to_integer` when the comparison sets the `Vs`
+;; condition (presumably the unordered result produced by a NaN operand).
+;; Accepts the scalar size of the comparison and the source register, and
+;; returns `src` so that further checks can be chained on it.
+(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
+(rule (fpu_to_int_nan_check size src)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp size src src)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Vs))
+                        (trap_code_bad_conversion_to_integer))
+                    src))))
+       (value_regs_get r 0)))
+
+;; Checks that the value is not less than the minimum bound,
+;; accepting a boolean (whether the type is signed), input type,
+;; output type, and registers containing the source and minimum bound.
+;; Every rule compares `src` against `min`, traps with
+;; `trap_code_integer_overflow` on failure and returns `src`; the rules
+;; differ only in the comparison size and in whether equality with `min`
+;; also traps (`Le`) or not (`Lt`).
+(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
+;; Signed, `$F32` input, output type matched by `fits_in_16`: traps on `Le`.
+;; NOTE(review): presumably `min` is an exclusive bound in this case --
+;; confirm against the external `min_fp_value` implementation.
+(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp (ScalarSize.Size32) src min)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Le))
+                        (trap_code_integer_overflow))
+                    src))))
+       (value_regs_get r 0)))
+;; Signed, `$F64` input, output type matched by `fits_in_32`: traps on `Le`
+;; (same caveat as the rule above).
+(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp (ScalarSize.Size64) src min)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Le))
+                        (trap_code_integer_overflow))
+                    src))))
+       (value_regs_get r 0)))
+;; Signed fallback (priority -1, so the two rules above win when they
+;; match): traps only on `Lt`, i.e. a value equal to `min` is accepted.
+(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp (scalar_size in_ty) src min)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Lt))
+                        (trap_code_integer_overflow))
+                    src))))
+       (value_regs_get r 0)))
+;; Unsigned output, any input/output types: traps on `Le`.
+(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp (scalar_size in_ty) src min)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Le))
+                        (trap_code_integer_overflow))
+                    src))))
+       (value_regs_get r 0)))
+
+;; Checks that the value is below the maximum bound, accepting the scalar
+;; size of the comparison and registers containing the source and the
+;; maximum bound. Traps with `trap_code_integer_overflow` when comparing
+;; `src` against `max` yields `Ge`; otherwise returns `src`.
+(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
+(rule (fpu_to_int_overflow_check size src max)
+      (let ((r ValueRegs
+                  (with_flags (fpu_cmp size src max)
+                   (ConsumesFlags.ConsumesFlagsReturnsReg
+                    (MInst.TrapIf (cond_br_cond (Cond.Ge))
+                        (trap_code_integer_overflow))
+                    src))))
+       (value_regs_get r 0)))
+
+;; Emits the appropriate instruction sequence to convert a
+;; floating-point value to an integer, trapping if the value
+;; is a NaN or does not fit in the target type.
+;; Accepts the specific conversion op, the source register,
+;; whether the output integer is signed, and finally the input and
+;; output types.
+;; The checks are emitted in order: NaN, then lower bound, then upper
+;; bound; each returns `src`, which is rebound and fed to the next step.
+(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
+(rule (fpu_to_int_cvt op src signed in_ty out_ty)
+      (let ((size ScalarSize (scalar_size in_ty))
+            (in_bits u8 (ty_bits in_ty))
+            (out_bits u8 (ty_bits out_ty))
+            (src Reg (fpu_to_int_nan_check size src))
+            (min Reg (min_fp_value signed in_bits out_bits))
+            (src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
+            (max Reg (max_fp_value signed in_bits out_bits))
+            (src Reg (fpu_to_int_overflow_check size src max)))
+       (fpu_to_int op src)))
+
+;; Emits the appropriate instruction sequence to convert a
+;; floating-point value to an integer, saturating if the value
+;; does not fit in the target type.
+;; Accepts the specific conversion op, the source register,
+;; whether the output integer is signed, and finally the input and
+;; output types.
+;; Both rules clamp `src` with `FPUOp2.Min` against `max` and then
+;; `FPUOp2.Max` against `min`, and finally use a self-comparison of `src`
+;; with a `Cond.Ne` select to replace the clamped value (presumably only
+;; when `src` is NaN) before converting.
+(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
+;; Signed output: the replacement value is a zero constant.
+(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
+      (let ((size ScalarSize (scalar_size in_ty))
+            (in_bits u8 (ty_bits in_ty))
+            (out_bits u8 (ty_bits out_ty))
+            (max Reg (max_fp_value_sat $true in_bits out_bits))
+            (tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
+            (min Reg (min_fp_value_sat $true in_bits out_bits))
+            (tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
+            (zero Reg (constant_f128 0))
+            (tmp ValueRegs (with_flags (fpu_cmp size src src)
+                    (fpu_csel in_ty (Cond.Ne) zero tmp))))
+       (fpu_to_int op (value_regs_get tmp 0))))
+;; Unsigned output: the replacement value is `min` itself, so no separate
+;; zero constant is materialised.
+(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
+      (let ((size ScalarSize (scalar_size in_ty))
+            (in_bits u8 (ty_bits in_ty))
+            (out_bits u8 (ty_bits out_ty))
+            (max Reg (max_fp_value_sat $false in_bits out_bits))
+            (tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
+            (min Reg (min_fp_value_sat $false in_bits out_bits))
+            (tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
+            (tmp ValueRegs (with_flags (fpu_cmp size src src)
+                    (fpu_csel in_ty (Cond.Ne) min tmp))))
+       (fpu_to_int op (value_regs_get tmp 0))))
+
+;; Helper for emitting `MInst.FpuToInt` instructions.
+;; The result is written to a fresh `$I64` temporary, which is returned.
+(decl fpu_to_int (FpuToIntOp Reg) Reg)
+(rule (fpu_to_int op src)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.FpuToInt op dst src))))
+       dst))
+
+;; Helper for generating `MInst.IntToFpu` instructions.
+;; The result is written to a fresh `$I8X16` temporary, which is returned.
+
+(decl int_to_fpu (IntToFpuOp Reg) Reg)
+(rule (int_to_fpu op src)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.IntToFpu op dst src))))
+       dst))
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
                 let rd_enc = machreg_to_vec(rd.to_reg());
                 sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
             }
+            // Destructive vector shift by immediate; `Sli` is the only op so far.
+            &Inst::VecShiftImmMod {
+                op,
+                rd,
+                rn,
+                size,
+                imm,
+            } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                // `is_shr` is false for the only existing op, so at present just
+                // the left-shift (`false`) arms of the `immh_immb` match below
+                // can be selected; the right-shift arms are there for ops that
+                // may be added later.
+                let (is_shr, mut template) = match op {
+                    VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
+                };
+                // Bit 30 of the encoding selects the 128-bit form.
+                if size.is_128bits() {
+                    template |= 0b1 << 30;
+                }
+                let imm = imm as u32;
+                // Deal with the somewhat strange encoding scheme for, and limits on,
+                // the shift amount.
+                // The lane size is marked by a single high bit of the 7-bit field;
+                // right shifts store `lane_bits - imm` below it (valid for
+                // 1..=lane_bits), left shifts store `imm` directly (valid for
+                // 0..lane_bits). Anything else is rejected with a panic.
+                let immh_immb = match (size.lane_size(), is_shr) {
+                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
+                        0b_1000_000_u32 | (64 - imm)
+                    }
+                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
+                        0b_0100_000_u32 | (32 - imm)
+                    }
+                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
+                        0b_0010_000_u32 | (16 - imm)
+                    }
+                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
+                        0b_0001_000_u32 | (8 - imm)
+                    }
+                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+                    _ => panic!(
+                        "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
+                        op, size, imm
+                    ),
+                };
+                let rn_enc = machreg_to_vec(rn);
+                let rd_enc = machreg_to_vec(rd.to_reg());
+                // Field layout: shift field at bit 16, `rn` at bit 5, `rd` at bit 0.
+                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
+            }
             &Inst::VecExtract { rd, rn, rm, imm4 } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);

diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
     ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
     FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
     VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
-    VecRRRLongOp, VecShiftImmOp,
+    VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
 };
 
 /// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_def(rd);
             collector.reg_use(rn);
         }
+        &Inst::VecShiftImmMod { rd, rn, .. } => {
+            // `rd` is collected with `reg_mod` rather than `reg_def`, matching
+            // the ISLE definition of this instruction as a destructive shift.
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
+        }
         &Inst::VecExtract { rd, rn, rm, .. } => {
             collector.reg_def(rd);
             collector.reg_use(rn);
@@ -2371,6 +2375,20 @@ impl Inst {
                 let rn = pretty_print_vreg_vector(rn, size, allocs);
                 format!("{} {}, {}, #{}", op, rd, rn, imm)
             }
+            // Printed as `<op> <rd>, <rn>, #<imm>`, with both registers shown in
+            // the vector arrangement given by `size`.
+            &Inst::VecShiftImmMod {
+                op,
+                rd,
+                rn,
+                size,
+                imm,
+            } => {
+                let op = match op {
+                    VecShiftImmModOp::Sli => "sli",
+                };
+                let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
+                let rn = pretty_print_vreg_vector(rn, size, allocs);
+                format!("{} {}, {}, #{}", op, rd, rn, imm)
+            }
             &Inst::VecExtract { rd, rn, rm, imm4 } => {
                 let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
                 let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);