Skip to content

Commit

Permalink
Improve fcvt_to_{u,s}int_sat lowering (AArch64) (#4913)
Browse files Browse the repository at this point in the history
Improved the instruction lowering for the following opcodes on AArch64,
and introduced support for converting to integers less than 32-bits wide
as per the docs:
- `FcvtToSintSat`
- `FcvtToUintSat`

Copyright (c) 2022 Arm Limited
  • Loading branch information
dheaton-arm authored Sep 21, 2022
1 parent e786bda commit 352c759
Show file tree
Hide file tree
Showing 6 changed files with 326 additions and 356 deletions.
73 changes: 31 additions & 42 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1635,22 +1635,6 @@
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)

;; Calculate the minimum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)

;; Calculate the maximum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)

;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
Expand Down Expand Up @@ -3147,32 +3131,37 @@
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the output is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
;; Signed case. The source is clamped in floating point before converting:
;; first from above with `max_fp_value_sat`, then from below with
;; `min_fp_value_sat` (each `tmp` binding shadows the previous one).
;; `src` is then compared with itself; the `Ne` arm (taken for NaN, which
;; never compares equal to itself) selects zero instead of the clamped value.
;; The selected value is what finally goes through `fpu_to_int`.
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; Unsigned case. Same shape as the signed rule: clamp `src` from above with
;; `max_fp_value_sat` and from below with `min_fp_value_sat`, then compare
;; `src` with itself. Here the `Ne` arm (NaN input) selects `min` directly
;; rather than a separately materialized zero constant.
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; whether the output is signed, and finally the output type.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg)
;; 64-bit and 32-bit results: emit the conversion by itself, regardless of
;; signedness, with no extra clamping.
;; NOTE(review): presumably this relies on the conversion instruction already
;; saturating at the destination register width -- confirm against the
;; Fcvtzs/Fcvtzu semantics.
(rule (fpu_to_int_cvt_sat op src _ $I64)
(fpu_to_int op src))
(rule (fpu_to_int_cvt_sat op src _ $I32)
(fpu_to_int op src))
;; Unsigned results of 16 bits or fewer: convert first, then clamp from above
;; in the integer domain. `max` is the immediate -1 zero-extended as `out_ty`
;; (presumably all ones in `out_ty`'s width, i.e. its unsigned maximum --
;; confirm against `imm`). The 32-bit compare followed by `csel` on `Hi`
;; (unsigned higher) yields `max` when the converted value exceeds it and the
;; converted value otherwise. No lower clamp is applied in this rule.
(rule (fpu_to_int_cvt_sat op src $false (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
(max Reg (imm out_ty (ImmExtend.Zero) -1)))
(with_flags_reg
(cmp (OperandSize.Size32) result max)
(csel (Cond.Hi) max result))))
;; Signed results of 16 bits or fewer: convert first, then clamp on both
;; sides in the integer domain. `max` and `min` are the sign-extended bounds
;; from `signed_max` / `signed_min`, materialized as $I32 constants.
;; `result` is rebound at each step: the first compare/select replaces it
;; with `max` when it is greater (`Gt`), the second replaces it with `min`
;; when it is less (`Lt`).
(rule (fpu_to_int_cvt_sat op src $true (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
(max Reg (imm $I32 (ImmExtend.Sign) (signed_max out_ty)))
(min Reg (imm $I32 (ImmExtend.Sign) (signed_min out_ty)))
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result max)
(csel (Cond.Gt) max result)))
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result min)
(csel (Cond.Lt) min result))))
result))

;; Smallest value of a signed integer type, returned as a `u64`.
;; Only $I8 and $I16 have rules; no other type matches.
(decl signed_min (Type) u64)
(rule (signed_min $I8) -128)
(rule (signed_min $I16) -32768)

;; Largest value of a signed integer type, returned as a `u64`.
;; Only $I8 and $I16 have rules; no other type matches.
(decl signed_max (Type) u64)
(rule (signed_max $I8) 127)
(rule (signed_max $I16) 32767)

(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
Expand Down
24 changes: 12 additions & 12 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -472,17 +472,17 @@
;; Vector case where both the result and the operand have 64-bit lanes:
;; lowered to a single `Fcvtzu` vector misc op at the result's vector size.
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))

(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty))

(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64))

(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty))

(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64))

;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -492,17 +492,17 @@
;; Vector case where both the result and the operand have 64-bit lanes:
;; lowered to a single `Fcvtzs` vector misc op at the result's vector size.
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))

(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty))

(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64))

(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty))

(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64))

;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
62 changes: 0 additions & 62 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,68 +637,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
tmp.to_reg()
}

/// Materializes, in a fresh temporary register, the lower clamp bound used
/// before a saturating float-to-integer conversion.
///
/// * `signed` - whether the integer result is signed.
/// * `in_bits` - width of the floating-point input (32 or 64).
/// * `out_bits` - width of the integer result (32 or 64).
///
/// The bound is the integer type's minimum for signed results and `0.0` for
/// unsigned ones; it is emitted as an `f32` or `f64` constant according to
/// `in_bits`.
///
/// # Panics
///
/// Panics via `unimplemented!` for any other `out_bits` or `in_bits` value.
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // Pick the bound from the integer result type first, so an unsupported
    // output width is reported before the input width is inspected.
    let lower_bound: f64 = match (signed, out_bits) {
        (false, 32) | (false, 64) => 0.0,
        (true, 32) => f64::from(i32::MIN),
        (true, 64) => i64::MIN as f64,
        _ => unimplemented!(
            "unexpected {} output size of {} bits",
            if signed { "signed" } else { "unsigned" },
            out_bits
        ),
    };

    // Emit the constant at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, lower_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, lower_bound),
        _ => unimplemented!(
            "unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}

/// Materializes, in a fresh temporary register, the upper clamp bound used
/// before a saturating float-to-integer conversion.
///
/// * `signed` - whether the integer result is signed.
/// * `in_bits` - width of the floating-point input (32 or 64).
/// * `out_bits` - width of the integer result (32 or 64).
///
/// The bound is the maximum of the selected integer type, converted to
/// `f64` and then emitted as an `f32` or `f64` constant according to
/// `in_bits`.
///
/// # Panics
///
/// Panics via `unimplemented!` for any other `out_bits` or `in_bits` value.
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // Pick the bound from the integer result type first, so an unsupported
    // output width is reported before the input width is inspected.
    let upper_bound: f64 = match (signed, out_bits) {
        (true, 32) => f64::from(i32::MAX),
        (false, 32) => f64::from(u32::MAX),
        (true, 64) => i64::MAX as f64,
        (false, 64) => u64::MAX as f64,
        _ => unimplemented!(
            "unexpected {} output size of {} bits",
            if signed { "signed" } else { "unsigned" },
            out_bits
        ),
    };

    // Emit the constant at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, upper_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, upper_bound),
        _ => unimplemented!(
            "unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}

fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
Expand Down
Loading

0 comments on commit 352c759

Please sign in to comment.