Skip to content

Commit

Permalink
Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)
Browse files Browse the repository at this point in the history
* Port `Fcopysign`..``FcvtToSintSat` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
  - Also introduced missing support for `fcopysign` on vector values, as
    per the docs.
  - This introduces the vector encoding for the `SLI` machine
    instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`

Copyright (c) 2022 Arm Limited

* Document helpers and abstract conversion checks
  • Loading branch information
dheaton-arm authored Aug 24, 2022
1 parent 7e3c481 commit 94bcbe8
Show file tree
Hide file tree
Showing 12 changed files with 863 additions and 548 deletions.
207 changes: 207 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,14 @@
(size VectorSize)
(imm u8))

;; Destructive vector shift by immediate.
(VecShiftImmMod
(op VecShiftImmModOp)
(rd WritableReg)
(rn Reg)
(size VectorSize)
(imm u8))

;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
(VecExtract
Expand Down Expand Up @@ -1315,6 +1323,13 @@
(Sshr)
))

;; Destructive shift-by-immediate operation on each lane of a vector.
(type VecShiftImmModOp
(enum
;; Shift left and insert
(Sli)
))

;; Atomic read-modify-write operations with acquire-release semantics
(type AtomicRMWOp
(enum
Expand Down Expand Up @@ -1386,6 +1401,48 @@
(decl u64_into_imm_logic (Type u64) ImmLogic)
(extern constructor u64_into_imm_logic u64_into_imm_logic)

;; Calculate the minimum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value (bool u8 u8) Reg)
(extern constructor min_fp_value min_fp_value)

;; Calculate the maximum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)

;; Calculate the minimum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)

;; Calculate the maximum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)

;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)

;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)

(decl imm12_from_negated_u64 (Imm12) u64)
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)

Expand Down Expand Up @@ -1533,6 +1590,12 @@
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst))

(decl fpu_rri (FPUOpRI Reg) Reg)
(rule (fpu_rri op src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuRRI op dst src))))
dst))

;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op src1 src2 size)
Expand Down Expand Up @@ -2611,3 +2674,147 @@
;; to clobber LR.
(let ((_ Unit (emit (MInst.Xpaclri))))
(mov_preg (preg_link))))

;; Helper for getting the maximum shift amount for a type.

(decl max_shift (Type) u8)
(rule (max_shift $F64) 63)
(rule (max_shift $F32) 31)

;; Helper for generating `fcopysign` instruction sequences.

(decl fcopy_sign (Reg Reg Type) Reg)
(rule (fcopy_sign x y (ty_scalar_float ty))
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuMove64 dst x)))
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
dst))
(rule (fcopy_sign x y ty @ (multi_lane _ _))
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst x)))
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
dst))

;; Helpers for generating `MInst.FpuToInt` instructions.

(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
(rule (fpu_to_int_nan_check size src)
(let ((r ValueRegs
(with_flags (fpu_cmp size src src)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Vs))
(trap_code_bad_conversion_to_integer))
src))))
(value_regs_get r 0)))

;; Checks that the value is not less than the minimum bound,
;; accepting a boolean (whether the type is signed), input type,
;; output type, and registers containing the source and minimum bound.
(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size32) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size64) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Lt))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))

(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
(rule (fpu_to_int_overflow_check size src max)
(let ((r ValueRegs
(with_flags (fpu_cmp size src max)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Ge))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))

;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, trapping if the value
;; is a NaN or does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt op src signed in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(src Reg (fpu_to_int_nan_check size src))
(min Reg (min_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
(max Reg (max_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_overflow_check size src max)))
(fpu_to_int op src)))

;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))

(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.FpuToInt op dst src))))
dst))

;; Helper for generating `MInst.IntToFpu` instructions.

(decl int_to_fpu (IntToFpuOp Reg) Reg)
(rule (int_to_fpu op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.IntToFpu op dst src))))
dst))
44 changes: 44 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (is_shr, mut template) = match op {
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
};
if size.is_128bits() {
template |= 0b1 << 30;
}
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
let immh_immb = match (size.lane_size(), is_shr) {
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
),
};
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
Expand Down
20 changes: 19 additions & 1 deletion cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
VecRRRLongOp, VecShiftImmOp,
VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};

/// A floating-point unit (FPU) operation with two args, a register and an immediate.
Expand Down Expand Up @@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecShiftImmMod { rd, rn, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
}
&Inst::VecExtract { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
Expand Down Expand Up @@ -2371,6 +2375,20 @@ impl Inst {
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmModOp::Sli => "sli",
};
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
Expand Down
Loading

0 comments on commit 94bcbe8

Please sign in to comment.