Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port Fcopysign..FcvtToSintSat to ISLE (AArch64) #4753

Merged
merged 2 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 207 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,14 @@
(size VectorSize)
(imm u8))

;; Destructive vector shift by immediate.
(VecShiftImmMod
(op VecShiftImmModOp)
(rd WritableReg)
(rn Reg)
(size VectorSize)
(imm u8))

;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
(VecExtract
Expand Down Expand Up @@ -1315,6 +1323,13 @@
(Sshr)
))

;; Destructive shift-by-immediate operation on each lane of a vector.
(type VecShiftImmModOp
(enum
;; Shift left and insert
(Sli)
))

;; Atomic read-modify-write operations with acquire-release semantics
(type AtomicRMWOp
(enum
Expand Down Expand Up @@ -1386,6 +1401,48 @@
(decl u64_into_imm_logic (Type u64) ImmLogic)
(extern constructor u64_into_imm_logic u64_into_imm_logic)

;; Calculate the minimum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value (bool u8 u8) Reg)
dheaton-arm marked this conversation as resolved.
Show resolved Hide resolved
(extern constructor min_fp_value min_fp_value)

;; Calculate the maximum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)

;; Calculate the minimum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)

;; Calculate the maximum acceptable floating-point value for a conversion to
;; floating point from an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)

;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_ushr fpu_op_ri_ushr)

;; Constructs an FPUOpRI.Sli* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_sli (u8 u8) FPUOpRI)
(extern constructor fpu_op_ri_sli fpu_op_ri_sli)

(decl imm12_from_negated_u64 (Imm12) u64)
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)

Expand Down Expand Up @@ -1533,6 +1590,12 @@
(_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size))))
dst))

(decl fpu_rri (FPUOpRI Reg) Reg)
(rule (fpu_rri op src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuRRI op dst src))))
dst))

;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
(rule (fpu_rrr op src1 src2 size)
Expand Down Expand Up @@ -2611,3 +2674,147 @@
;; to clobber LR.
(let ((_ Unit (emit (MInst.Xpaclri))))
(mov_preg (preg_link))))

;; Helper for getting the maximum shift amount for a type.

(decl max_shift (Type) u8)
(rule (max_shift $F64) 63)
(rule (max_shift $F32) 31)

;; Helper for generating `fcopysign` instruction sequences.

(decl fcopy_sign (Reg Reg Type) Reg)
(rule (fcopy_sign x y (ty_scalar_float ty))
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuMove64 dst x)))
(tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
(_ Unit (emit (MInst.FpuRRI (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst tmp))))
dst))
(rule (fcopy_sign x y ty @ (multi_lane _ _))
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMove128 dst x)))
(tmp Reg (vec_shift_imm (VecShiftImmOp.Ushr) (max_shift (lane_type ty)) y (vector_size ty)))
(_ Unit (emit (MInst.VecShiftImmMod (VecShiftImmModOp.Sli) dst tmp (vector_size ty) (max_shift (lane_type ty))))))
dst))

;; Helpers for generating `MInst.FpuToInt` instructions.

(decl fpu_to_int_nan_check (ScalarSize Reg) Reg)
(rule (fpu_to_int_nan_check size src)
(let ((r ValueRegs
(with_flags (fpu_cmp size src src)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Vs))
(trap_code_bad_conversion_to_integer))
src))))
(value_regs_get r 0)))

;; Checks that the value is not less than the minimum bound,
;; accepting a boolean (whether the type is signed), input type,
;; output type, and registers containing the source and minimum bound.
(decl fpu_to_int_underflow_check (bool Type Type Reg Reg) Reg)
(rule (fpu_to_int_underflow_check $true $F32 (fits_in_16 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size32) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $true $F64 (fits_in_32 out_ty) src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (ScalarSize.Size64) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule -1 (fpu_to_int_underflow_check $true in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Lt))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))
(rule (fpu_to_int_underflow_check $false in_ty _out_ty src min)
(let ((r ValueRegs
(with_flags (fpu_cmp (scalar_size in_ty) src min)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Le))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))

(decl fpu_to_int_overflow_check (ScalarSize Reg Reg) Reg)
(rule (fpu_to_int_overflow_check size src max)
(let ((r ValueRegs
(with_flags (fpu_cmp size src max)
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.TrapIf (cond_br_cond (Cond.Ge))
(trap_code_integer_overflow))
src))))
(value_regs_get r 0)))

;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, trapping if the value
;; is a NaN or does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt op src signed in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(src Reg (fpu_to_int_nan_check size src))
(min Reg (min_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_underflow_check signed in_ty out_ty src min))
(max Reg (max_fp_value signed in_bits out_bits))
(src Reg (fpu_to_int_overflow_check size src max)))
(fpu_to_int op src)))

;; Emits the appropriate instruction sequence to convert a
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the input is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))

(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.FpuToInt op dst src))))
dst))

;; Helper for generating `MInst.IntToFpu` instructions.

(decl int_to_fpu (IntToFpuOp Reg) Reg)
(rule (int_to_fpu op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.IntToFpu op dst src))))
dst))
44 changes: 44 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,50 @@ impl MachInstEmit for Inst {
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (is_shr, mut template) = match op {
VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
};
if size.is_128bits() {
template |= 0b1 << 30;
}
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
let immh_immb = match (size.lane_size(), is_shr) {
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
),
};
let rn_enc = machreg_to_vec(rn);
let rd_enc = machreg_to_vec(rd.to_reg());
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
Expand Down
20 changes: 19 additions & 1 deletion cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub use crate::isa::aarch64::lower::isle::generated_code::{
ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3,
FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp,
VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp,
VecRRRLongOp, VecShiftImmOp,
VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};

/// A floating-point unit (FPU) operation with two args, a register and an immediate.
Expand Down Expand Up @@ -767,6 +767,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecShiftImmMod { rd, rn, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
}
&Inst::VecExtract { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
Expand Down Expand Up @@ -2371,6 +2375,20 @@ impl Inst {
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecShiftImmMod {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmModOp::Sli => "sli",
};
let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_vector(rn, size, allocs);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs);
let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs);
Expand Down
Loading