diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index b5826a226ea9..f45caf58b594 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -576,6 +576,14 @@ (rm Reg) (size VectorSize)) + ;; A vector ALU op modifying a source register. + (VecRRRMod + (alu_op VecALUModOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (size VectorSize)) + ;; Vector two register miscellaneous instruction. (VecMisc (op VecMisc2) @@ -1108,10 +1116,6 @@ (Orr) ;; Bitwise exclusive or (Eor) - ;; Bitwise select - ;; This opcode should only be used with the `vec_rrr_inplace` - ;; constructor. - (Bsl) ;; Unsigned maximum pairwise (Umaxp) ;; Add @@ -1146,10 +1150,6 @@ (Fmin) ;; Floating-point multiply (Fmul) - ;; Floating-point fused multiply-add vectors - ;; This opcode should only be used with the `vec_rrr_inplace` - ;; constructor. - (Fmla) ;; Add pairwise (Addp) ;; Zip vectors (primary) [meaning, high halves] @@ -1158,6 +1158,15 @@ (Sqrdmulh) )) +;; A Vector ALU operation which modifies a source register. +(type VecALUModOp + (enum + ;; Bitwise select + (Bsl) + ;; Floating-point fused multiply-add vectors + (Fmla) +)) + ;; A Vector miscellaneous operation with two registers. (type VecMisc2 (enum @@ -1508,11 +1517,11 @@ ;; Helper for emitting `MInst.VecRRR` instructions which use three registers, ;; one of which is both source and output. -(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg) -(rule (vec_rrr_inplace op src1 src2 src3 size) +(decl vec_rrr_mod (VecALUModOp Reg Reg Reg VectorSize) Reg) +(rule (vec_rrr_mod op src1 src2 src3 size) (let ((dst WritableReg (temp_writable_reg $I8X16)) (_1 Unit (emit (MInst.FpuMove128 dst src1))) - (_2 Unit (emit (MInst.VecRRR op dst src2 src3 size)))) + (_2 Unit (emit (MInst.VecRRRMod op dst src2 src3 size)))) dst)) ;; Helper for emitting `MInst.FpuRRR` instructions. @@ -2198,10 +2207,7 @@ (decl bsl (Type Reg Reg Reg) Reg) (rule (bsl ty c x y) - (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuMove128 dst c))) - (_ Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty))))) - dst)) + (vec_rrr_mod (VecALUModOp.Bsl) c x y (vector_size ty))) ;; Helper for generating a `udf` instruction. diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 7ce8a048d183..0d307ba7c923 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -752,6 +752,16 @@ impl VectorSize { (q, size) } + + /// Return the encoding bit that is used by some floating-point SIMD + /// instructions for a particular operand size. + pub fn enc_float_size(&self) -> u32 { + match self.lane_size() { + ScalarSize::Size32 => 0b0, + ScalarSize::Size64 => 0b1, + size => panic!("Unsupported floating-point size for vector op: {:?}", size), + } + } } pub(crate) fn dynamic_to_fixed(ty: Type) -> Type { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 4ebf4de99449..d8a0f805aca6 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2543,17 +2543,9 @@ impl MachInstEmit for Inst { | VecALUOp::Fdiv | VecALUOp::Fmax | VecALUOp::Fmin - | VecALUOp::Fmul - | VecALUOp::Fmla => true, + | VecALUOp::Fmul => true, _ => false, }; - let enc_float_size = match (is_float, size) { - (true, VectorSize::Size32x2) => 0b0, - (true, VectorSize::Size32x4) => 0b0, - (true, VectorSize::Size64x2) => 0b1, - (true, _) => unimplemented!(), - _ => 0, - }; let (top11, bit15_10) = match alu_op { VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011), @@ -2574,7 +2566,6 @@ impl MachInstEmit for Inst { VecALUOp::Bic => (0b000_01110_01_1, 0b000111), VecALUOp::Orr => (0b000_01110_10_1, 0b000111), VecALUOp::Eor => (0b001_01110_00_1, 0b000111), - VecALUOp::Bsl => (0b001_01110_01_1, 0b000111), VecALUOp::Umaxp => { debug_assert_ne!(size, VectorSize::Size64x2); @@ -2619,7 +2610,6 @@ impl MachInstEmit for Inst { VecALUOp::Fmax => (0b000_01110_00_1, 0b111101), VecALUOp::Fmin => (0b000_01110_10_1, 0b111101), VecALUOp::Fmul => (0b001_01110_00_1, 0b110111), - VecALUOp::Fmla => (0b000_01110_00_1, 0b110011), VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111), VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110), VecALUOp::Sqrdmulh => { @@ -2632,12 +2622,32 @@ impl MachInstEmit for Inst { } }; let top11 = if is_float { - top11 | enc_float_size << 1 + top11 | size.enc_float_size() << 1 } else { top11 }; sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); } + &Inst::VecRRRMod { + rd, + rn, + rm, + alu_op, + size, + } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let (q, _enc_size) = size.enc_size(); + + let (top11, bit15_10) = match alu_op { + VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111), + VecALUModOp::Fmla => { + (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011) + } + }; + sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); + } &Inst::VecLoadReplicate { rd, rn, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 01d3e0fe48b5..86b6a543f525 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -3383,8 +3383,8 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRR { - alu_op: VecALUOp::Bsl, + Inst::VecRRRMod { + alu_op: VecALUModOp::Bsl, rd: writable_vreg(8), rn: vreg(9), rm: vreg(1), @@ -4055,8 +4055,8 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRR { - alu_op: VecALUOp::Fmla, + Inst::VecRRRMod { + alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), rn: vreg(0), rm: vreg(5), @@ -4067,8 +4067,8 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRR { - alu_op: VecALUOp::Fmla, + Inst::VecRRRMod { + alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), rn: vreg(0), rm: vreg(5), @@ -4079,8 +4079,8 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecRRR { - alu_op: VecALUOp::Fmla, + Inst::VecRRRMod { + alu_op: VecALUModOp::Fmla, rd: writable_vreg(2), rn: vreg(0), rm: vreg(5), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index a35e97e1c59a..6e45beb66b39 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -37,9 +37,9 @@ mod emit_tests; pub use crate::isa::aarch64::lower::isle::generated_code::{ ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, - FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, - VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, - VecShiftImmOp, + FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, + VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, + VecRRRLongOp, VecShiftImmOp, }; /// A floating-point unit (FPU) operation with two args, a register and an immediate. @@ -957,14 +957,13 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecRRR { - alu_op, rd, rn, rm, .. - } => { - if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Fmla { - collector.reg_mod(rd); - } else { - collector.reg_def(rd); - } + &Inst::VecRRR { rd, rn, rm, .. } => { + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + } + &Inst::VecRRRMod { rd, rn, rm, .. } => { + collector.reg_mod(rd); collector.reg_use(rn); collector.reg_use(rm); } @@ -2208,7 +2207,6 @@ impl Inst { VecALUOp::Bic => ("bic", VectorSize::Size8x16), VecALUOp::Orr => ("orr", VectorSize::Size8x16), VecALUOp::Eor => ("eor", VectorSize::Size8x16), - VecALUOp::Bsl => ("bsl", VectorSize::Size8x16), VecALUOp::Umaxp => ("umaxp", size), VecALUOp::Add => ("add", size), VecALUOp::Sub => ("sub", size), @@ -2226,7 +2224,6 @@ impl Inst { VecALUOp::Fmax => ("fmax", size), VecALUOp::Fmin => ("fmin", size), VecALUOp::Fmul => ("fmul", size), - VecALUOp::Fmla => ("fmla", size), VecALUOp::Addp => ("addp", size), VecALUOp::Zip1 => ("zip1", size), VecALUOp::Sqrdmulh => ("sqrdmulh", size), @@ -2236,6 +2233,22 @@ impl Inst { let rm = pretty_print_vreg_vector(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } + &Inst::VecRRRMod { + rd, + rn, + rm, + alu_op, + size, + } => { + let (op, size) = match alu_op { + VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16), + VecALUModOp::Fmla => ("fmla", size), + }; + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); + let rm = pretty_print_vreg_vector(rm, size, allocs); + format!("{} {}, {}, {}", op, rd, rn, rm) + } &Inst::VecRRRLong { rd, rn, diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index deeac5193840..293cd9bc0f13 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -380,7 +380,7 @@ ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty @ (multi_lane _ _) (fma x y z))) - (vec_rrr_inplace (VecALUOp.Fmla) z x y (vector_size ty))) + (vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fma x y z))) (fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))