Merge pull request #2862 from akirilov-arm/simd_boolean

Enable the simd_boolean test for AArch64

cfallin authored Apr 28, 2021
2 parents b89c959 + 480670e commit 207da98

Showing 5 changed files with 182 additions and 35 deletions.
4 changes: 0 additions & 4 deletions build.rs
@@ -220,10 +220,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
},
"Cranelift" => match (testsuite, testname) {
("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend.
// These are only implemented on x64.
("simd", "simd_i64x2_arith2") | ("simd", "simd_boolean") => {
return !platform_is_x64() || cfg!(feature = "old-x86-backend")
}
// These are new instructions that are not really implemented in any backend.
("simd", "simd_i8x16_arith2")
| ("simd", "simd_conversions")
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -427,6 +427,15 @@ fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: R
| machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

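// The fixed bits below select an Advanced SIMD scalar pairwise operation
// (U = 0) with the size field hard-coded to 0b11, i.e. 64-bit elements;
// bits 12-16 choose the particular operation. (Descriptive comment added
// editorially; not part of the original commit.)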
0b010_11110_11_11000_11011_10_00000_00000
| bits_12_16 << 12
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert_eq!(q & 0b1, q);
debug_assert_eq!(u & 0b1, u);
@@ -1628,6 +1637,7 @@ impl MachInstEmit for Inst {
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
(0b0, 0b00101, enc_size)
}
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}
@@ -2054,6 +2064,13 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecRRPair { op, rd, rn } => {
let bits_12_16 = match op {
VecPairOp::Addp => 0b11011,
};

sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
}
&Inst::VecRRR {
rd,
rn,
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2311,6 +2311,16 @@ fn test_aarch64_binemit() {
"sqxtun v16.8b, v23.8h",
));

insns.push((
Inst::VecRRPair {
op: VecPairOp::Addp,
rd: writable_vreg(0),
rn: vreg(30),
},
"C0BBF15E",
"addp d0, v30.2d",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqadd,
@@ -3803,6 +3813,17 @@ fn test_aarch64_binemit() {
"cnt v23.8b, v5.8b",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: writable_vreg(12),
rn: vreg(27),
size: VectorSize::Size16x8,
},
"6C9B604E",
"cmeq v12.8h, v27.8h, #0",
));

insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
100 changes: 69 additions & 31 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -334,6 +334,8 @@ pub enum VecMisc2 {
Frintp,
/// Population count per byte
Cnt,
/// Compare bitwise equal to 0
Cmeq0,
}

/// A Vector narrowing operation with two registers.
@@ -347,6 +349,13 @@ pub enum VecMiscNarrowOp {
Sqxtun,
}

/// A vector operation on a pair of elements with one register.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecPairOp {
/// Add pair of elements
Addp,
}

/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
@@ -1011,6 +1020,13 @@ pub enum Inst {
high_half: bool,
},

/// 1-operand vector instruction that operates on a pair of elements.
VecRRPair {
op: VecPairOp,
rd: Writable<Reg>,
rn: Reg,
},

/// A vector ALU op.
VecRRR {
alu_op: VecALUOp,
@@ -2028,6 +2044,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
}
}
&Inst::VecRRPair { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRR {
alu_op, rd, rn, rm, ..
} => {
@@ -2816,6 +2836,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
}
}
&mut Inst::VecRRPair {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRR {
alu_op,
ref mut rd,
@@ -3856,6 +3884,15 @@ impl Inst {
};
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRPair { op, rd, rn } => {
let op = match op {
VecPairOp::Addp => "addp",
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);

format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRR {
rd,
rn,
@@ -3919,43 +3956,44 @@ impl Inst {
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecMisc { op, rd, rn, size } => {
let is_shll = op == VecMisc2::Shll;
let suffix = match (is_shll, size) {
(true, VectorSize::Size8x8) => ", #8",
(true, VectorSize::Size16x4) => ", #16",
(true, VectorSize::Size32x2) => ", #32",
_ => "",
};

let (op, size) = match op {
VecMisc2::Not => (
"mvn",
if size.is_128bits() {
let (op, rd_size, size, suffix) = match op {
VecMisc2::Not => {
let size = if size.is_128bits() {
VectorSize::Size8x16
} else {
VectorSize::Size8x8
};

("mvn", size, size, "")
}
VecMisc2::Neg => ("neg", size, size, ""),
VecMisc2::Abs => ("abs", size, size, ""),
VecMisc2::Fabs => ("fabs", size, size, ""),
VecMisc2::Fneg => ("fneg", size, size, ""),
VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
VecMisc2::Rev64 => ("rev64", size, size, ""),
VecMisc2::Shll => (
"shll",
size.widen(),
size,
match size {
VectorSize::Size8x8 => ", #8",
VectorSize::Size16x4 => ", #16",
VectorSize::Size32x2 => ", #32",
_ => panic!("Unexpected vector size: {:?}", size),
},
),
VecMisc2::Neg => ("neg", size),
VecMisc2::Abs => ("abs", size),
VecMisc2::Fabs => ("fabs", size),
VecMisc2::Fneg => ("fneg", size),
VecMisc2::Fsqrt => ("fsqrt", size),
VecMisc2::Rev64 => ("rev64", size),
VecMisc2::Shll => ("shll", size),
VecMisc2::Fcvtzs => ("fcvtzs", size),
VecMisc2::Fcvtzu => ("fcvtzu", size),
VecMisc2::Scvtf => ("scvtf", size),
VecMisc2::Ucvtf => ("ucvtf", size),
VecMisc2::Frintn => ("frintn", size),
VecMisc2::Frintz => ("frintz", size),
VecMisc2::Frintm => ("frintm", size),
VecMisc2::Frintp => ("frintp", size),
VecMisc2::Cnt => ("cnt", size),
VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
VecMisc2::Scvtf => ("scvtf", size, size, ""),
VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
VecMisc2::Frintn => ("frintn", size, size, ""),
VecMisc2::Frintz => ("frintz", size, size, ""),
VecMisc2::Frintm => ("frintm", size, size, ""),
VecMisc2::Frintp => ("frintp", size, size, ""),
VecMisc2::Cnt => ("cnt", size, size, ""),
VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
};

let rd_size = if is_shll { size.widen() } else { size };

let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}{}", op, rd, rn, suffix)
75 changes: 75 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1950,6 +1950,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}

Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();

// cmeq vtmp.2d, vm.2d, #0
// addp dtmp, vtmp.2d
// fcmp dtmp, dtmp
// cset xd, eq
//
// Note that after the ADDP the value of the temporary register will
// be either 0 when all input elements are true, i.e. non-zero, or a
// NaN otherwise (either -1 or -2 when represented as an integer);
// NaNs are the only floating-point numbers that compare unequal to
// themselves.
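//
// Worked example (editorial note, not part of the original commit): if
// both input lanes are non-zero (true), CMEQ #0 writes zero to both lanes
// and ADDP produces 0, which compares equal to itself under FCMP, so
// CSET EQ yields 1. If either lane is zero, CMEQ writes all ones to it,
// so the pairwise sum is -1 or -2; both bit patterns have an all-ones
// exponent and a non-zero mantissa when viewed as an f64, i.e. a NaN,
// so the comparison is unordered and CSET EQ yields 0.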

ctx.emit(Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: tmp,
rn: rm,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::VecRRPair {
op: VecPairOp::Addp,
rd: tmp,
rn: tmp.to_reg(),
});
ctx.emit(Inst::FpuCmp64 {
rn: tmp.to_reg(),
rm: tmp.to_reg(),
});
materialize_bool_result(ctx, insn, rd, Cond::Eq);
}

Opcode::VanyTrue | Opcode::VallTrue => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -2180,6 +2214,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
size: VectorSize::Size32x4,
});
}
I64X2 => {
// mov dst_r, src_v.d[0]
// mov tmp_r0, src_v.d[1]
// lsr dst_r, dst_r, #63
// lsr tmp_r0, tmp_r0, #63
// add dst_r, dst_r, tmp_r0, lsl #1
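//
// i.e. dst_r ends up holding sign(lane 0) | (sign(lane 1) << 1), the two
// sign bits packed into bits 0 and 1 of the result. (Descriptive comment
// added editorially; not part of the original commit.)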
ctx.emit(Inst::MovFromVec {
rd: dst_r,
rn: src_v,
idx: 0,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::MovFromVec {
rd: tmp_r0,
rn: src_v,
idx: 1,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr64,
rd: dst_r,
rn: dst_r.to_reg(),
immshift: ImmShift::maybe_from_u64(63).unwrap(),
});
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr64,
rd: tmp_r0,
rn: tmp_r0.to_reg(),
immshift: ImmShift::maybe_from_u64(63).unwrap(),
});
ctx.emit(Inst::AluRRRShift {
alu_op: ALUOp::Add32,
rd: dst_r,
rn: dst_r.to_reg(),
rm: tmp_r0.to_reg(),
shiftop: ShiftOpAndAmt::new(
ShiftOp::LSL,
ShiftOpShiftImm::maybe_from_shift(1).unwrap(),
),
});
}
_ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
}
}
