From 753fb070bb2962fc6f909c6d7adf568b6b28bbb5 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sun, 16 Jun 2024 16:56:39 +0100 Subject: [PATCH] Add `f16` inline ASM support for 32-bit ARM --- compiler/rustc_codegen_llvm/src/asm.rs | 39 ++ compiler/rustc_target/src/asm/arm.rs | 12 +- tests/assembly/asm/arm-types.rs | 497 ++++++++++++++++--------- 3 files changed, 365 insertions(+), 183 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index 60e63b956db6e..b3df9470b3ad5 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -1020,6 +1020,19 @@ fn llvm_fixup_input<'ll, 'tcx>( value } } + ( + InlineAsmRegClass::Arm( + ArmInlineAsmRegClass::dreg + | ArmInlineAsmRegClass::dreg_low8 + | ArmInlineAsmRegClass::dreg_low16 + | ArmInlineAsmRegClass::qreg + | ArmInlineAsmRegClass::qreg_low4 + | ArmInlineAsmRegClass::qreg_low8, + ), + Abi::Vector { element, count: count @ (4 | 8) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_i16(), count)) + } (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { match s.primitive() { // MIPS only supports register-length arithmetics. @@ -1130,6 +1143,19 @@ fn llvm_fixup_output<'ll, 'tcx>( value } } + ( + InlineAsmRegClass::Arm( + ArmInlineAsmRegClass::dreg + | ArmInlineAsmRegClass::dreg_low8 + | ArmInlineAsmRegClass::dreg_low16 + | ArmInlineAsmRegClass::qreg + | ArmInlineAsmRegClass::qreg_low4 + | ArmInlineAsmRegClass::qreg_low8, + ), + Abi::Vector { element, count: count @ (4 | 8) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_f16(), count)) + } (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { match s.primitive() { // MIPS only supports register-length arithmetics. @@ -1233,6 +1259,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>( layout.llvm_type(cx) } } + ( + InlineAsmRegClass::Arm( + ArmInlineAsmRegClass::dreg + | ArmInlineAsmRegClass::dreg_low8 + | ArmInlineAsmRegClass::dreg_low16 + | ArmInlineAsmRegClass::qreg + | ArmInlineAsmRegClass::qreg_low4 + | ArmInlineAsmRegClass::qreg_low8, + ), + Abi::Vector { element, count: count @ (4 | 8) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + cx.type_vector(cx.type_i16(), count) + } (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { match s.primitive() { // MIPS only supports register-length arithmetics. diff --git a/compiler/rustc_target/src/asm/arm.rs b/compiler/rustc_target/src/asm/arm.rs index 37184393a730b..9d79faadd619f 100644 --- a/compiler/rustc_target/src/asm/arm.rs +++ b/compiler/rustc_target/src/asm/arm.rs @@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass { _arch: InlineAsmArch, ) -> &'static [(InlineAsmType, Option)] { match self { - Self::reg => types! { _: I8, I16, I32, F32; }, - Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; }, + Self::reg => types! { _: I8, I16, I32, F16, F32; }, + Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; }, Self::dreg_low16 | Self::dreg_low8 => types! { - vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2); + vfp2: I64, F64; + neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2); }, Self::dreg => types! { - d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2); + d32: I64, F64; + neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2); }, Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! { - neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4); + neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4); }, } } diff --git a/tests/assembly/asm/arm-types.rs b/tests/assembly/asm/arm-types.rs index 280b6d4a2280b..eeff1a070b492 100644 --- a/tests/assembly/asm/arm-types.rs +++ b/tests/assembly/asm/arm-types.rs @@ -1,10 +1,13 @@ +//@ revisions: base d32 neon //@ assembly-output: emit-asm //@ compile-flags: --target armv7-unknown-linux-gnueabihf -//@ compile-flags: -C target-feature=+neon //@ compile-flags: -C opt-level=0 +//@[d32] compile-flags: -C target-feature=+d32 +//@[neon] compile-flags: -C target-feature=+neon --cfg d32 +//@[neon] filecheck-flags: --check-prefix d32 //@ needs-llvm-components: arm -#![feature(no_core, lang_items, rustc_attrs, repr_simd)] +#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16)] #![crate_type = "rlib"] #![no_core] #![allow(asm_sub_register, non_camel_case_types)] @@ -38,6 +41,8 @@ pub struct i32x2(i32, i32); #[repr(simd)] pub struct i64x1(i64); #[repr(simd)] +pub struct f16x4(f16, f16, f16, f16); +#[repr(simd)] pub struct f32x2(f32, f32); #[repr(simd)] pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); @@ -48,11 +53,14 @@ pub struct i32x4(i32, i32, i32, i32); #[repr(simd)] pub struct i64x2(i64, i64); #[repr(simd)] +pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16); +#[repr(simd)] pub struct f32x4(f32, f32, f32, f32); impl Copy for i8 {} impl Copy for i16 {} impl Copy for i32 {} +impl Copy for f16 {} impl Copy for f32 {} impl Copy for i64 {} impl Copy for f64 {} @@ -61,11 +69,13 @@ impl Copy for i8x8 {} impl Copy for i16x4 {} impl Copy for i32x2 {} impl Copy for i64x1 {} +impl Copy for f16x4 {} impl Copy for f32x2 {} impl Copy for i8x16 {} impl Copy for i16x8 {} impl Copy for i32x4 {} impl Copy for i64x2 {} +impl Copy for f16x8 {} impl Copy for f32x4 {} extern "C" { @@ -152,6 +162,12 @@ check!(reg_i16 i16 reg "mov"); // CHECK: @NO_APP check!(reg_i32 i32 reg "mov"); +// CHECK-LABEL: reg_f16: +// CHECK: @APP +// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}} +// CHECK: @NO_APP +check!(reg_f16 f16 reg "mov"); + // CHECK-LABEL: reg_f32: // CHECK: @APP // CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}} @@ -170,6 +186,12 @@ check!(reg_ptr ptr reg "mov"); // CHECK: @NO_APP check!(sreg_i32 i32 sreg "vmov.f32"); +// CHECK-LABEL: sreg_f16: +// CHECK: @APP +// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} +// CHECK: @NO_APP +check!(sreg_f16 f16 sreg "vmov.f32"); + // CHECK-LABEL: sreg_f32: // CHECK: @APP // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} @@ -188,52 +210,72 @@ check!(sreg_ptr ptr sreg "vmov.f32"); // CHECK: @NO_APP check!(sreg_low16_i32 i32 sreg_low16 "vmov.f32"); -// CHECK-LABEL: sreg_low16_f32: +// CHECK-LABEL: sreg_low16_f16: // CHECK: @APP // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} // CHECK: @NO_APP -check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32"); +check!(sreg_low16_f16 f16 sreg_low16 "vmov.f32"); -// CHECK-LABEL: dreg_i64: +// CHECK-LABEL: sreg_low16_f32: // CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} // CHECK: @NO_APP +check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32"); + +// d32-LABEL: dreg_i64: +// d32: @APP +// d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// d32: @NO_APP +#[cfg(d32)] check!(dreg_i64 i64 dreg "vmov.f64"); -// CHECK-LABEL: dreg_f64: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// d32-LABEL: dreg_f64: +// d32: @APP +// d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// d32: @NO_APP +#[cfg(d32)] check!(dreg_f64 f64 dreg "vmov.f64"); -// CHECK-LABEL: dreg_i8x8: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_i8x8: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_i8x8 i8x8 dreg "vmov.f64"); -// CHECK-LABEL: dreg_i16x4: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_i16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_i16x4 i16x4 dreg "vmov.f64"); -// CHECK-LABEL: dreg_i32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_i32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_i32x2 i32x2 dreg "vmov.f64"); -// CHECK-LABEL: dreg_i64x1: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_i64x1: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_i64x1 i64x1 dreg "vmov.f64"); -// CHECK-LABEL: dreg_f32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_f16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(dreg_f16x4 f16x4 dreg "vmov.f64"); + +// neon-LABEL: dreg_f32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_f32x2 f32x2 dreg "vmov.f64"); // CHECK-LABEL: dreg_low16_i64: @@ -248,34 +290,46 @@ check!(dreg_low16_i64 i64 dreg_low16 "vmov.f64"); // CHECK: @NO_APP check!(dreg_low16_f64 f64 dreg_low16 "vmov.f64"); -// CHECK-LABEL: dreg_low16_i8x8: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low16_i8x8: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low16_i8x8 i8x8 dreg_low16 "vmov.f64"); -// CHECK-LABEL: dreg_low16_i16x4: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low16_i16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low16_i16x4 i16x4 dreg_low16 "vmov.f64"); -// CHECK-LABEL: dreg_low16_i32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low16_i32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low16_i32x2 i32x2 dreg_low16 "vmov.f64"); -// CHECK-LABEL: dreg_low16_i64x1: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low16_i64x1: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low16_i64x1 i64x1 dreg_low16 "vmov.f64"); -// CHECK-LABEL: dreg_low16_f32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low16_f16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(dreg_low16_f16x4 f16x4 dreg_low16 "vmov.f64"); + +// neon-LABEL: dreg_low16_f32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low16_f32x2 f32x2 dreg_low16 "vmov.f64"); // CHECK-LABEL: dreg_low8_i64: @@ -290,124 +344,172 @@ check!(dreg_low8_i64 i64 dreg_low8 "vmov.f64"); // CHECK: @NO_APP check!(dreg_low8_f64 f64 dreg_low8 "vmov.f64"); -// CHECK-LABEL: dreg_low8_i8x8: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low8_i8x8: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low8_i8x8 i8x8 dreg_low8 "vmov.f64"); -// CHECK-LABEL: dreg_low8_i16x4: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low8_i16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low8_i16x4 i16x4 dreg_low8 "vmov.f64"); -// CHECK-LABEL: dreg_low8_i32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low8_i32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low8_i32x2 i32x2 dreg_low8 "vmov.f64"); -// CHECK-LABEL: dreg_low8_i64x1: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low8_i64x1: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low8_i64x1 i64x1 dreg_low8 "vmov.f64"); -// CHECK-LABEL: dreg_low8_f32x2: -// CHECK: @APP -// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: dreg_low8_f16x4: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(dreg_low8_f16x4 f16x4 dreg_low8 "vmov.f64"); + +// neon-LABEL: dreg_low8_f32x2: +// neon: @APP +// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(dreg_low8_f32x2 f32x2 dreg_low8 "vmov.f64"); -// CHECK-LABEL: qreg_i8x16: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_i8x16: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_i8x16 i8x16 qreg "vmov"); -// CHECK-LABEL: qreg_i16x8: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_i16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_i16x8 i16x8 qreg "vmov"); -// CHECK-LABEL: qreg_i32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_i32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_i32x4 i32x4 qreg "vmov"); -// CHECK-LABEL: qreg_i64x2: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_i64x2: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_i64x2 i64x2 qreg "vmov"); -// CHECK-LABEL: qreg_f32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_f16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(qreg_f16x8 f16x8 qreg "vmov"); + +// neon-LABEL: qreg_f32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_f32x4 f32x4 qreg "vmov"); -// CHECK-LABEL: qreg_low8_i8x16: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low8_i8x16: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low8_i8x16 i8x16 qreg_low8 "vmov"); -// CHECK-LABEL: qreg_low8_i16x8: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low8_i16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low8_i16x8 i16x8 qreg_low8 "vmov"); -// CHECK-LABEL: qreg_low8_i32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low8_i32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low8_i32x4 i32x4 qreg_low8 "vmov"); -// CHECK-LABEL: qreg_low8_i64x2: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low8_i64x2: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low8_i64x2 i64x2 qreg_low8 "vmov"); -// CHECK-LABEL: qreg_low8_f32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low8_f16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(qreg_low8_f16x8 f16x8 qreg_low8 "vmov"); + +// neon-LABEL: qreg_low8_f32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low8_f32x4 f32x4 qreg_low8 "vmov"); -// CHECK-LABEL: qreg_low4_i8x16: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low4_i8x16: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low4_i8x16 i8x16 qreg_low4 "vmov"); -// CHECK-LABEL: qreg_low4_i16x8: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low4_i16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low4_i16x8 i16x8 qreg_low4 "vmov"); -// CHECK-LABEL: qreg_low4_i32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low4_i32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low4_i32x4 i32x4 qreg_low4 "vmov"); -// CHECK-LABEL: qreg_low4_i64x2: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low4_i64x2: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low4_i64x2 i64x2 qreg_low4 "vmov"); -// CHECK-LABEL: qreg_low4_f32x4: -// CHECK: @APP -// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} -// CHECK: @NO_APP +// neon-LABEL: qreg_low4_f16x8: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] +check!(qreg_low4_f16x8 f16x8 qreg_low4 "vmov"); + +// neon-LABEL: qreg_low4_f32x4: +// neon: @APP +// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// neon: @NO_APP +#[cfg(neon)] check!(qreg_low4_f32x4 f32x4 qreg_low4 "vmov"); // CHECK-LABEL: r0_i8: @@ -428,6 +530,12 @@ check_reg!(r0_i16 i16 "r0" "mov"); // CHECK: @NO_APP check_reg!(r0_i32 i32 "r0" "mov"); +// CHECK-LABEL: r0_f16: +// CHECK: @APP +// CHECK: mov r0, r0 +// CHECK: @NO_APP +check_reg!(r0_f16 f16 "r0" "mov"); + // CHECK-LABEL: r0_f32: // CHECK: @APP // CHECK: mov r0, r0 @@ -446,6 +554,12 @@ check_reg!(r0_ptr ptr "r0" "mov"); // CHECK: @NO_APP check_reg!(s0_i32 i32 "s0" "vmov.f32"); +// CHECK-LABEL: s0_f16: +// CHECK: @APP +// CHECK: vmov.f32 s0, s0 +// CHECK: @NO_APP +check_reg!(s0_f16 f16 "s0" "vmov.f32"); + // CHECK-LABEL: s0_f32: // CHECK: @APP // CHECK: vmov.f32 s0, s0 @@ -458,74 +572,101 @@ check_reg!(s0_f32 f32 "s0" "vmov.f32"); // CHECK: @NO_APP check_reg!(s0_ptr ptr "s0" "vmov.f32"); -// CHECK-LABEL: d0_i64: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// FIXME(#126797): "d0" should work with `i64` and `f64` even when `d32` is disabled. +// d32-LABEL: d0_i64: +// d32: @APP +// d32: vmov.f64 d0, d0 +// d32: @NO_APP +#[cfg(d32)] check_reg!(d0_i64 i64 "d0" "vmov.f64"); -// CHECK-LABEL: d0_f64: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// d32-LABEL: d0_f64: +// d32: @APP +// d32: vmov.f64 d0, d0 +// d32: @NO_APP +#[cfg(d32)] check_reg!(d0_f64 f64 "d0" "vmov.f64"); -// CHECK-LABEL: d0_i8x8: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// neon-LABEL: d0_i8x8: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(d0_i8x8 i8x8 "d0" "vmov.f64"); -// CHECK-LABEL: d0_i16x4: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// neon-LABEL: d0_i16x4: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(d0_i16x4 i16x4 "d0" "vmov.f64"); -// CHECK-LABEL: d0_i32x2: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// neon-LABEL: d0_i32x2: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(d0_i32x2 i32x2 "d0" "vmov.f64"); -// CHECK-LABEL: d0_i64x1: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// neon-LABEL: d0_i64x1: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(d0_i64x1 i64x1 "d0" "vmov.f64"); -// CHECK-LABEL: d0_f32x2: -// CHECK: @APP -// CHECK: vmov.f64 d0, d0 -// CHECK: @NO_APP +// neon-LABEL: d0_f16x4: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] +check_reg!(d0_f16x4 f16x4 "d0" "vmov.f64"); + +// neon-LABEL: d0_f32x2: +// neon: @APP +// neon: vmov.f64 d0, d0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(d0_f32x2 f32x2 "d0" "vmov.f64"); -// CHECK-LABEL: q0_i8x16: -// CHECK: @APP -// CHECK: vorr q0, q0, q0 -// CHECK: @NO_APP +// neon-LABEL: q0_i8x16: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(q0_i8x16 i8x16 "q0" "vmov"); -// CHECK-LABEL: q0_i16x8: -// CHECK: @APP -// CHECK: vorr q0, q0, q0 -// CHECK: @NO_APP +// neon-LABEL: q0_i16x8: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(q0_i16x8 i16x8 "q0" "vmov"); -// CHECK-LABEL: q0_i32x4: -// CHECK: @APP -// CHECK: vorr q0, q0, q0 -// CHECK: @NO_APP +// neon-LABEL: q0_i32x4: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(q0_i32x4 i32x4 "q0" "vmov"); -// CHECK-LABEL: q0_i64x2: -// CHECK: @APP -// CHECK: vorr q0, q0, q0 -// CHECK: @NO_APP +// neon-LABEL: q0_i64x2: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(q0_i64x2 i64x2 "q0" "vmov"); -// CHECK-LABEL: q0_f32x4: -// CHECK: @APP -// CHECK: vorr q0, q0, q0 -// CHECK: @NO_APP +// neon-LABEL: q0_f16x8: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] +check_reg!(q0_f16x8 f16x8 "q0" "vmov"); + +// neon-LABEL: q0_f32x4: +// neon: @APP +// neon: vorr q0, q0, q0 +// neon: @NO_APP +#[cfg(neon)] check_reg!(q0_f32x4 f32x4 "q0" "vmov");