From 4eca1c06a4a9183fcf7bb230d894617caf3cf3be Mon Sep 17 00:00:00 2001 From: Pavel Iliin Date: Wed, 22 Apr 2020 12:34:36 +0100 Subject: [PATCH] [AArch64][FIX] f16 indexed patterns encoding restrictions. --- .../aarch64-v8.2a-neon-intrinsics-constrained.c | 4 ++-- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c index b72bd3f977ddf2..6058e6f92832f8 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c @@ -121,7 +121,7 @@ float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMLA]] float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { return vfma_laneq_f16(a, b, c, 7); @@ -239,7 +239,7 @@ float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMLA]] float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { return vfms_laneq_f16(a, b, c, 7); diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 061e2a0ec61937..29422fa650e6dc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -8068,29 +8068,29 @@ multiclass SIMDFPIndexedTiedPatterns { let Predicates = [HasNEON, HasFullFP16] in { // Patterns for f16: DUPLANE, DUP scalar and vector_extract. def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), - (AArch64duplane16 (v8f16 V128:$Rm), + (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))), (!cast(INST # "v8i16_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexH:$idx)>; + V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>; def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (AArch64dup (f16 FPR16Op:$Rm)))), (!cast(INST # "v8i16_indexed") V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>; def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), - (AArch64duplane16 (v8f16 V128:$Rm), - VectorIndexS:$idx))), + (AArch64duplane16 (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))), (!cast(INST # "v4i16_indexed") - V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + V64:$Rd, V64:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>; def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (AArch64dup (f16 FPR16Op:$Rm)))), (!cast(INST # "v4i16_indexed") V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>; def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn), - (vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx))), + (vector_extract (v8f16 V128_lo:$Rm), VectorIndexH:$idx))), (!cast(INST # "v1i16_indexed") FPR16:$Rd, FPR16:$Rn, - V128:$Rm, VectorIndexH:$idx)>; + V128_lo:$Rm, VectorIndexH:$idx)>; } // Predicates = [HasNEON, HasFullFP16] // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.