diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2cb7770ee21e4f..5bb8aabea80dbb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26949,7 +26949,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
     return false;
 
   // If the vector is scalable, SVE is enabled, implying support for complex
-  // numbers. Otherwirse, we need to ensure complex number support is avaialble
+  // numbers. Otherwise, we need to ensure complex number support is available
   if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
     return false;
 
@@ -26965,7 +26965,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
       !llvm::isPowerOf2_32(VTyWidth))
     return false;
 
-  if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
+  if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
     unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
     return 8 <= ScalarWidth && ScalarWidth <= 64;
   }
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index 93497f38063d28..7b8448de2331b4 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
 ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
 
 target triple = "aarch64"
 
@@ -158,6 +159,32 @@ entry:
   ret <16 x half> %interleaved.vec
 }
 
+
+; Expected not to transform as it is integer
+define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: complex_add_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT:    sub v2.8h, v4.8h, v0.8h
+; CHECK-NEXT:    add v1.8h, v1.8h, v5.8h
+; CHECK-NEXT:    zip1 v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v2.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %b.real = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b.imag = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %0 = sub <8 x i16> %b.real, %a.imag
+  %1 = add <8 x i16> %b.imag, %a.real
+  %interleaved.vec = shufflevector <8 x i16> %0, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x i16> %interleaved.vec
+}
+
+
 
 declare { <2 x half>, <2 x half> } @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
 declare <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half>, <2 x half>)