From 7ff82b2e0b3cbaf5f3174b093fe3487f4b51b3a4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 12 Feb 2024 10:00:34 +0100
Subject: [PATCH] [AArch64] Only apply bool vector bitcast opt if result is
 scalar (#81256)

This optimization tries to optimize bitcasts from `<N x i1>` to iN,
but currently also triggers for `<N x i1>` to `<M x iK>` bitcasts,
if custom lowering has been requested for these for an unrelated
reason. Fix this by explicitly checking that the result type is
scalar.

Fixes https://github.com/llvm/llvm-project/issues/81216.

(cherry picked from commit 92d79922051f732560acf3791b543df1e6580689)
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  3 +-
 .../AArch64/vec-combine-compare-to-bitmask.ll | 28 +++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e97f5e32201488..2d6b5bc983e739 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24419,7 +24419,8 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
     return;
   }
 
-  if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1)
+  if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+      !VT.isVector())
     return replaceBoolVectorBitcast(N, Results, DAG);
 
   if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 1b22e2f900ddb7..557aa010b3a7d9 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -489,3 +489,31 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
   %bitmask = bitcast <6 x i1> %cmp_result to i6
   ret i6 %bitmask
 }
+
+; Only apply the combine when casting a vector to a scalar.
+define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
+; CHECK-LABEL: vector_to_vector_cast:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    shl.16b v0, v0, #7
+; CHECK-NEXT:  Lloh36:
+; CHECK-NEXT:    adrp x8, lCPI20_0@PAGE
+; CHECK-NEXT:  Lloh37:
+; CHECK-NEXT:    ldr q1, [x8, lCPI20_0@PAGEOFF]
+; CHECK-NEXT:    add x8, sp, #14
+; CHECK-NEXT:    cmlt.16b v0, v0, #0
+; CHECK-NEXT:    and.16b v0, v0, v1
+; CHECK-NEXT:    ext.16b v1, v0, v0, #8
+; CHECK-NEXT:    zip1.16b v0, v0, v1
+; CHECK-NEXT:    addv.8h h0, v0
+; CHECK-NEXT:    str h0, [sp, #14]
+; CHECK-NEXT:    ld1.b { v0 }[0], [x8]
+; CHECK-NEXT:    orr x8, x8, #0x1
+; CHECK-NEXT:    ld1.b { v0 }[4], [x8]
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh36, Lloh37
+  %bc = bitcast <16 x i1> %arg to <2 x i8>
+  ret <2 x i8> %bc
+}
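
Note: for contrast with the vector_to_vector_cast test above, here is a
minimal LLVM IR sketch of the vector-to-scalar case the combine is
actually intended for. This function is illustrative only and is not
part of the patch; the name vector_to_scalar_cast is hypothetical. With
the fix applied, a bitcast like this one still takes the
replaceBoolVectorBitcast path, because the destination type i16 is
scalar rather than a vector.

  ; Hypothetical example, not from the test file: a bool-vector-to-scalar
  ; bitcast, which remains eligible for the optimized bitmask lowering.
  define i16 @vector_to_scalar_cast(<16 x i1> %arg) {
    %bc = bitcast <16 x i1> %arg to i16
    ret i16 %bc
  }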