From d38bff460acb4fe3156d90ec739da49344db14ca Mon Sep 17 00:00:00 2001 From: Sizov Nikita Date: Sat, 6 Apr 2024 23:41:24 +0300 Subject: [PATCH] [AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling (#76644) Fold BICi if all destination bits are already known to be zeroes ```llvm define <8 x i16> @haddu_known(<8 x i8> %a0, <8 x i8> %a1) { %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511> ret <8 x i16> %res } declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) ``` ``` haddu_known: // @haddu_known ushll v0.8h, v0.8b, #0 ushll v1.8h, v1.8b, #0 uhadd v0.8h, v0.8h, v1.8h bic v0.8h, #254, lsl #8 <-- this one will be removed as we know high bits are zero extended ret ``` Fixes #53881 Fixes #53622 --- .../Target/AArch64/AArch64ISelLowering.cpp | 30 +++++++++++++++++++ .../AArch64/aarch64-known-bits-hadd.ll | 4 --- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b10d8a80c8c9bf..819e8ccd5c33f0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -24555,6 +24555,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false)) return R; return performFlagSettingCombine(N, DCI, AArch64ISD::SBC); + case AArch64ISD::BICi: { + APInt DemandedBits = + APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits()); + APInt DemandedElts = + APInt::getAllOnes(N->getValueType(0).getVectorNumElements()); + + if (DAG.getTargetLoweringInfo().SimplifyDemandedBits( + SDValue(N, 0), DemandedBits, DemandedElts, DCI)) + return SDValue(); + + break; + } case ISD::XOR: return performXorCombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -27595,6 +27607,24 @@ bool 
AArch64TargetLowering::SimplifyDemandedBitsForTargetNode( // used - simplify to just Val. return TLO.CombineTo(Op, ShiftR->getOperand(0)); } + case AArch64ISD::BICi: { + // Fold BICi if all destination bits already known to be zeroed + SDValue Op0 = Op.getOperand(0); + KnownBits KnownOp0 = + TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1); + // Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2)) + uint64_t BitsToClear = Op->getConstantOperandVal(1) + << Op->getConstantOperandVal(2); + APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero; + if (APInt(Known.getBitWidth(), BitsToClear) + .isSubsetOf(AlreadyZeroedBitsToClear)) + return TLO.CombineTo(Op, Op0); + + Known = KnownOp0 & + KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear)); + + return false; + } case ISD::INTRINSIC_WO_CHAIN: { if (auto ElementSize = IsSVECntIntrinsic(Op)) { unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits(); diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll index 017f382774892c..f36b8440fe4bfb 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll @@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> @@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> @@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-NEXT: ushll v0.8h, v0.8b, 
#0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> @@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16>