From 3551109c9f318c600c7e246a4a4488774e67e730 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 11 May 2024 00:57:05 +0800 Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests for PR91691. NFC. --- llvm/test/Transforms/InstCombine/bit_ceil.ll | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index 16631afa4878da6..88126c981e03d0e 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ret <4 x i32> %sel } +define i32 @pr91691(i32 %0) { +; CHECK-LABEL: @pr91691( +; CHECK-NEXT: [[TMP2:%.*]] = sub nuw i32 -2, [[TMP0:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = sub nuw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + +define i32 @pr91691_keep_nsw(i32 %0) { +; CHECK-LABEL: @pr91691_keep_nsw( +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = sub nsw i32 -2, %0 + %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false) + %4 = sub i32 32, %3 + %5 = shl i32 1, %4 + %6 = icmp ult i32 %0, -2 + %7 = select i1 %6, i32 %5, i32 1 + ret i32 %7 +} + declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i64 @llvm.ctlz.i64(i64, i1 immarg) declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) From 2f5bad292c9ac64bc97cc27b5d612750c2253dbb Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 11 May 2024 00:58:38 +0800 Subject: [PATCH 2/2] [InstCombine] Drop nuw flag when CtlzOp is a sub nuw --- .../Transforms/InstCombine/InstCombineSelect.cpp | 14 ++++++++++++-- llvm/test/Transforms/InstCombine/bit_ceil.ll | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8818369e79452bf..9a565481d967819 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3342,7 +3342,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // pattern. static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0, const APInt *Cond1, Value *CtlzOp, - unsigned BitWidth) { + unsigned BitWidth, + bool &ShouldDropNUW) { // The challenge in recognizing std::bit_ceil(X) is that the operand is used // for the CTLZ proper and select condition, each possibly with some // operation like add and sub. @@ -3365,6 +3366,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0, ConstantRange CR = ConstantRange::makeExactICmpRegion( CmpInst::getInversePredicate(Pred), *Cond1); + ShouldDropNUW = false; + // Match the operation that's used to compute CtlzOp from CommonAncestor. If // CtlzOp == CommonAncestor, return true as no operation is needed. If a // match is found, execute the operation on CR, update CR, and return true. @@ -3378,6 +3381,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0, return true; } if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) { + ShouldDropNUW = true; CR = ConstantRange(*C).sub(CR); return true; } @@ -3447,14 +3451,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { Pred = CmpInst::getInversePredicate(Pred); } + bool ShouldDropNUW; + if (!match(FalseVal, m_One()) || !match(TrueVal, m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth), m_Value(Ctlz)))))) || !match(Ctlz, m_Intrinsic(m_Value(CtlzOp), m_Zero())) || - !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth)) + !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth, + ShouldDropNUW)) return nullptr; + if (ShouldDropNUW) + cast(CtlzOp)->setHasNoUnsignedWrap(false); + // Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth // is an integer constant. Masking with BitWidth-1 comes free on some diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index 88126c981e03d0e..79665be01576a78 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -286,7 +286,7 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { define i32 @pr91691(i32 %0) { ; CHECK-LABEL: @pr91691( -; CHECK-NEXT: [[TMP2:%.*]] = sub nuw i32 -2, [[TMP0:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31