-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/18.x: [InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776) #91917
Conversation
@nikic What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-llvm-transforms
Author: None (llvmbot)
Changes
Backport b5f4210
Requested by: @dtcxzyw
Full diff: https://github.com/llvm/llvm-project/pull/91917.diff
2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8cc7901cbac7f..86a39cf2ee93f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3201,7 +3201,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// pattern.
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
const APInt *Cond1, Value *CtlzOp,
- unsigned BitWidth) {
+ unsigned BitWidth,
+ bool &ShouldDropNUW) {
// The challenge in recognizing std::bit_ceil(X) is that the operand is used
// for the CTLZ proper and select condition, each possibly with some
// operation like add and sub.
@@ -3224,6 +3225,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
ConstantRange CR = ConstantRange::makeExactICmpRegion(
CmpInst::getInversePredicate(Pred), *Cond1);
+ ShouldDropNUW = false;
+
// Match the operation that's used to compute CtlzOp from CommonAncestor. If
// CtlzOp == CommonAncestor, return true as no operation is needed. If a
// match is found, execute the operation on CR, update CR, and return true.
@@ -3237,6 +3240,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
return true;
}
if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+ ShouldDropNUW = true;
CR = ConstantRange(*C).sub(CR);
return true;
}
@@ -3306,14 +3310,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
Pred = CmpInst::getInversePredicate(Pred);
}
+ bool ShouldDropNUW;
+
if (!match(FalseVal, m_One()) ||
!match(TrueVal,
m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
m_Value(Ctlz)))))) ||
!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
- !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+ !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
+ ShouldDropNUW))
return nullptr;
+ if (ShouldDropNUW)
+ cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+
// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
// is an integer constant. Masking with BitWidth-1 comes free on some
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 52e70c78ba542..63a5ae012eeb6 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
ret <4 x i32> %sel
}
+define i32 @pr91691(i32 %0) {
+; CHECK-LABEL: @pr91691(
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = sub nuw i32 -2, %0
+ %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+ %4 = sub i32 32, %3
+ %5 = shl i32 1, %4
+ %6 = icmp ult i32 %0, -2
+ %7 = select i1 %6, i32 %5, i32 1
+ ret i32 %7
+}
+
+define i32 @pr91691_keep_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_keep_nsw(
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = sub nsw i32 -2, %0
+ %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+ %4 = sub i32 32, %3
+ %5 = shl i32 1, %4
+ %6 = icmp ult i32 %0, -2
+ %7 = select i1 %6, i32 %5, i32 1
+ ret i32 %7
+}
+
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like the test is failing on the release branch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
See the following case:
```
define i32 @SRC1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %shl = shl i32 1, %sub
  %ugt = icmp ult i32 %x, -2
  %sel = select i1 %ugt, i32 %shl, i32 1
  ret i32 %sel
}

define i32 @tgt1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %and = and i32 %sub, 31
  %shl = shl nuw i32 1, %and
  ret i32 %shl
}
```
`nuw` in `%dec` should be dropped after the select instruction is eliminated.
Alive2: https://alive2.llvm.org/ce/z/7S9529
Fixes llvm#91691.
(cherry picked from commit b5f4210)
Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
@dtcxzyw (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
Backport b5f4210
Requested by: @dtcxzyw