Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ValueTracking] Handle intrinsics in computeConstantRange #100870

Closed
wants to merge 2 commits into from

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Jul 27, 2024

This patch is needed by #95255.

@llvmbot
Copy link
Member

llvmbot commented Jul 27, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch is needed by #95255.


Full diff: https://github.com/llvm/llvm-project/pull/100870.diff

4 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+14-3)
  • (modified) llvm/test/Analysis/ValueTracking/constant-ranges.ll (+69)
  • (modified) llvm/test/Transforms/InstCombine/sub.ll (+1-1)
  • (modified) llvm/test/Transforms/InstSimplify/call.ll (+2-6)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index bfd26fadd237b..79ed66316da37 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9687,12 +9687,12 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
                                          unsigned Depth) {
   assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
 
-  if (Depth == MaxAnalysisRecursionDepth)
-    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
-
   if (auto *C = dyn_cast<Constant>(V))
     return C->toConstantRange();
 
+  if (Depth == MaxAnalysisRecursionDepth)
+    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
+
   unsigned BitWidth = V->getType()->getScalarSizeInBits();
   InstrInfoQuery IIQ(UseInstrInfo);
   ConstantRange CR = ConstantRange::getFull(BitWidth);
@@ -9728,6 +9728,17 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
     if (const auto *CB = dyn_cast<CallBase>(V))
       if (std::optional<ConstantRange> Range = CB->getRange())
         CR = CR.intersectWith(*Range);
+
+    if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      if (ConstantRange::isIntrinsicSupported(IID)) {
+        SmallVector<ConstantRange, 2> OpRanges;
+        for (Value *Op : II->args())
+          OpRanges.push_back(computeConstantRange(Op, ForSigned, UseInstrInfo,
+                                                  AC, CtxI, DT, Depth + 1));
+        CR = CR.intersectWith(ConstantRange::intrinsic(IID, OpRanges));
+      }
+    }
   }
 
   if (CtxI && AC) {
diff --git a/llvm/test/Analysis/ValueTracking/constant-ranges.ll b/llvm/test/Analysis/ValueTracking/constant-ranges.ll
index c440cfad889d3..bc9d78f4473b3 100644
--- a/llvm/test/Analysis/ValueTracking/constant-ranges.ll
+++ b/llvm/test/Analysis/ValueTracking/constant-ranges.ll
@@ -230,3 +230,72 @@ define i1 @srem_negC_fail1(i8 %x) {
   %r = icmp sge i8 %val, -33
   ret i1 %r
 }
+
+define i1 @intrinsic_test1(i64 %x, i32 %y, i64 %z) {
+; CHECK-LABEL: @intrinsic_test1(
+; CHECK-NEXT:    ret i1 false
+;
+  %sh_prom = zext nneg i32 %y to i64
+  %shl = shl nuw i64 1, %sh_prom
+  %cmp1 = icmp eq i64 %z, 0
+  %umin1 = call i64 @llvm.umin.i64(i64 %shl, i64 %z)
+  %sel = select i1 %cmp1, i64 1, i64 %umin1
+  %umin2 = call i64 @llvm.umin.i64(i64 %x, i64 %sel)
+  %cmp = icmp ugt i64 %umin2, -71777214294589697
+  ret i1 %cmp
+}
+
+define i1 @intrinsic_test2(i32 %x, i32 %y) {
+; CHECK-LABEL: @intrinsic_test2(
+; CHECK-NEXT:    [[SH:%.*]] = shl nuw nsw i32 16, [[X:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SH]], i32 64)
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[ADD]], [[UMIN]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sh = shl nuw nsw i32 16, %x
+  %umin = call i32 @llvm.umin.i32(i32 %sh, i32 64)
+  %umax = call i32 @llvm.umax.i32(i32 %umin, i32 1)
+  %add = add nuw nsw i32 %y, 1
+  %cmp = icmp eq i32 %add, %umax
+  ret i1 %cmp
+}
+
+define i1 @constant_test(i64 %x, i1 %cond) {
+; CHECK-LABEL: @constant_test(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], i64 2147483647, i64 18446744073709551
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i64 [[X:%.*]], [[SEL]]
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %sel = select i1 %cond, i64 2147483647, i64 18446744073709551
+  %cmp1 = icmp slt i64 %x, 0
+  %cmp2 = icmp ugt i64 %x, %sel
+  %or.cond = or i1 %cmp1, %cmp2
+  ret i1 %or.cond
+}
+
+; Make sure that we don't return a full range for an imm arg.
+define i1 @immarg_test(i32 %x, i32 %y, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7) {
+; CHECK-LABEL: @immarg_test(
+; CHECK-NEXT:    [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[Y:%.*]], i1 true)
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[C1:%.*]], i32 [[ABS]], i32 0
+; CHECK-NEXT:    [[M2:%.*]] = select i1 [[C2:%.*]], i32 [[M1]], i32 1
+; CHECK-NEXT:    [[M3:%.*]] = select i1 [[C3:%.*]], i32 [[M2]], i32 2
+; CHECK-NEXT:    [[M4:%.*]] = select i1 [[C4:%.*]], i32 [[M3]], i32 3
+; CHECK-NEXT:    [[M5:%.*]] = select i1 [[C5:%.*]], i32 [[M4]], i32 4
+; CHECK-NEXT:    [[M6:%.*]] = select i1 [[C6:%.*]], i32 [[M5]], i32 5
+; CHECK-NEXT:    [[M7:%.*]] = select i1 [[C7:%.*]], i32 [[M6]], i32 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[M7]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %abs = call i32 @llvm.abs.i32(i32 %y, i1 true)
+  %m1 = select i1 %c1, i32 %abs, i32 0
+  %m2 = select i1 %c2, i32 %m1, i32 1
+  %m3 = select i1 %c3, i32 %m2, i32 2
+  %m4 = select i1 %c4, i32 %m3, i32 3
+  %m5 = select i1 %c5, i32 %m4, i32 4
+  %m6 = select i1 %c6, i32 %m5, i32 5
+  %m7 = select i1 %c7, i32 %m6, i32 6
+  %cmp = icmp eq i32 %m7, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index cb308ab66b093..1070b9ff745a7 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1329,7 +1329,7 @@ define <2 x i32> @test68(<2 x i32> %x) {
 define <2 x i32> @test69(<2 x i32> %x) {
 ; CHECK-LABEL: @test69(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 127>)
-; CHECK-NEXT:    [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[DOTNEG]]
 ;
   %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index c6f6b65f89dc2..503be81c65645 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1582,9 +1582,7 @@ define i1 @ctlz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @ctlz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @ctlz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.ctlz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false
@@ -1604,9 +1602,7 @@ define i1 @cttz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @cttz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @cttz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.cttz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.cttz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jul 27, 2024
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

Failed Tests (1):
LLVM :: CodeGen/AMDGPU/select-constant-cttz.ll

if (auto *C = dyn_cast<Constant>(V))
return C->toConstantRange();

if (Depth == MaxAnalysisRecursionDepth)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related test: @immarg_test

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This patch is needed by #95255.

How/why does that patch need this? I don't see the relation at all.

%umin2 = call i64 @llvm.umin.i64(i64 %x, i64 %sel)
%cmp = icmp ugt i64 %umin2, -71777214294589697
ret i1 %cmp
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this case already be handled by CVP with the shl nuw support from #100594?

AC, CtxI, DT, Depth + 1));
CR = CR.intersectWith(ConstantRange::intrinsic(IID, OpRanges));
}
}
Copy link
Contributor

@nikic nikic Jul 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Placing the code here doesn't make sense; there is already getRangeForIntrinsic above.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent of the specific feedback, I'm rejecting this in principle. See this comment in the doc comment:

This is intended as a cheap, non-recursive check.

Complex recursive cases are intended to be handled by SCCP or LVI/CVP.

I am willing to explore making computeConstantRange() a full-blown recursive analysis, but only if this is done in a principled fashion, and not by adding these things piece-meal, such that each step looks free but we have no insight into the overall cost.

That is, if we want to go in this direction, computeConstantRange() needs to be switched fully to a recursive implementation, so that we can properly evaluate the cost/benefit of doing so. Maybe it's fine. Maybe it's fine if we switch some of the existing calls to use maximum starting depth. Maybe we can get rid of computeConstantRangeIncludingKnownBits() and use only computeConstantRange() (possibly having cross-recursion between the two to pick whichever is best for a given opcode).

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

My original plan is to set range metadata for ctpop in InstCombine: #95255 (comment)

// Add range attribute since known bits can't completely reflect what we know.
if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
!II.getMetadata(LLVMContext::MD_range)) {
ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
APInt(BitWidth, Known.countMaxPopulation() + 1));
II.addRangeRetAttr(Range);
return &II;
}

define i1 @src(i32 noundef %n) {
entry:
  %tobool = icmp ne i32 %n, 0
  call void @llvm.assume(i1 %tobool)
  %0 = tail call i32 @llvm.ctpop.i32(i32 %n)
  %cmp = icmp eq i32 %0, 1
  ret i1 %cmp
}

define i1 @tgt(i32 noundef %n) {
entry:
  %tobool = icmp ne i32 %n, 0
  call void @llvm.assume(i1 %tobool)
  %0 = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %n)
  %cmp = icmp eq i32 %0, 1
  ret i1 %cmp
}

Is it ok to infer range metadata for ctpop in CVP/SCCP?

@tschuett
Copy link

@andjo403
Copy link
Contributor

For the #95255 (comment), is it not enough to do a non-zero check, something like this, in foldCtpop?

    unsigned KnownMinPop = Known.countMinPopulation();
    unsigned Lower =
        KnownMinPop ? KnownMinPop
                    : isKnownNonZero(
                          Op0, IC.getSimplifyQuery().getWithInstruction(&II));
    ConstantRange Range(APInt(BitWidth, Lower),
                        APInt(BitWidth, Known.countMaxPopulation() + 1));

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

for the #95255 (comment) is it not enough to do a non Zero check something like this in the foldCtpop

    unsigned KnownMinPop = Known.countMinPopulation();
    unsigned Lower =
        KnownMinPop ? KnownMinPop
                    : isKnownNonZero(
                          Op0, IC.getSimplifyQuery().getWithInstruction(&II));
    ConstantRange Range(APInt(BitWidth, Lower),
                        APInt(BitWidth, Known.countMaxPopulation() + 1));

Are you willing to file a PR with this method? I drafted an alternative patch before submitting this PR:

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f6c4b6e18093..9f04ed85e831 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -692,12 +692,17 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
                             Ty);
 
   // Add range attribute since known bits can't completely reflect what we know.
-  if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
-      !II.getMetadata(LLVMContext::MD_range)) {
-    ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
-                        APInt(BitWidth, Known.countMaxPopulation() + 1));
-    II.addRangeRetAttr(Range);
-    return &II;
+  if (BitWidth != 1) {
+    ConstantRange OldRange = II.getRange().value_or(ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
+                                      APInt(BitWidth, BitWidth + 1)));
+    ConstantRange Range = OldRange;
+    Range = Range.intersectWith(ConstantRange(APInt(BitWidth, Known.countMinPopulation()), APInt(BitWidth, Known.countMaxPopulation() + 1)), ConstantRange::Unsigned);
+    if (Range.contains(APInt::getZero(BitWidth)) && isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
+      Range = Range.intersectWith(ConstantRange(APInt(BitWidth, 1), APInt(BitWidth, BitWidth + 1)), ConstantRange::Unsigned);
+    if (Range != OldRange) {
+      II.addRangeRetAttr(Range);
+      return &II;
+    }
   }
 
   return nullptr;

@dtcxzyw dtcxzyw closed this Jul 27, 2024
@andjo403
Copy link
Contributor

sure can make a PR

@dtcxzyw dtcxzyw deleted the perf/constantrange-intrinsic branch July 28, 2024 03:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants