Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ValueTracking] Handle intrinsics in computeConstantRange #100870

Closed
wants to merge 2 commits into from

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Jul 27, 2024

This patch is needed by #95255.

@llvmbot
Copy link
Member

llvmbot commented Jul 27, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch is needed by #95255.


Full diff: https://github.com/llvm/llvm-project/pull/100870.diff

4 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+14-3)
  • (modified) llvm/test/Analysis/ValueTracking/constant-ranges.ll (+69)
  • (modified) llvm/test/Transforms/InstCombine/sub.ll (+1-1)
  • (modified) llvm/test/Transforms/InstSimplify/call.ll (+2-6)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index bfd26fadd237b..79ed66316da37 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9687,12 +9687,12 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
                                          unsigned Depth) {
   assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
 
-  if (Depth == MaxAnalysisRecursionDepth)
-    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
-
   if (auto *C = dyn_cast<Constant>(V))
     return C->toConstantRange();
 
+  if (Depth == MaxAnalysisRecursionDepth)
+    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
+
   unsigned BitWidth = V->getType()->getScalarSizeInBits();
   InstrInfoQuery IIQ(UseInstrInfo);
   ConstantRange CR = ConstantRange::getFull(BitWidth);
@@ -9728,6 +9728,17 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
     if (const auto *CB = dyn_cast<CallBase>(V))
       if (std::optional<ConstantRange> Range = CB->getRange())
         CR = CR.intersectWith(*Range);
+
+    if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      if (ConstantRange::isIntrinsicSupported(IID)) {
+        SmallVector<ConstantRange, 2> OpRanges;
+        for (Value *Op : II->args())
+          OpRanges.push_back(computeConstantRange(Op, ForSigned, UseInstrInfo,
+                                                  AC, CtxI, DT, Depth + 1));
+        CR = CR.intersectWith(ConstantRange::intrinsic(IID, OpRanges));
+      }
+    }
   }
 
   if (CtxI && AC) {
diff --git a/llvm/test/Analysis/ValueTracking/constant-ranges.ll b/llvm/test/Analysis/ValueTracking/constant-ranges.ll
index c440cfad889d3..bc9d78f4473b3 100644
--- a/llvm/test/Analysis/ValueTracking/constant-ranges.ll
+++ b/llvm/test/Analysis/ValueTracking/constant-ranges.ll
@@ -230,3 +230,72 @@ define i1 @srem_negC_fail1(i8 %x) {
   %r = icmp sge i8 %val, -33
   ret i1 %r
 }
+
+define i1 @intrinsic_test1(i64 %x, i32 %y, i64 %z) {
+; CHECK-LABEL: @intrinsic_test1(
+; CHECK-NEXT:    ret i1 false
+;
+  %sh_prom = zext nneg i32 %y to i64
+  %shl = shl nuw i64 1, %sh_prom
+  %cmp1 = icmp eq i64 %z, 0
+  %umin1 = call i64 @llvm.umin.i64(i64 %shl, i64 %z)
+  %sel = select i1 %cmp1, i64 1, i64 %umin1
+  %umin2 = call i64 @llvm.umin.i64(i64 %x, i64 %sel)
+  %cmp = icmp ugt i64 %umin2, -71777214294589697
+  ret i1 %cmp
+}
+
+define i1 @intrinsic_test2(i32 %x, i32 %y) {
+; CHECK-LABEL: @intrinsic_test2(
+; CHECK-NEXT:    [[SH:%.*]] = shl nuw nsw i32 16, [[X:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SH]], i32 64)
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[ADD]], [[UMIN]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sh = shl nuw nsw i32 16, %x
+  %umin = call i32 @llvm.umin.i32(i32 %sh, i32 64)
+  %umax = call i32 @llvm.umax.i32(i32 %umin, i32 1)
+  %add = add nuw nsw i32 %y, 1
+  %cmp = icmp eq i32 %add, %umax
+  ret i1 %cmp
+}
+
+define i1 @constant_test(i64 %x, i1 %cond) {
+; CHECK-LABEL: @constant_test(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], i64 2147483647, i64 18446744073709551
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i64 [[X:%.*]], [[SEL]]
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %sel = select i1 %cond, i64 2147483647, i64 18446744073709551
+  %cmp1 = icmp slt i64 %x, 0
+  %cmp2 = icmp ugt i64 %x, %sel
+  %or.cond = or i1 %cmp1, %cmp2
+  ret i1 %or.cond
+}
+
+; Make sure that we don't return a full range for an imm arg.
+define i1 @immarg_test(i32 %x, i32 %y, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7) {
+; CHECK-LABEL: @immarg_test(
+; CHECK-NEXT:    [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[Y:%.*]], i1 true)
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[C1:%.*]], i32 [[ABS]], i32 0
+; CHECK-NEXT:    [[M2:%.*]] = select i1 [[C2:%.*]], i32 [[M1]], i32 1
+; CHECK-NEXT:    [[M3:%.*]] = select i1 [[C3:%.*]], i32 [[M2]], i32 2
+; CHECK-NEXT:    [[M4:%.*]] = select i1 [[C4:%.*]], i32 [[M3]], i32 3
+; CHECK-NEXT:    [[M5:%.*]] = select i1 [[C5:%.*]], i32 [[M4]], i32 4
+; CHECK-NEXT:    [[M6:%.*]] = select i1 [[C6:%.*]], i32 [[M5]], i32 5
+; CHECK-NEXT:    [[M7:%.*]] = select i1 [[C7:%.*]], i32 [[M6]], i32 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[M7]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %abs = call i32 @llvm.abs.i32(i32 %y, i1 true)
+  %m1 = select i1 %c1, i32 %abs, i32 0
+  %m2 = select i1 %c2, i32 %m1, i32 1
+  %m3 = select i1 %c3, i32 %m2, i32 2
+  %m4 = select i1 %c4, i32 %m3, i32 3
+  %m5 = select i1 %c5, i32 %m4, i32 4
+  %m6 = select i1 %c6, i32 %m5, i32 5
+  %m7 = select i1 %c7, i32 %m6, i32 6
+  %cmp = icmp eq i32 %m7, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index cb308ab66b093..1070b9ff745a7 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1329,7 +1329,7 @@ define <2 x i32> @test68(<2 x i32> %x) {
 define <2 x i32> @test69(<2 x i32> %x) {
 ; CHECK-LABEL: @test69(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 127>)
-; CHECK-NEXT:    [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[DOTNEG]]
 ;
   %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index c6f6b65f89dc2..503be81c65645 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1582,9 +1582,7 @@ define i1 @ctlz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @ctlz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @ctlz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.ctlz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false
@@ -1604,9 +1602,7 @@ define i1 @cttz_i1_non_poison_eq_false(i1 %x) {
 
 define i1 @cttz_i1_poison_eq_false(i1 %x) {
 ; CHECK-LABEL: @cttz_i1_poison_eq_false(
-; CHECK-NEXT:    [[CT:%.*]] = call i1 @llvm.cttz.i1(i1 [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[CT]], false
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
   %ct = call i1 @llvm.cttz.i1(i1 %x, i1 true)
   %cmp = icmp eq i1 %ct, false

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jul 27, 2024
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

Failed Tests (1):
LLVM :: CodeGen/AMDGPU/select-constant-cttz.ll

if (auto *C = dyn_cast<Constant>(V))
return C->toConstantRange();

if (Depth == MaxAnalysisRecursionDepth)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related test: @immarg_test

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This patch is needed by #95255.

How/why does that patch need this? I don't see the relation at all.

%umin2 = call i64 @llvm.umin.i64(i64 %x, i64 %sel)
%cmp = icmp ugt i64 %umin2, -71777214294589697
ret i1 %cmp
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this case already be handled by CVP with the shl nuw support from #100594?

AC, CtxI, DT, Depth + 1));
CR = CR.intersectWith(ConstantRange::intrinsic(IID, OpRanges));
}
}
Copy link
Contributor

@nikic nikic Jul 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Placing the code here doesn't make sense; there is already getRangeForIntrinsic above.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent of the specific feedback, I'm rejecting this in principle. See this comment in the doc comment:

This is intended as a cheap, non-recursive check.

Complex recursive cases are intended to be handled by SCCP or LVI/CVP.

I am willing to explore making computeConstantRange() a full-blown recursive analysis, but only if this is done in a principled fashion, and not by adding these things piece-meal, such that each step looks free but we have no insight into the overall cost.

That is, if we want to go in this direction, computeConstantRange() needs to be switched fully to a recursive implementation, so that we can properly evaluate the cost/benefit of doing so. Maybe it's fine. Maybe it's fine if we switch some of the existing calls to use maximum starting depth. Maybe we can get rid of computeConstantRangeIncludingKnownBits() and use only computeConstantRange() (possibly having cross-recursion between the two to pick whichever is best for a given opcode).

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

My original plan is to set range metadata for ctpop in InstCombine: #95255 (comment)

// Add range attribute since known bits can't completely reflect what we know.
if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
!II.getMetadata(LLVMContext::MD_range)) {
ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
APInt(BitWidth, Known.countMaxPopulation() + 1));
II.addRangeRetAttr(Range);
return &II;
}

define i1 @src(i32 noundef %n) {
entry:
  %tobool = icmp ne i32 %n, 0
  call void @llvm.assume(i1 %tobool)
  %0 = tail call i32 @llvm.ctpop.i32(i32 %n)
  %cmp = icmp eq i32 %0, 1
  ret i1 %cmp
}

define i1 @tgt(i32 noundef %n) {
entry:
  %tobool = icmp ne i32 %n, 0
  call void @llvm.assume(i1 %tobool)
  %0 = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %n)
  %cmp = icmp eq i32 %0, 1
  ret i1 %cmp
}

Is it ok to infer range metadata for ctpop in CVP/SCCP?

@tschuett
Copy link

@andjo403
Copy link
Contributor

For the #95255 (comment), is it not enough to do a non-zero check, something like this, in foldCtpop?

    unsigned KnownMinPop = Known.countMinPopulation();
    unsigned Lower =
        KnownMinPop ? KnownMinPop
                    : isKnownNonZero(
                          Op0, IC.getSimplifyQuery().getWithInstruction(&II));
    ConstantRange Range(APInt(BitWidth, Lower),
                        APInt(BitWidth, Known.countMaxPopulation() + 1));

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jul 27, 2024

for the #95255 (comment) is it not enough to do a non Zero check something like this in the foldCtpop

    unsigned KnownMinPop = Known.countMinPopulation();
    unsigned Lower =
        KnownMinPop ? KnownMinPop
                    : isKnownNonZero(
                          Op0, IC.getSimplifyQuery().getWithInstruction(&II));
    ConstantRange Range(APInt(BitWidth, Lower),
                        APInt(BitWidth, Known.countMaxPopulation() + 1));

Are you willing to file a PR with this method? I drafted an alternative patch before submitting this PR:

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f6c4b6e18093..9f04ed85e831 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -692,12 +692,17 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
                             Ty);
 
   // Add range attribute since known bits can't completely reflect what we know.
-  if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
-      !II.getMetadata(LLVMContext::MD_range)) {
-    ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
-                        APInt(BitWidth, Known.countMaxPopulation() + 1));
-    II.addRangeRetAttr(Range);
-    return &II;
+  if (BitWidth != 1) {
+    ConstantRange OldRange = II.getRange().value_or(ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
+                                      APInt(BitWidth, BitWidth + 1)));
+    ConstantRange Range = OldRange;
+    Range = Range.intersectWith(ConstantRange(APInt(BitWidth, Known.countMinPopulation()), APInt(BitWidth, Known.countMaxPopulation() + 1)), ConstantRange::Unsigned);
+    if (Range.contains(APInt::getZero(BitWidth)) && isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
+      Range = Range.intersectWith(ConstantRange(APInt(BitWidth, 1), APInt(BitWidth, BitWidth + 1)), ConstantRange::Unsigned);
+    if (Range != OldRange) {
+      II.addRangeRetAttr(Range);
+      return &II;
+    }
   }
 
   return nullptr;

@dtcxzyw dtcxzyw closed this Jul 27, 2024
@andjo403
Copy link
Contributor

sure can make a PR

@dtcxzyw dtcxzyw deleted the perf/constantrange-intrinsic branch July 28, 2024 03:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants