Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Reduce range of ctpop for non zero argument #100899

Merged
merged 2 commits into from
Jul 29, 2024

Conversation

andjo403
Copy link
Contributor

@andjo403 andjo403 commented Jul 27, 2024

Part of solution for #95255

@andjo403 andjo403 requested a review from dtcxzyw July 27, 2024 21:22
@andjo403 andjo403 requested a review from nikic as a code owner July 27, 2024 21:22
@llvmbot
Copy link
Member

llvmbot commented Jul 27, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Andreas Jonson (andjo403)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/100899.diff

5 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+7-2)
  • (modified) llvm/test/Transforms/InstCombine/ctpop-pow2.ll (+2-2)
  • (modified) llvm/test/Transforms/InstCombine/ctpop.ll (+11-2)
  • (modified) llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll (+2-2)
  • (modified) llvm/test/Transforms/InstCombine/ispow2.ll (+4-4)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f6c4b6e180937..52932f19512f3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -694,8 +694,13 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
   // Add range attribute since known bits can't completely reflect what we know.
   if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
       !II.getMetadata(LLVMContext::MD_range)) {
-    ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
-                        APInt(BitWidth, Known.countMaxPopulation() + 1));
+    unsigned KnownMinPop = Known.countMinPopulation();
+    unsigned Lower =
+        KnownMinPop ? KnownMinPop
+                    : isKnownNonZero(
+                          Op0, IC.getSimplifyQuery().getWithInstruction(&II));
+    unsigned Upper = Known.countMaxPopulation() + 1;
+    ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
     II.addRangeRetAttr(Range);
     return ⅈ
   }
diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
index 7facdaf7590d3..4ef1ed0ec4976 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
@@ -60,7 +60,7 @@ define i8 @ctpop_imin_plus1_lshr_nz(i8 %x) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    [[V:%.*]] = lshr i8 -127, [[X]]
-; CHECK-NEXT:    [[CNT:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[V]])
+; CHECK-NEXT:    [[CNT:%.*]] = call range(i8 1, 9) i8 @llvm.ctpop.i8(i8 [[V]])
 ; CHECK-NEXT:    ret i8 [[CNT]]
 ;
   %cmp = icmp ne i8 %x, 0
@@ -104,7 +104,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz(
 ; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483647>, [[X1]]
-; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
+; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 1, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
 ; CHECK-NEXT:    ret <2 x i32> [[CNT]]
 ;
   %x1 = or <2 x i32> %x, <i32 1 ,i32 1>
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 83700e72de080..56ac2cad752a3 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -169,8 +169,8 @@ define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) {
 
 define <2 x i32> @_parity_of_not_poison2(<2 x i32> %x) {
 ; CHECK-LABEL: @_parity_of_not_poison2(
-; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[CNT]], <i32 1, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 poison>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %neg = xor <2 x i32> %x, <i32 -1 ,i32 -1>
@@ -485,3 +485,12 @@ define i32 @select_ctpop_zero(i32 %x) {
   %res = select i1 %cmp, i32 0, i32 %ctpop
   ret i32 %res
 }
+
+define i32 @ctpop_non_zero(i32 range(i32 1, 255) %x) {
+; CHECK-LABEL: @ctpop_non_zero(
+; CHECK-NEXT:    [[CTPOP:%.*]] = call range(i32 1, 9) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; CHECK-NEXT:    ret i32 [[CTPOP]]
+;
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %ctpop
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
index e9ec6b415d462..618f5d641dc1a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
@@ -306,7 +306,7 @@ define i32 @pow2_32_nonconst_assume(i32 %x, i32 %y) {
 
 define i32 @pow2_32_gtnonconst_assume(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pow2_32_gtnonconst_assume(
-; CHECK-NEXT:    [[CTPOP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[Y:%.*]])
+; CHECK-NEXT:    [[CTPOP:%.*]] = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[Y:%.*]])
 ; CHECK-NEXT:    [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[YP2]])
 ; CHECK-NEXT:    [[YGT:%.*]] = icmp ugt i32 [[Y]], [[X:%.*]]
@@ -513,7 +513,7 @@ define i32 @maybe_pow2_32_noncont(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[YGT8:%.*]] = icmp ugt i32 [[Y:%.*]], 8
 ; CHECK-NEXT:    br i1 [[YGT8]], label [[CONT1:%.*]], label [[CONT2:%.*]]
 ; CHECK:       Cont1:
-; CHECK-NEXT:    [[CTPOP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[Y]])
+; CHECK-NEXT:    [[CTPOP:%.*]] = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[Y]])
 ; CHECK-NEXT:    [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[YP2]])
 ; CHECK-NEXT:    br i1 true, label [[CONT2]], label [[FALSE:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index a143b1347ccee..3f2c31d05f3ed 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -197,7 +197,7 @@ define i1 @is_pow2_non_zero_ult_2(i32 %x) {
 ; CHECK-LABEL: @is_pow2_non_zero_ult_2(
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[T0]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -212,7 +212,7 @@ define i1 @is_pow2_non_zero_eq_1(i32 %x) {
 ; CHECK-LABEL: @is_pow2_non_zero_eq_1(
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -227,7 +227,7 @@ define i1 @is_pow2_non_zero_ugt_1(i32 %x) {
 ; CHECK-LABEL: @is_pow2_non_zero_ugt_1(
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[T0]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -242,7 +242,7 @@ define i1 @is_pow2_non_zero_ne_1(i32 %x) {
 ; CHECK-LABEL: @is_pow2_non_zero_ne_1(
 ; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT:    [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[T0]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;

@@ -694,8 +694,13 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
// Add range attribute since known bits can't completely reflect what we know.
if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
!II.getMetadata(LLVMContext::MD_range)) {
ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
APInt(BitWidth, Known.countMaxPopulation() + 1));
unsigned KnownMinPop = Known.countMinPopulation();
Copy link
Member

@dtcxzyw dtcxzyw Jul 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a phase ordering problem. We may get a more precise result later. See my previous comment #100870 (comment).

Can you add the following test?

define i32 @ctpop_non_zero_with_existing_range_attr(i32 range(i32 1, 255) %x) {
   %ctpop = call i32 range(i32 0, 9) @llvm.ctpop.i32(i32 %x)
   ret i32 %ctpop
 }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did not think that far, forgets that the checks id done multiple times.
but then it fells like the draft that you have is better then this solution so we do not need to execute isKnownNonZero if not needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added the test and when for a combination of our solutions that reduced number of calls to intersectWith also used full set range if old is missing as it is faster in intersectWith

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jul 29, 2024
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

@goldsteinn
Copy link
Contributor

LGTM

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@andjo403 andjo403 merged commit f7491f5 into llvm:main Jul 29, 2024
7 checks passed
@andjo403 andjo403 deleted the nonZeroCtpop branch July 29, 2024 12:07
banach-space pushed a commit to banach-space/llvm-project that referenced this pull request Aug 7, 2024
dtcxzyw added a commit that referenced this pull request Oct 11, 2024
…#111284)

Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After #100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix #95255.
dtcxzyw added a commit to dtcxzyw/llvm-project that referenced this pull request Oct 11, 2024
…llvm#111284)

Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
dtcxzyw added a commit that referenced this pull request Oct 15, 2024
…/u> 2/1` (#111284)` (#111998)

Relands #111284. Test failure with stage2 build has been fixed by
#111946.


Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After #100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix #95255.
DanielCChen pushed a commit to DanielCChen/llvm-project that referenced this pull request Oct 16, 2024
…llvm#111284)

Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
DanielCChen pushed a commit to DanielCChen/llvm-project that referenced this pull request Oct 16, 2024
…/u> 2/1` (llvm#111284)` (llvm#111998)

Relands llvm#111284. Test failure with stage2 build has been fixed by
llvm#111946.


Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
bricknerb pushed a commit to bricknerb/llvm-project that referenced this pull request Oct 17, 2024
…llvm#111284)

Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
bricknerb pushed a commit to bricknerb/llvm-project that referenced this pull request Oct 17, 2024
…/u> 2/1` (llvm#111284)` (llvm#111998)

Relands llvm#111284. Test failure with stage2 build has been fixed by
llvm#111946.


Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
EricWF pushed a commit to efcs/llvm-project that referenced this pull request Oct 22, 2024
…/u> 2/1` (llvm#111284)` (llvm#111998)

Relands llvm#111284. Test failure with stage2 build has been fixed by
llvm#111946.


Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) ==
1`. After llvm#100899, we set the
range of ctpop's return value to indicate the argument/result is
non-zero.

This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP
to fix llvm#95255.
@pranavk
Copy link
Contributor

pranavk commented Oct 28, 2024

This seems to cause miscompiles for one of our Arm targets. Here's the reproducer:
https://godbolt.org/z/689eKhcnh

The problem seems to be the range attribute that doesn't contain "0" even though smax could return 0 in this case. This then combines with https://github.com/llvm/llvm-project/pull/111998/files which causes incorrect value of b on Arm machines.

@andjo403
Copy link
Contributor Author

this pass of simplifycfg https://godbolt.org/z/PhqdMcWG3 looks strange to me starts to call ctpop for zero but do not drop the range attribute

@dtcxzyw
Copy link
Member

dtcxzyw commented Oct 29, 2024

this pass of simplifycfg https://godbolt.org/z/PhqdMcWG3 looks strange to me starts to call ctpop for zero but do not drop the range attribute

It is correct. Even if ctpop returns a poison value due to range violation, %6 = select i1 %3, i1 %5, i1 false still returns false.

@dtcxzyw
Copy link
Member

dtcxzyw commented Oct 29, 2024

This seems to cause miscompiles for one of our Arm targets. Here's the reproducer: https://godbolt.org/z/689eKhcnh

The problem seems to be the range attribute that doesn't contain "0" even though smax could return 0 in this case. This then combines with https://github.com/llvm/llvm-project/pull/111998/files which causes incorrect value of b on Arm machines.

The result looks OK? https://godbolt.org/z/T5E1oYvse
bit_ceil(0) = 1, is_power_of_two(0) = 0

@pranavk
Copy link
Contributor

pranavk commented Oct 29, 2024

In the original repro, I am using absl and c++17. https://godbolt.org/z/35c9xfn6h but I can't make executor run this because it can't find absl libraries.

@pranavk
Copy link
Contributor

pranavk commented Oct 29, 2024

When I try to execute it on an Arm machine, I get b = INT_MAX.

@dtcxzyw
Copy link
Member

dtcxzyw commented Oct 29, 2024

In the original repro, I am using absl and c++17. https://godbolt.org/z/35c9xfn6h but I can't make executor run this because it can't find absl libraries.

define dso_local noundef range(i32 1, -2147483647) i32 @bucket(int)(i32 noundef %0) local_unnamed_addr #4 { %0 -> 0
  %2 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) -> 0
  %3 = icmp sgt i32 %0, 0 -> false
  %4 = tail call range(i32 1, 32) i32 @llvm.ctpop.i32(i32 %2) -> poison
  %5 = icmp samesign ult i32 %4, 2 -> poison
  %6 = select i1 %3, i1 %5, i1 false -> false
  %7 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %2, i1 true) -> poison
  %8 = add nsw i32 %2, -1 -> -1
  %9 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %8, i1 false) -> 0
  %10 = select i1 %6, i32 %7, i32 %9 -> 0
  %11 = sub nsw i32 0, %10 -> 0
  %12 = and i32 %11, 31 -> 0
  %13 = shl nuw i32 1, %12 -> 1
  ret i32 %13 -> 1
}

Looks like it is a codegen bug in ARM backend.

@dtcxzyw
Copy link
Member

dtcxzyw commented Oct 29, 2024

I will file an issue later. It seems to be a SDAG bug: https://godbolt.org/z/W6cdshn7q

@dtcxzyw
Copy link
Member

dtcxzyw commented Oct 29, 2024

this pass of simplifycfg https://godbolt.org/z/PhqdMcWG3 looks strange to me starts to call ctpop for zero but do not drop the range attribute

You are correct. But it should be handled by GVNPass: https://godbolt.org/z/Gf9Mob8Kz

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants