-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Reduce range of ctpop for non zero argument #100899
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Andreas Jonson (andjo403) ChangesFull diff: https://github.com/llvm/llvm-project/pull/100899.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f6c4b6e180937..52932f19512f3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -694,8 +694,13 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
// Add range attribute since known bits can't completely reflect what we know.
if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
!II.getMetadata(LLVMContext::MD_range)) {
- ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
- APInt(BitWidth, Known.countMaxPopulation() + 1));
+ unsigned KnownMinPop = Known.countMinPopulation();
+ unsigned Lower =
+ KnownMinPop ? KnownMinPop
+ : isKnownNonZero(
+ Op0, IC.getSimplifyQuery().getWithInstruction(&II));
+ unsigned Upper = Known.countMaxPopulation() + 1;
+ ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
II.addRangeRetAttr(Range);
return ⅈ
}
diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
index 7facdaf7590d3..4ef1ed0ec4976 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
@@ -60,7 +60,7 @@ define i8 @ctpop_imin_plus1_lshr_nz(i8 %x) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: [[V:%.*]] = lshr i8 -127, [[X]]
-; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[V]])
+; CHECK-NEXT: [[CNT:%.*]] = call range(i8 1, 9) i8 @llvm.ctpop.i8(i8 [[V]])
; CHECK-NEXT: ret i8 [[CNT]]
;
%cmp = icmp ne i8 %x, 0
@@ -104,7 +104,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz(
; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483647>, [[X1]]
-; CHECK-NEXT: [[CNT:%.*]] = call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
+; CHECK-NEXT: [[CNT:%.*]] = call range(i32 1, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
; CHECK-NEXT: ret <2 x i32> [[CNT]]
;
%x1 = or <2 x i32> %x, <i32 1 ,i32 1>
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 83700e72de080..56ac2cad752a3 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -169,8 +169,8 @@ define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) {
define <2 x i32> @_parity_of_not_poison2(<2 x i32> %x) {
; CHECK-LABEL: @_parity_of_not_poison2(
-; CHECK-NEXT: [[CNT:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[CNT]], <i32 1, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
+; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 poison>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%neg = xor <2 x i32> %x, <i32 -1 ,i32 -1>
@@ -485,3 +485,12 @@ define i32 @select_ctpop_zero(i32 %x) {
%res = select i1 %cmp, i32 0, i32 %ctpop
ret i32 %res
}
+
+define i32 @ctpop_non_zero(i32 range(i32 1, 255) %x) {
+; CHECK-LABEL: @ctpop_non_zero(
+; CHECK-NEXT: [[CTPOP:%.*]] = call range(i32 1, 9) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; CHECK-NEXT: ret i32 [[CTPOP]]
+;
+ %ctpop = call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %ctpop
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
index e9ec6b415d462..618f5d641dc1a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ne-pow2.ll
@@ -306,7 +306,7 @@ define i32 @pow2_32_nonconst_assume(i32 %x, i32 %y) {
define i32 @pow2_32_gtnonconst_assume(i32 %x, i32 %y) {
; CHECK-LABEL: @pow2_32_gtnonconst_assume(
-; CHECK-NEXT: [[CTPOP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[Y:%.*]])
+; CHECK-NEXT: [[CTPOP:%.*]] = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[Y:%.*]])
; CHECK-NEXT: [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
; CHECK-NEXT: call void @llvm.assume(i1 [[YP2]])
; CHECK-NEXT: [[YGT:%.*]] = icmp ugt i32 [[Y]], [[X:%.*]]
@@ -513,7 +513,7 @@ define i32 @maybe_pow2_32_noncont(i32 %x, i32 %y) {
; CHECK-NEXT: [[YGT8:%.*]] = icmp ugt i32 [[Y:%.*]], 8
; CHECK-NEXT: br i1 [[YGT8]], label [[CONT1:%.*]], label [[CONT2:%.*]]
; CHECK: Cont1:
-; CHECK-NEXT: [[CTPOP:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[Y]])
+; CHECK-NEXT: [[CTPOP:%.*]] = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[Y]])
; CHECK-NEXT: [[YP2:%.*]] = icmp eq i32 [[CTPOP]], 1
; CHECK-NEXT: call void @llvm.assume(i1 [[YP2]])
; CHECK-NEXT: br i1 true, label [[CONT2]], label [[FALSE:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index a143b1347ccee..3f2c31d05f3ed 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -197,7 +197,7 @@ define i1 @is_pow2_non_zero_ult_2(i32 %x) {
; CHECK-LABEL: @is_pow2_non_zero_ult_2(
; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[T0]], 2
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -212,7 +212,7 @@ define i1 @is_pow2_non_zero_eq_1(i32 %x) {
; CHECK-LABEL: @is_pow2_non_zero_eq_1(
; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -227,7 +227,7 @@ define i1 @is_pow2_non_zero_ugt_1(i32 %x) {
; CHECK-LABEL: @is_pow2_non_zero_ugt_1(
; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -242,7 +242,7 @@ define i1 @is_pow2_non_zero_ne_1(i32 %x) {
; CHECK-LABEL: @is_pow2_non_zero_ne_1(
; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[NOTZERO]])
-; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X]])
+; CHECK-NEXT: [[T0:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[X]])
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
|
@@ -694,8 +694,13 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { | |||
// Add range attribute since known bits can't completely reflect what we know. | |||
if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) && | |||
!II.getMetadata(LLVMContext::MD_range)) { | |||
ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()), | |||
APInt(BitWidth, Known.countMaxPopulation() + 1)); | |||
unsigned KnownMinPop = Known.countMinPopulation(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a phase ordering problem. We may get a more precise result later. See my previous comment #100870 (comment).
Can you add the following test?
define i32 @ctpop_non_zero_with_existing_range_attr(i32 range(i32 1, 255) %x) {
%ctpop = call i32 range(i32 0, 9) @llvm.ctpop.i32(i32 %x)
ret i32 %ctpop
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did not think that far, forgets that the checks id done multiple times.
but then it fells like the draft that you have is better then this solution so we do not need to execute isKnownNonZero if not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added the test and when for a combination of our solutions that reduced number of calls to intersectWith also used full set range if old is missing as it is faster in intersectWith
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
LGTM |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1` (#111284)` (#111998) Relands #111284. Test failure with stage2 build has been fixed by #111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix #95255.
…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1` (llvm#111284)` (llvm#111998) Relands llvm#111284. Test failure with stage2 build has been fixed by llvm#111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1` (llvm#111284)` (llvm#111998) Relands llvm#111284. Test failure with stage2 build has been fixed by llvm#111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1` (llvm#111284)` (llvm#111998) Relands llvm#111284. Test failure with stage2 build has been fixed by llvm#111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
This seems to cause miscompiles for one of our Arm targets. Here's the reproducer: The problem seems to be the range attribute that doesn't contain "0" even though smax could return 0 in this case. This then combines with https://github.com/llvm/llvm-project/pull/111998/files which causes incorrect value of b on Arm machines. |
this pass of simplifycfg https://godbolt.org/z/PhqdMcWG3 looks strange to me starts to call ctpop for zero but do not drop the range attribute |
It is correct. Even if |
The result looks OK? https://godbolt.org/z/T5E1oYvse |
In the original repro, I am using absl and c++17. https://godbolt.org/z/35c9xfn6h but I can't make executor run this because it can't find absl libraries. |
When I try to execute it on an Arm machine, I get b = INT_MAX. |
Looks like it is a codegen bug in ARM backend. |
I will file an issue later. It seems to be a SDAG bug: https://godbolt.org/z/W6cdshn7q |
You are correct. But it should be handled by GVNPass: https://godbolt.org/z/Gf9Mob8Kz |
Part of solution for #95255