-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Simplify and/or of icmp eq with op replacement #70335
Conversation
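@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) Changes: and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist. Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates). The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`. The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false. As the test diffs show, this catches quite a lot of patterns in existing test coverage. I believe this also obsoletes quite a few existing special-case and/or of icmp folds we have, but I haven't removed anything as part of this patch in the interest of risk mitigation. Patch is 70.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70335.diff 9 Files Affected: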
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 3d192d0759a1e67..ad3f89a3e93127f 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2025,6 +2025,50 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
return nullptr;
}
+static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const SimplifyQuery &Q,
+ bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags,
+ unsigned MaxRecurse);
+
+static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Must be and/or");
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred, m_Value(A), m_Value(B))) ||
+ !ICmpInst::isEquality(Pred) || !MaxRecurse--)
+ return nullptr;
+
+ auto Simplify = [&](Value *Res) -> Value * {
+ // and (icmp eq a, b), x implies (a==b) inside x.
+ // or (icmp ne a, b), x implies (a==b) inside x.
+ // If x simplifies to true/false, we can simplify the and/or.
+ if (Pred ==
+ (Opcode == Instruction::And ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE))
+ return simplifyBinOp(Opcode, Op0, Res, Q, MaxRecurse);
+ // If we have and (icmp ne a, b), x and for a==b we can simplify x to false,
+ // then we can drop the icmp, as x will already be false in the case where
+ // the icmp is false. Similar for or and true.
+ if (Res == ConstantExpr::getBinOpAbsorber(Opcode, Res->getType()))
+ return Op1;
+ return nullptr;
+ };
+
+ if (Value *Res =
+ simplifyWithOpReplaced(Op1, A, B, Q, /* AllowRefinement */ true,
+ /* DropFlags */ nullptr, MaxRecurse))
+ return Simplify(Res);
+ if (Value *Res =
+ simplifyWithOpReplaced(Op1, B, A, Q, /* AllowRefinement */ true,
+ /* DropFlags */ nullptr, MaxRecurse))
+ return Simplify(Res);
+
+ return nullptr;
+}
+
/// Given a bitwise logic op, check if the operands are add/sub with a common
/// source value and inverted constant (identity: C - X -> ~(X + ~C)).
static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
@@ -2159,6 +2203,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Constant::getNullValue(Op0->getType());
+ if (Value *V = simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+ if (Value *V = simplifyAndOrWithICmpEq(Instruction::And, Op1, Op0, Q,
+ MaxRecurse))
+ return V;
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
@@ -2435,6 +2486,13 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_LShr(m_Specific(X), m_Specific(Y))))
return Op1;
+ if (Value *V =
+ simplifyAndOrWithICmpEq(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+ return V;
+ if (Value *V =
+ simplifyAndOrWithICmpEq(Instruction::Or, Op1, Op0, Q, MaxRecurse))
+ return V;
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll
index 23bfc75b945ba10..08eefbebb736340 100644
--- a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll
+++ b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll
@@ -47,11 +47,7 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) {
define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
; CHECK-LABEL: @n3_wrong_pred(
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp eq i4 %size, 0 ; not 'ne'
%smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -63,10 +59,7 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
define i1 @n4_not_and(i4 %size, i4 %nmemb) {
; CHECK-LABEL: @n4_not_and(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = or i1 [[SMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%cmp = icmp ne i4 %size, 0
%smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll
index dbc3b5e7a25be38..047f8855fe5cb81 100644
--- a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll
+++ b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll
@@ -47,11 +47,7 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) {
define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
; CHECK-LABEL: @n3_wrong_pred(
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp eq i4 %size, 0 ; not 'ne'
%umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -63,10 +59,7 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
define i1 @n4_not_and(i4 %size, i4 %nmemb) {
; CHECK-LABEL: @n4_not_and(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT: [[AND:%.*]] = or i1 [[UMUL_OV]], [[CMP]]
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%cmp = icmp ne i4 %size, 0
%umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index 740f79cd32b39e8..cc50c5cd1e6680a 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -392,9 +392,7 @@ define i1 @is_pow2_ctpop_wrong_pred1(i32 %x) {
; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1(
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2
-; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ugt i32 %t0, 2
@@ -946,9 +944,7 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) {
; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred1(
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 1
-; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %t0, 1
@@ -959,11 +955,7 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) {
define i1 @is_pow2or0_ctpop_wrong_pred2(i32 %x) {
; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred2(
-; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 1
-; CHECK-NEXT: [[ISZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %t0, 1
@@ -1149,9 +1141,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) {
; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred1(
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1
-; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %t0, 1
@@ -1162,11 +1152,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) {
define i1 @isnot_pow2nor0_ctpop_wrong_pred2(i32 %x) {
; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred2(
-; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1
-; CHECK-NEXT: [[NOTZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %t0, 1
diff --git a/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll b/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll
index 6de97c3a7a76deb..6fe8d29bd10bf5f 100644
--- a/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll
+++ b/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll
@@ -40,11 +40,7 @@ define <2 x i1> @eq_or_non_0_commute(<2 x i32> %x) {
define i1 @eq_or_non_0_wrong_pred1(i32 %x) {
; CHECK-LABEL: @eq_or_non_0_wrong_pred1(
-; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 10
-; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = or i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %t0, 10
@@ -90,9 +86,7 @@ define i1 @ne_and_is_0_wrong_pred1(i32 %x) {
; CHECK-LABEL: @ne_and_is_0_wrong_pred1(
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 10
-; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %t0, 10
diff --git a/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll b/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll
index 7ea1797c99898fd..4e3832f31e5a4c6 100644
--- a/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll
+++ b/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll
@@ -16,10 +16,7 @@
define i1 @slt_and_max(i8 %x, i8 %y) {
; CHECK-LABEL: @slt_and_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp slt i8 %x, %y
%cmpeq = icmp eq i8 %x, 127
@@ -29,10 +26,7 @@ define i1 @slt_and_max(i8 %x, i8 %y) {
define <2 x i1> @slt_and_max_commute(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @slt_and_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq <2 x i8> [[X]], <i8 127, i8 127>
-; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret <2 x i1> [[R]]
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%cmp = icmp slt <2 x i8> %x, %y
%cmpeq = icmp eq <2 x i8> %x, <i8 127, i8 127>
@@ -42,10 +36,7 @@ define <2 x i1> @slt_and_max_commute(<2 x i8> %x, <2 x i8> %y) {
define i1 @slt_swap_and_max(i8 %x, i8 %y) {
; CHECK-LABEL: @slt_swap_and_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp sgt i8 %y, %x
%cmpeq = icmp eq i8 %x, 127
@@ -55,10 +46,7 @@ define i1 @slt_swap_and_max(i8 %x, i8 %y) {
define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @slt_swap_and_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp sgt i8 %y, %x
%cmpeq = icmp eq i8 %x, 127
@@ -68,10 +56,7 @@ define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) {
define i1 @ult_and_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ult_and_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp ult i8 %x, %y
%cmpeq = icmp eq i8 %x, 255
@@ -81,10 +66,7 @@ define i1 @ult_and_max(i8 %x, i8 %y) {
define i1 @ult_and_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @ult_and_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp ult i8 %x, %y
%cmpeq = icmp eq i8 %x, 255
@@ -94,10 +76,7 @@ define i1 @ult_and_max_commute(i8 %x, i8 %y) {
define i1 @ult_swap_and_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ult_swap_and_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp ugt i8 %y, %x
%cmpeq = icmp eq i8 %x, 255
@@ -107,10 +86,7 @@ define i1 @ult_swap_and_max(i8 %x, i8 %y) {
define i1 @ult_swap_and_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @ult_swap_and_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp ugt i8 %y, %x
%cmpeq = icmp eq i8 %x, 255
@@ -126,10 +102,7 @@ define i1 @ult_swap_and_max_commute(i8 %x, i8 %y) {
define i1 @sgt_and_min(i9 %x, i9 %y) {
; CHECK-LABEL: @sgt_and_min(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i9 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i9 [[X]], -256
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp sgt i9 %x, %y
%cmpeq = icmp eq i9 %x, 256
@@ -139,10 +112,7 @@ define i1 @sgt_and_min(i9 %x, i9 %y) {
define i1 @sgt_and_min_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @sgt_and_min_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp sgt i8 %x, %y
%cmpeq = icmp eq i8 %x, 128
@@ -152,10 +122,7 @@ define i1 @sgt_and_min_commute(i8 %x, i8 %y) {
define i1 @sgt_swap_and_min(i8 %x, i8 %y) {
; CHECK-LABEL: @sgt_swap_and_min(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp slt i8 %y, %x
%cmpeq = icmp eq i8 %x, 128
@@ -165,10 +132,7 @@ define i1 @sgt_swap_and_min(i8 %x, i8 %y) {
define i1 @sgt_swap_and_min_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @sgt_swap_and_min_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128
-; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%cmp = icmp slt i8 %y, %x
%cmpeq = icmp eq i8 %x, 128
@@ -224,10 +188,7 @@ define i1 @ugt_swap_and_min_commute(i8 %x, i8 %y) {
define i1 @sge_or_not_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sge_or_not_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp sge i8 %x, %y
%cmpeq = icmp ne i8 %x, 127
@@ -237,10 +198,7 @@ define i1 @sge_or_not_max(i8 %x, i8 %y) {
define i1 @sge_or_not_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @sge_or_not_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp sge i8 %x, %y
%cmpeq = icmp ne i8 %x, 127
@@ -250,10 +208,7 @@ define i1 @sge_or_not_max_commute(i8 %x, i8 %y) {
define i1 @sge_swap_or_not_max(i8 %x, i8 %y) {
; CHECK-LABEL: @sge_swap_or_not_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp sle i8 %y, %x
%cmpeq = icmp ne i8 %x, 127
@@ -263,10 +218,7 @@ define i1 @sge_swap_or_not_max(i8 %x, i8 %y) {
define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @sge_swap_or_not_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp sle i8 %y, %x
%cmpeq = icmp ne i8 %x, 127
@@ -276,10 +228,7 @@ define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) {
define i1 @uge_or_not_max(i8 %x, i8 %y) {
; CHECK-LABEL: @uge_or_not_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp uge i8 %x, %y
%cmpeq = icmp ne i8 %x, 255
@@ -289,10 +238,7 @@ define i1 @uge_or_not_max(i8 %x, i8 %y) {
define i1 @uge_or_not_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @uge_or_not_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp uge i8 %x, %y
%cmpeq = icmp ne i8 %x, 255
@@ -302,10 +248,7 @@ define i1 @uge_or_not_max_commute(i8 %x, i8 %y) {
define i1 @uge_swap_or_not_max(i8 %x, i8 %y) {
; CHECK-LABEL: @uge_swap_or_not_max(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1
-; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]]
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 true
;
%cmp = icmp ule i8 %y, %x
%cmpeq = icmp ne i8 %x, 255
@@ -315,10 +258,7 @@ define i1 @uge_swap_or_not_max(i8 %x, i8 %y) {
define i1 @uge_swap_or_not_max_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @uge_swap_or_not_max_commute(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1
-; CHECK-NE...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
5877211
to
bcd36d9
Compare
It looks like your PR does similar optimizations as #69840. I will rebase it after landing this PR to see other missed optimizations. |
These two are not fixed. They would need an extension like this one: llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp Lines 4692 to 4716 in fb619b3
These are also not fixed, but only because of the recursion limit. I think the way I implemented it may be a bit too strict, as we'll decrement MaxRecurse in simplifyAndOrWithICmpEq() and again in simplifyWithOpReplaced(). |
bcd36d9
to
3d05eb3
Compare
I've slightly adjusted the handling of the recursion limit and added test cases from the two issues. |
and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist. Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates). The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`. The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false. As the test diffs show, this catches quite a lot of patterns in existing test coverage. I believe this also obsoletes quite a few existing special-case and/or of icmp folds we have, but I haven't removed anything as part of this patch in the interest of risk mitigation.
3d05eb3
to
8a1d0a6
Compare
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z_FR]], 0 | ||
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED_FR]] | ||
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens here is that now a frozen condition gets optimized away earlier, before freeze is propagated into it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
|
||
auto Simplify = [&](Value *Res) -> Value * { | ||
// and (icmp eq a, b), x implies (a==b) inside x. | ||
// or (icmp ne a, b), x implies (a==b) inside x. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you get any benefit from `(and icmp ne (a, b), x)` and using `a != b` inside `x`? (Likewise for `eq` + `or`?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we had a way to say "simplify x under the assumption that a != b", I would expect that to be beneficial. But we can only do this for equalities right now, as in that case we can just replace operands.
I think we need some generic way to say "evaluate this predicate under the assumption that v has range CR" -- some of @dtcxzyw's recent PRs go in that direction, but I feel like there must be a more generic solution to this, which integrates with existing range propagation logic, instead of reimplementing it in parts. This seems like something that LVI/CVP should be doing, but I don't see a good way to integrate it there.
ICmpInst::Predicate Pred; | ||
Value *A, *B; | ||
if (!match(Op0, m_ICmp(Pred, m_Value(A), m_Value(B))) || | ||
!ICmpInst::isEquality(Pred) || !MaxRecurse--) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I would put the recursion-limit check (`!MaxRecurse--`) first.
I reverted this because I'm seeing segfaults in stage2 llvm-tblgen on a build server, but for some reason I can't reproduce this in local stage2 builds :/ Not sure what the difference is, my best guess is the libstdc++ version. |
@nikic |
If it's not too much effort, I'd really appreciate that! |
Ok. I think this exposes a miscompile with this patch:
which looks broken to me. |
@mikaelholmen Thank you! Here is a reduced version: declare void @barrier()
define i1 @test1(i32 %x) {
%cmp1 = icmp ne i32 %x, 0
call void @barrier()
%div = udiv i32 2147483647, %x
%cmp2 = icmp ugt i32 %x, %div
%or = or i1 %cmp1, %cmp2
ret i1 %or
} What happens is that we try to evaluate Another slight variant of this would be: define i1 @test2(i32 %x) {
%cmp1 = icmp ne i32 %x, 32
%shl = shl i32 1, %x
%cmp2 = icmp ugt i32 %x, %shl
%or = or i1 %cmp1, %cmp2
ret i1 %or
} Same basic issue, but without UB involvement, just plain poison. |
These were miscompiled with the initial version of the patch.
I've tried the following variant that only checks for true/false results (d8ea7f2). Unfortunately, this still causes stage2 llvm-tblgen crashes, so this wasn't the only issue :( |
But... the In the original ic.ll example the udiv was avoided if the denominator was 0 and UB was avoided. |
If |
Ah, ye it folded the whole thing to poison regardless of %x. I follow. Thanks! |
I applied nikic@d8ea7f2 locally and reran the original testcase I had that failed, and now it passes so unfortunately that isn't of any use anymore. :( |
I believe the problem is this: define i1 @test(ptr %arg, ptr %arg2) {
%icmp = icmp eq ptr %arg, %arg2
%call = call i1 @llvm.is.constant.i1(i1 %icmp)
%and = and i1 %call, %icmp
ret i1 %and
}
declare i1 @llvm.is.constant.i1(i1) The So I guess we should exclude llvm.is.constant from being folded by simplifyWithOpReplaced. |
Test the interaction with llvm.is.constant.
) and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist. Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates). The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`. The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false. As the test diffs show, this catches quite a lot of patterns in existing test coverage. This also obsoletes quite a few existing special-case and/or of icmp folds we have (e.g. simplifyAndOrOfICmpsWithLimitConst), but I haven't removed anything as part of this patch in the interest of risk mitigation. Fixes llvm#69050. Fixes llvm#69091.
Looks like nikic@6343a80 does fix the llvm-tblgen issue. |
…70335) Relative to the first attempt, this contains two changes: First, we only handle the case where one side simplifies to true or false, instead of calling simplification recursively. The previous approach would return poison if one operand simplified to poison (under the equality assumption), which is incorrect. Second, we do not fold llvm.is.constant in simplifyWithOpReplaced(). We may be assuming that a value is constant, if the equality holds, but it may not actually be constant. This is nominally just a QoI issue, but the std::list implementation in libstdc++ relies on the precise behavior in a way that causes miscompiles. ----- and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist. Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates). The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`. The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false. As the test diffs show, this catches quite a lot of patterns in existing test coverage. This also obsoletes quite a few existing special-case and/or of icmp folds we have (e.g. simplifyAndOrOfICmpsWithLimitConst), but I haven't removed anything as part of this patch in the interest of risk mitigation. Fixes #69050. Fixes #69091.
Might be already addressed in previous discussion on this bug, but in chrome we saw a binary size increase from this patch (crbug.com/1498542) |
@amykhuang Do you still see a size increase with the reapplied version of the patch? I think it's plausible that the llvm.is.constant issue could have resulted in a binary size increase, in which case that should be resolved now. If it's not, I'd expect that this is some kind of second order effect not directly related to this patch. |
I may have hit a regression from this change at #98753 |
and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist.
Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates).
The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`.
The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false.
As the test diffs show, this catches quite a lot of patterns in existing test coverage. This also obsoletes quite a few existing special-case and/or of icmp folds we have (e.g. simplifyAndOrOfICmpsWithLimitConst), but I haven't removed anything as part of this patch in the interest of risk mitigation.
Fixes #69050.
Fixes #69091.