-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold X * (2^N + 1) >> N -> X + X >> N, or directly to X if X >> N is 0 #90295
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams) ChangesA generalization of the proposed x * 3/2 -> x + (x >> 1) transformation. Proof: https://alive2.llvm.org/ce/z/U7DWp4 Full diff: https://github.com/llvm/llvm-project/pull/90295.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 1cb21a1d81af4b..7767cf089a6d53 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1411,13 +1411,23 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
const APInt *MulC;
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
- // Look for a "splat" mul pattern - it replicates bits across each half of
- // a value, so a right shift is just a mask of the low bits:
- // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
- // TODO: Generalize to allow more than just half-width shifts?
- if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
- MulC->logBase2() == ShAmtC)
- return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+ if ((*MulC - 1).isPowerOf2() && MulC->logBase2() == ShAmtC) {
+ // Look for a "splat" mul pattern - it replicates bits across each half
+ // of a value, so a right shift is just a mask of the low bits:
+ // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
+ if (BitWidth > 2 && ShAmtC * 2 == BitWidth)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+
+ // lshr (mul (X, 2^N + 1)), N -> add (X, lshr(X, N))
+ if (Op0->hasOneUse()) {
+ auto *NewAdd = BinaryOperator::CreateNUWAdd(
+ X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
+ I.isExact()));
+ NewAdd->setHasNoSignedWrap(
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
+ return NewAdd;
+ }
+ }
// The one-use check is not strictly necessary, but codegen may not be
// able to invert the transform and perf may suffer with an extra mul
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index ac206dc7999dd2..d1fadddf8269e2 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -604,3 +604,97 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
%r = ashr exact <2 x i8> %p, %y
ret <2 x i8> %r
}
+
+define i32 @ashr_mul_times_3_div_2(i32 %0) {
+; CHECK-LABEL: @ashr_mul_times_3_div_2(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 3
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 1
+; CHECK-NEXT: ret i32 [[ASHR]]
+;
+ %mul = mul nsw nuw i32 %0, 3
+ %ashr = ashr i32 %mul, 1
+ ret i32 %ashr
+}
+
+define i32 @ashr_mul_times_3_div_2_exact(i32 %x) {
+; CHECK-LABEL: @ashr_mul_times_3_div_2_exact(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 1
+; CHECK-NEXT: ret i32 [[ASHR]]
+;
+ %mul = mul nsw i32 %x, 3
+ %ashr = ashr exact i32 %mul, 1
+ ret i32 %ashr
+}
+
+define i32 @mul_times_3_div_2_multiuse(i32 %x) {
+; CHECK-LABEL: @mul_times_3_div_2_multiuse(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = ashr i32 [[MUL]], 1
+; CHECK-NEXT: call void @use(i32 [[MUL]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %mul = mul nuw i32 %x, 3
+ %res = ashr i32 %mul, 1
+ call void @use (i32 %mul)
+ ret i32 %res
+}
+
+define i32 @ashr_mul_times_3_div_2_exact_2(i32 %x) {
+; CHECK-LABEL: @ashr_mul_times_3_div_2_exact_2(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 1
+; CHECK-NEXT: ret i32 [[ASHR]]
+;
+ %mul = mul nuw i32 %x, 3
+ %ashr = ashr exact i32 %mul, 1
+ ret i32 %ashr
+}
+
+define i32 @lshr_mul_times_3_div_2(i32 %0) {
+; CHECK-LABEL: @lshr_mul_times_3_div_2(
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[LSHR]]
+;
+ %mul = mul nsw nuw i32 %0, 3
+ %lshr = lshr i32 %mul, 1
+ ret i32 %lshr
+}
+
+define i32 @lshr_mul_times_3_div_2_exact(i32 %x) {
+; CHECK-LABEL: @lshr_mul_times_3_div_2_exact(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 1
+; CHECK-NEXT: ret i32 [[LSHR]]
+;
+ %mul = mul nsw i32 %x, 3
+ %lshr = lshr exact i32 %mul, 1
+ ret i32 %lshr
+}
+
+define i32 @mul_times_3_div_2_multiuse_lshr(i32 %x) {
+; CHECK-LABEL: @mul_times_3_div_2_multiuse_lshr(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = lshr i32 [[MUL]], 1
+; CHECK-NEXT: call void @use(i32 [[MUL]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %mul = mul nuw i32 %x, 3
+ %res = lshr i32 %mul, 1
+ call void @use (i32 %mul)
+ ret i32 %res
+}
+
+define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
+; CHECK-LABEL: @lshr_mul_times_3_div_2_exact_2(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
+; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i32 [[LSHR]]
+;
+ %mul = mul nuw i32 %x, 3
+ %lshr = lshr exact i32 %mul, 1
+ ret i32 %lshr
+}
+
+declare void @use(i32)
|
The |
I addressed this and edited the commit. Please see. |
Also see: https://alive2.llvm.org/ce/z/NGZx9f |
Proposed where? |
@topperc I fixed the proofs. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
7318546
to
6448a89
Compare
303f517
to
c66d1ce
Compare
194b57b
to
a20e9f2
Compare
@topperc what do you think about this? |
1a9b7f5
to
f74c6d8
Compare
@AtariDreams why do you keep force pushing patches? The github UI doesn't make it easy to see if you changed anything in the code in the patch or just rebased. If you don't write any comment to explain the what you did, the reviewer is left guessing. |
@nikic can we please merge this? |
0e81698
to
b7dbeec
Compare
@nikic I moved the fold to X special cases to analysis, and the rest remain intact. This now is good to merge! |
@dtcxzyw What do you think about this? |
5051da2
to
d8b5db1
Compare
@dtcxzyw Can we please benchmark this one more time? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR looked sensible the last time I checked it -- now it contains another new random InstSimplify fold and also seems to change some one-use conditions.
Please learn some patience. Don't keep making random changes to your PR just because nobody is around to hit the merge button right this second. Do not ping PRs more often than once a week.
Not random. It is an alternate folding of the same pattern but if it meets more strict criteria. |
You are modifying an existing fold to produce a different result (in a different pass now no less) and adding a new fold in the same PR. That's not going to fly. Please submit two PRs for the two changes. |
Alive2 Proofs:
https://alive2.llvm.org/ce/z/eSinJY
https://alive2.llvm.org/ce/z/sweDgc
https://alive2.llvm.org/ce/z/-2dXZi
https://alive2.llvm.org/ce/z/kx2PhF
https://alive2.llvm.org/ce/z/e9QjM6