From 91d8aea0d3f0833dc875a7c915048f37ef14d621 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 29 Jul 2024 08:14:21 +0200 Subject: [PATCH 1/7] =?UTF-8?q?[InstCombine]=C2=A0Introduce=20test=20for?= =?UTF-8?q?=20PR100977=20(NFC)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../InstCombine/fold-ceil-div-idiom.ll | 253 ++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll diff --git a/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll new file mode 100644 index 00000000000000..b9cc0fa6ce050f --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll @@ -0,0 +1,253 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i8 @ceil_div_idiom(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, true + call void @llvm.assume(i1 %ov.not) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %div = udiv i8 %sub, %y + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_2(i8 %x, i8 
%y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_2( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[OV_NOT:%.*]] = add nuw i8 [[X]], [[Y]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[OV_NOT]] to i1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TRUNC]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %ov.not = add nuw i8 %x, %y + %trunc = trunc i8 %ov.not to i1 + call void @llvm.assume(i1 %trunc) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %div = udiv i8 %sub, %y + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_with_lshr(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]]) +; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true) +; CHECK-NEXT: [[N:%.*]] = xor i8 [[CTLZ]], 7 +; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, 
true + call void @llvm.assume(i1 %ov.not) + + %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y) + %is_pow_2 = icmp eq i8 %ctpopulation, 1 + call void @llvm.assume(i1 %is_pow_2) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true) + %n = sub i8 7, %ctlz + %div = lshr i8 %sub, %n + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_add_may_overflow(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_add_may_overflow( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %div = udiv i8 %sub, %y + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_multiuse_bias(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_multiuse_bias( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[SUB]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: call void @use(i8 [[BIAS]]) +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, true + call void @llvm.assume(i1 %ov.not) + + %nonzero = icmp ne i8 %x, 0 + 
%bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %div = udiv i8 %sub, %y + %add = add i8 %div, %bias + call void @use(i8 %bias) + ret i8 %add +} + +define i8 @ceil_div_idiom_with_lshr_not_power_2(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_not_power_2( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true) +; CHECK-NEXT: [[N:%.*]] = xor i8 [[CTLZ]], 7 +; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, true + call void @llvm.assume(i1 %ov.not) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true) + %n = sub i8 7, %ctlz + %div = lshr i8 %sub, %n + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_with_lshr_wrong_bw(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_wrong_bw( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]]) +; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 
[[CTPOPULATION]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true) +; CHECK-NEXT: [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]] +; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, true + call void @llvm.assume(i1 %ov.not) + + %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y) + %is_pow_2 = icmp eq i8 %ctpopulation, 1 + call void @llvm.assume(i1 %is_pow_2) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true) + %n = sub i8 8, %ctlz + %div = lshr i8 %sub, %n + %add = add i8 %div, %bias + ret i8 %add +} + +define i8 @ceil_div_idiom_with_lshr_multiuse_n(i8 %x, i8 %y) { +; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_multiuse_n( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[WO]], 1 +; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) +; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]]) +; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]]) +; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] +; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true) +; CHECK-NEXT: [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]] 
+; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]] +; CHECK-NEXT: call void @use(i8 [[N]]) +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) + %ov = extractvalue {i8, i1} %wo, 1 + %ov.not = xor i1 %ov, true + call void @llvm.assume(i1 %ov.not) + + %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y) + %is_pow_2 = icmp eq i8 %ctpopulation, 1 + call void @llvm.assume(i1 %is_pow_2) + + %nonzero = icmp ne i8 %x, 0 + %bias = zext i1 %nonzero to i8 + %sub = sub i8 %x, %bias + %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true) + %n = sub i8 8, %ctlz + %div = lshr i8 %sub, %n + call void @use(i8 %n) + %add = add i8 %div, %bias + ret i8 %add +} + +declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) +declare i8 @llvm.ctpop.i8(i8) +declare void @llvm.assume(i1) +declare void @use(i8) From af0d68ca1c881d8e18d12e9f86a256f91905bd28 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 29 Jul 2024 08:16:35 +0200 Subject: [PATCH 2/7] [InstCombine] Handle ceil division idiom The expression `add (udiv (sub A, Bias), B), Bias` can be folded to `udiv (add A, B - 1), B` when the sum of `A` and `B` is known not to overflow, and `Bias = A != 0`. Fixes: https://github.com/llvm/llvm-project/issues/95652. Proof: https://alive2.llvm.org/ce/z/hiWHQA.
--- .../InstCombine/InstCombineAddSub.cpp | 95 ++++++++++++++----- 1 file changed, 71 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 3bd086230cbec5..aded338982fcff 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1250,6 +1250,75 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } +static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { + assert(I.getOpcode() == Instruction::Add && "Expecting add instruction."); + Value *A, *B; + ICmpInst::Predicate Pred; + auto &ICB = IC.Builder; + + // Fold the log2 ceil idiom: + // zext (ctpop(A) >u/!= 1) + (ctlz (A, true) ^ (BW - 1)) + // -> BW - ctlz (A - 1, false) + const APInt *XorC; + if (match(&I, + m_c_Add( + m_ZExt(m_ICmp(Pred, m_Intrinsic(m_Value(A)), + m_One())), + m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor( + m_OneUse(m_TruncOrSelf(m_OneUse( + m_Intrinsic(m_Deferred(A), m_One())))), + m_APInt(XorC))))))) && + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) && + *XorC == A->getType()->getScalarSizeInBits() - 1) { + Value *Sub = ICB.CreateAdd(A, Constant::getAllOnesValue(A->getType())); + Value *Ctlz = ICB.CreateIntrinsic(Intrinsic::ctlz, {A->getType()}, + {Sub, ICB.getFalse()}); + Value *Ret = ICB.CreateSub( + ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()), + Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true); + return ICB.CreateZExtOrTrunc(Ret, I.getType()); + } + + // Fold the ceil division idiom: + // add (udiv (sub A, Bias), B), Bias + // -> udiv (add A, B - 1), B) + // with Bias = A != 0; A + B not to overflow + auto MatchDivision = [&IC](Instruction *Div, Value *&DivOp0, Value *&DivOp1) { + if (match(Div, m_UDiv(m_Value(DivOp0), m_Value(DivOp1)))) + return true; + + Value *N; + const APInt *C; + if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) && + 
match(N, + m_OneUse(m_Sub(m_APInt(C), m_Intrinsic( + m_Specific(DivOp1), m_Zero())))) && + (*C == Div->getType()->getScalarSizeInBits() - 1) && + IC.isKnownToBeAPowerOfTwo(DivOp1, true, 0, Div)) + return true; + + return false; + }; + + Instruction *Div; + Value *Bias, *Sub; + if (match(&I, m_c_Add(m_Instruction(Div), m_Value(Bias))) && + MatchDivision(Div, Sub, B) && + match(Sub, m_Sub(m_Value(A), m_Value(Bias))) && + match(Bias, m_ZExt(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))) && + Pred == ICmpInst::ICMP_NE && Bias->hasNUses(2)) { + WithCache LHSCache(A), RHSCache(B); + auto OR = IC.computeOverflowForUnsignedAdd(LHSCache, RHSCache, &I); + if (OR == OverflowResult::NeverOverflows) { + auto *BMinusOne = + ICB.CreateAdd(B, Constant::getAllOnesValue(I.getType())); + return ICB.CreateUDiv(ICB.CreateAdd(A, BMinusOne), B); + } + } + + return nullptr; +} + // Transform: // (add A, (shl (neg B), Y)) // -> (sub A, (shl B, Y)) @@ -1785,30 +1854,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, {Builder.CreateOr(A, B)})); - // Fold the log2_ceil idiom: - // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) - // --> - // BW - ctlz(A - 1, false) - const APInt *XorC; - ICmpInst::Predicate Pred; - if (match(&I, - m_c_Add( - m_ZExt(m_ICmp(Pred, m_Intrinsic(m_Value(A)), - m_One())), - m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor( - m_OneUse(m_TruncOrSelf(m_OneUse( - m_Intrinsic(m_Deferred(A), m_One())))), - m_APInt(XorC))))))) && - (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) && - *XorC == A->getType()->getScalarSizeInBits() - 1) { - Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType())); - Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()}, - {Sub, Builder.getFalse()}); - Value *Ret = Builder.CreateSub( - ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()), - Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true); - return replaceInstUsesWith(I, 
Builder.CreateZExtOrTrunc(Ret, I.getType())); - } + if (Value *V = foldCeilIdioms(I, *this)) + return replaceInstUsesWith(I, V); if (Instruction *Res = foldSquareSumInt(I)) return Res; From b9593d290d95da9364aefdfd6f11d729480610be Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 29 Jul 2024 12:23:40 +0200 Subject: [PATCH 3/7] !fixup change to m_value --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index aded338982fcff..b82940730c63fd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1290,9 +1290,8 @@ static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { Value *N; const APInt *C; if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) && - match(N, - m_OneUse(m_Sub(m_APInt(C), m_Intrinsic( - m_Specific(DivOp1), m_Zero())))) && + match(N, m_OneUse(m_Sub(m_APInt(C), m_Intrinsic( + m_Value(DivOp1), m_Zero())))) && (*C == Div->getType()->getScalarSizeInBits() - 1) && IC.isKnownToBeAPowerOfTwo(DivOp1, true, 0, Div)) return true; From cdf886fa80dc2aa066c9fc157f36cb9d140f39c7 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 29 Jul 2024 15:56:26 +0200 Subject: [PATCH 4/7] !fixup pow2 no zero --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index b82940730c63fd..69bf331ce6bbcb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1293,7 +1293,7 @@ static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { match(N, m_OneUse(m_Sub(m_APInt(C), m_Intrinsic( m_Value(DivOp1), m_Zero())))) && (*C == 
Div->getType()->getScalarSizeInBits() - 1) && - IC.isKnownToBeAPowerOfTwo(DivOp1, true, 0, Div)) + IC.isKnownToBeAPowerOfTwo(DivOp1, /*OrZero*/ false, 0, Div)) return true; return false; From 855237864b84a4f87bb824b104c1641dac17d3a9 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 30 Jul 2024 16:06:45 +0200 Subject: [PATCH 5/7] !fixup drop oneuse in shift amount --- .../InstCombine/InstCombineAddSub.cpp | 4 +- .../InstCombine/fold-ceil-div-idiom.ll | 40 ------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 69bf331ce6bbcb..9dcd0900f54d94 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1290,8 +1290,8 @@ static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { Value *N; const APInt *C; if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) && - match(N, m_OneUse(m_Sub(m_APInt(C), m_Intrinsic( - m_Value(DivOp1), m_Zero())))) && + match(N, m_Sub(m_APInt(C), m_Intrinsic(m_Value(DivOp1), + m_Zero()))) && (*C == Div->getType()->getScalarSizeInBits() - 1) && IC.isKnownToBeAPowerOfTwo(DivOp1, /*OrZero*/ false, 0, Div)) return true; diff --git a/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll index b9cc0fa6ce050f..b0aaa7cd6c991a 100644 --- a/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll +++ b/llvm/test/Transforms/InstCombine/fold-ceil-div-idiom.ll @@ -207,46 +207,6 @@ define i8 @ceil_div_idiom_with_lshr_wrong_bw(i8 %x, i8 %y) { ret i8 %add } -define i8 @ceil_div_idiom_with_lshr_multiuse_n(i8 %x, i8 %y) { -; CHECK-LABEL: define i8 @ceil_div_idiom_with_lshr_multiuse_n( -; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { -; CHECK-NEXT: [[WO:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } 
[[WO]], 1 -; CHECK-NEXT: [[OV_NOT:%.*]] = xor i1 [[OV]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[OV_NOT]]) -; CHECK-NEXT: [[CTPOPULATION:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[Y]]) -; CHECK-NEXT: [[IS_POW_2:%.*]] = icmp eq i8 [[CTPOPULATION]], 1 -; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POW_2]]) -; CHECK-NEXT: [[NONZERO:%.*]] = icmp ne i8 [[X]], 0 -; CHECK-NEXT: [[BIAS:%.*]] = zext i1 [[NONZERO]] to i8 -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[BIAS]] -; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[Y]], i1 true) -; CHECK-NEXT: [[N:%.*]] = sub nuw nsw i8 8, [[CTLZ]] -; CHECK-NEXT: [[DIV:%.*]] = lshr i8 [[SUB]], [[N]] -; CHECK-NEXT: call void @use(i8 [[N]]) -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[DIV]], [[BIAS]] -; CHECK-NEXT: ret i8 [[ADD]] -; - %wo = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 %y) - %ov = extractvalue {i8, i1} %wo, 1 - %ov.not = xor i1 %ov, true - call void @llvm.assume(i1 %ov.not) - - %ctpopulation = call i8 @llvm.ctpop.i8(i8 %y) - %is_pow_2 = icmp eq i8 %ctpopulation, 1 - call void @llvm.assume(i1 %is_pow_2) - - %nonzero = icmp ne i8 %x, 0 - %bias = zext i1 %nonzero to i8 - %sub = sub i8 %x, %bias - %ctlz = tail call i8 @llvm.ctlz.i8(i8 %y, i1 true) - %n = sub i8 8, %ctlz - %div = lshr i8 %sub, %n - call void @use(i8 %n) - %add = add i8 %div, %bias - ret i8 %add -} - declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) declare i8 @llvm.ctpop.i8(i8) declare void @llvm.assume(i1) From 4e5cd1a5d64d0576d0bb7153b83e1f882cab5193 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 30 Jul 2024 16:13:26 +0200 Subject: [PATCH 6/7] !fixup style --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 9dcd0900f54d94..760c3de7849730 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ 
b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1288,11 +1288,10 @@ static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { return true; Value *N; - const APInt *C; if (match(Div, m_LShr(m_Value(DivOp0), m_Value(N))) && - match(N, m_Sub(m_APInt(C), m_Intrinsic(m_Value(DivOp1), - m_Zero()))) && - (*C == Div->getType()->getScalarSizeInBits() - 1) && + match(N, + m_Sub(m_SpecificInt(Div->getType()->getScalarSizeInBits() - 1), + m_Intrinsic(m_Value(DivOp1), m_Zero()))) && IC.isKnownToBeAPowerOfTwo(DivOp1, /*OrZero*/ false, 0, Div)) return true; From cca259cf6532b3cf50379c4ccc73701f4d255186 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 30 Jul 2024 16:59:30 +0200 Subject: [PATCH 7/7] !fixup style --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 760c3de7849730..3c1f2e79d74feb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1253,13 +1253,13 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { assert(I.getOpcode() == Instruction::Add && "Expecting add instruction."); Value *A, *B; - ICmpInst::Predicate Pred; auto &ICB = IC.Builder; // Fold the log2 ceil idiom: // zext (ctpop(A) >u/!= 1) + (ctlz (A, true) ^ (BW - 1)) // -> BW - ctlz (A - 1, false) const APInt *XorC; + ICmpInst::Predicate Pred; if (match(&I, m_c_Add( m_ZExt(m_ICmp(Pred, m_Intrinsic(m_Value(A)), @@ -1303,8 +1303,9 @@ static Value *foldCeilIdioms(BinaryOperator &I, InstCombinerImpl &IC) { if (match(&I, m_c_Add(m_Instruction(Div), m_Value(Bias))) && MatchDivision(Div, Sub, B) && match(Sub, m_Sub(m_Value(A), m_Value(Bias))) && - match(Bias, m_ZExt(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))) && - Pred == 
ICmpInst::ICMP_NE && Bias->hasNUses(2)) { + match(Bias, m_ZExt(m_SpecificICmp(ICmpInst::ICMP_NE, m_Specific(A), + m_ZeroInt()))) && + Bias->hasNUses(2)) { WithCache LHSCache(A), RHSCache(B); auto OR = IC.computeOverflowForUnsignedAdd(LHSCache, RHSCache, &I); if (OR == OverflowResult::NeverOverflows) {