-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAG] Enhance SDPatternMatch to match integer minimum and maximum patterns in addition to the existing ISD nodes. #111774
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64 Author: None (c8ef). Changes: Closes #108218. This patch adds icmp+select patterns for the integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. The patch is 105.15 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/111774.diff. 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 04135ee7e1c022..b629dd50aced00 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -542,6 +542,80 @@ struct BinaryOpc_match {
}
};
+template <typename LHS_P, typename RHS_P, typename Pred_t,
+ bool Commutable = false, bool ExcludeChain = false>
+struct MaxMin_match {
+ using PredType = Pred_t;
+ LHS_P LHS;
+ RHS_P RHS;
+
+ MaxMin_match(const LHS_P &L, const RHS_P &R) : LHS(L), RHS(R) {}
+
+ template <typename MatchContext>
+ bool match(const MatchContext &Ctx, SDValue N) {
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 3);
+ SDValue Cond = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+
+ if (sd_context_match(Cond, Ctx, m_Opc(ISD::SETCC))) {
+ EffectiveOperands<ExcludeChain> EO_SETCC(Cond, Ctx);
+ assert(EO_SETCC.Size == 3);
+ SDValue L = Cond->getOperand(EO_SETCC.FirstIndex);
+ SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
+ CondCodeSDNode *CondNode =
+ cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
+
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L)) {
+ return false;
+ }
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CondNode->get()
+ : getSetCCInverse(CondNode->get(), L.getValueType());
+ if (!Pred_t::match(Cond)) {
+ return false;
+ }
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ }
+ }
+
+ return false;
+ }
+};
+
+// Helper class for identifying signed max predicates.
+struct smax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETGT || Cond == ISD::CondCode::SETGE;
+ }
+};
+
+// Helper class for identifying unsigned max predicates.
+struct umax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETUGT || Cond == ISD::CondCode::SETUGE;
+ }
+};
+
+// Helper class for identifying signed min predicates.
+struct smin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETLT || Cond == ISD::CondCode::SETLE;
+ }
+};
+
+// Helper class for identifying unsigned min predicates.
+struct umin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETULT || Cond == ISD::CondCode::SETULE;
+ }
+};
+
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS> m_BinOp(unsigned Opc, const LHS &L,
const RHS &R) {
@@ -609,23 +683,27 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) {
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R);
+inline auto m_SMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R),
+ MaxMin_match<LHS, RHS, smin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R);
+inline auto m_SMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R),
+ MaxMin_match<LHS, RHS, smax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R);
+inline auto m_UMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R),
+ MaxMin_match<LHS, RHS, umin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R);
+inline auto m_UMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R),
+ MaxMin_match<LHS, RHS, umax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index e5cc04f9be1a1f..62db30f17747cf 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -547,10 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w1, sxtb
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w1, w0, lt
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -563,10 +562,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w1, sxth
-; CHECK-NEXT: csel w8, w0, w1, le
-; CHECK-NEXT: csel w9, w1, w0, le
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -578,10 +576,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, gt
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -593,10 +590,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, ge
-; CHECK-NEXT: csel x9, x1, x0, ge
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -608,14 +604,13 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lt
-; CHECK-NEXT: csel x9, x2, x0, lt
-; CHECK-NEXT: csel x10, x1, x3, lt
-; CHECK-NEXT: csel x11, x3, x1, lt
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbc x11, x3, x1
+; CHECK-NEXT: sbcs xzr, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lt
+; CHECK-NEXT: csel x1, x9, x11, lt
; CHECK-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 0a44ae16884582..4585de96c848f2 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -408,10 +408,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, w1, uxtb
-; CHECK-NEXT: csel w8, w0, w1, lo
-; CHECK-NEXT: csel w9, w1, w0, lo
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -424,10 +423,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: cmp w8, w1, uxth
-; CHECK-NEXT: csel w8, w0, w1, ls
-; CHECK-NEXT: csel w9, w1, w0, ls
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -439,10 +437,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, hi
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -454,10 +451,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, hs
-; CHECK-NEXT: csel x9, x1, x0, hs
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -469,14 +465,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lo
-; CHECK-NEXT: csel x9, x2, x0, lo
-; CHECK-NEXT: csel x10, x1, x3, lo
-; CHECK-NEXT: csel x11, x3, x1, lo
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbcs x9, x1, x3
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: sbfx x10, x10, #0, #1
+; CHECK-NEXT: eor x8, x8, x10
+; CHECK-NEXT: eor x9, x9, x10
+; CHECK-NEXT: subs x0, x8, x10
+; CHECK-NEXT: sbc x1, x9, x10
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 1043fa5c4565ee..bbdce7c6e933b3 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,12 +13,11 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: csel w9, w10, w9, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -36,12 +35,11 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: csel w9, w10, w9, hi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -64,10 +62,10 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w1
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: sub w10, w1, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
+; CHECK-NEXT: subs w11, w9, w1
+; CHECK-NEXT: csel w10, w11, w10, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -89,10 +87,10 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w10, w9, w0, gt
-; CHECK-NEXT: csel w9, w0, w9, gt
+; CHECK-NEXT: sub w10, w9, w0
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w9, w10
+; CHECK-NEXT: subs w9, w0, w9
+; CHECK-NEXT: csel w9, w9, w10, gt
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -115,10 +113,10 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: sub w11, w10, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: csel w10, w10, w11, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -144,12 +142,11 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, gt
-; CHECK-NEXT: csel x10, x0, x1, gt
+; CHECK-NEXT: csel x9, x10, x9, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -167,12 +164,11 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, hi
-; CHECK-NEXT: csel x10, x0, x1, hi
+; CHECK-NEXT: csel x9, x10, x9, hi
; CHECK-NEXT: cneg x8, x8, ls
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -195,10 +191,10 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x1
-; CHECK-NEXT: csel x10, x1, x9, gt
-; CHECK-NEXT: csel x11, x9, x1, gt
+; CHECK-NEXT: sub x10, x1, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x11, x10
+; CHECK-NEXT: subs x11, x9, x1
+; CHECK-NEXT: csel x10, x11, x10, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -220,10 +216,10 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x0, x9
-; CHECK-NEXT: csel x10, x9, x0, gt
-; CHECK-NEXT: csel x9, x0, x9, gt
+; CHECK-NEXT: sub x10, x9, x0
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x9, x10
+; CHECK-NEXT: subs x9, x0, x9
+; CHECK-NEXT: csel x9, x9, x10, gt
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -246,10 +242,10 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: csel x11, x10, x9, gt
-; CHECK-NEXT: csel x10, x9, x10, gt
+; CHECK-NEXT: sub x11, x10, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x10, x11
+; CHECK-NEXT: subs x10, x9, x10
+; CHECK-NEXT: csel x10, x10, x11, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -275,14 +271,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: sxth w10, w0
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -299,14 +294,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w9, w1, #0xffff
+; CHECK-NEXT: and w10, w0, #0xffff
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, uxth
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
@@ -325,15 +319,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w9, [x0]
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: ldrsh w10, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
-; CHECK-NEXT: ubfx w10, w10, #1, #15
-; CHECK-NEXT: madd w0, w10, w8, w9
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w10
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -353,12 +346,10 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w10, w0, gt
-; CHECK-NEXT: csel w10, w0, w10, gt
+; CHECK-NEXT: subs w9, w9, w10
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%a2 = load i16, ptr %a2_addr
@@ -379,12 +370,10 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldrsh w9, [x0]
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: cneg w10, w10, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
-; CHECK-NEXT: ubfx w10, w10, #1, #15
+; CHE...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: None (c8ef). Changes: Closes #108218. This patch adds icmp+select patterns for the integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. The patch is 105.15 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/111774.diff. 9 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 04135ee7e1c022..b629dd50aced00 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -542,6 +542,80 @@ struct BinaryOpc_match {
}
};
+template <typename LHS_P, typename RHS_P, typename Pred_t,
+ bool Commutable = false, bool ExcludeChain = false>
+struct MaxMin_match {
+ using PredType = Pred_t;
+ LHS_P LHS;
+ RHS_P RHS;
+
+ MaxMin_match(const LHS_P &L, const RHS_P &R) : LHS(L), RHS(R) {}
+
+ template <typename MatchContext>
+ bool match(const MatchContext &Ctx, SDValue N) {
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 3);
+ SDValue Cond = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+
+ if (sd_context_match(Cond, Ctx, m_Opc(ISD::SETCC))) {
+ EffectiveOperands<ExcludeChain> EO_SETCC(Cond, Ctx);
+ assert(EO_SETCC.Size == 3);
+ SDValue L = Cond->getOperand(EO_SETCC.FirstIndex);
+ SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
+ CondCodeSDNode *CondNode =
+ cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
+
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L)) {
+ return false;
+ }
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CondNode->get()
+ : getSetCCInverse(CondNode->get(), L.getValueType());
+ if (!Pred_t::match(Cond)) {
+ return false;
+ }
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ }
+ }
+
+ return false;
+ }
+};
+
+// Helper class for identifying signed max predicates.
+struct smax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETGT || Cond == ISD::CondCode::SETGE;
+ }
+};
+
+// Helper class for identifying unsigned max predicates.
+struct umax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETUGT || Cond == ISD::CondCode::SETUGE;
+ }
+};
+
+// Helper class for identifying signed min predicates.
+struct smin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETLT || Cond == ISD::CondCode::SETLE;
+ }
+};
+
+// Helper class for identifying unsigned min predicates.
+struct umin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETULT || Cond == ISD::CondCode::SETULE;
+ }
+};
+
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS> m_BinOp(unsigned Opc, const LHS &L,
const RHS &R) {
@@ -609,23 +683,27 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) {
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R);
+inline auto m_SMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R),
+ MaxMin_match<LHS, RHS, smin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R);
+inline auto m_SMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R),
+ MaxMin_match<LHS, RHS, smax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R);
+inline auto m_UMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R),
+ MaxMin_match<LHS, RHS, umin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R);
+inline auto m_UMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R),
+ MaxMin_match<LHS, RHS, umax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index e5cc04f9be1a1f..62db30f17747cf 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -547,10 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w1, sxtb
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w1, w0, lt
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -563,10 +562,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w1, sxth
-; CHECK-NEXT: csel w8, w0, w1, le
-; CHECK-NEXT: csel w9, w1, w0, le
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -578,10 +576,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, gt
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -593,10 +590,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, ge
-; CHECK-NEXT: csel x9, x1, x0, ge
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -608,14 +604,13 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lt
-; CHECK-NEXT: csel x9, x2, x0, lt
-; CHECK-NEXT: csel x10, x1, x3, lt
-; CHECK-NEXT: csel x11, x3, x1, lt
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbc x11, x3, x1
+; CHECK-NEXT: sbcs xzr, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lt
+; CHECK-NEXT: csel x1, x9, x11, lt
; CHECK-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 0a44ae16884582..4585de96c848f2 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -408,10 +408,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, w1, uxtb
-; CHECK-NEXT: csel w8, w0, w1, lo
-; CHECK-NEXT: csel w9, w1, w0, lo
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -424,10 +423,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: cmp w8, w1, uxth
-; CHECK-NEXT: csel w8, w0, w1, ls
-; CHECK-NEXT: csel w9, w1, w0, ls
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -439,10 +437,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, hi
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -454,10 +451,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, hs
-; CHECK-NEXT: csel x9, x1, x0, hs
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -469,14 +465,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lo
-; CHECK-NEXT: csel x9, x2, x0, lo
-; CHECK-NEXT: csel x10, x1, x3, lo
-; CHECK-NEXT: csel x11, x3, x1, lo
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbcs x9, x1, x3
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: sbfx x10, x10, #0, #1
+; CHECK-NEXT: eor x8, x8, x10
+; CHECK-NEXT: eor x9, x9, x10
+; CHECK-NEXT: subs x0, x8, x10
+; CHECK-NEXT: sbc x1, x9, x10
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 1043fa5c4565ee..bbdce7c6e933b3 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,12 +13,11 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: csel w9, w10, w9, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -36,12 +35,11 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: csel w9, w10, w9, hi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -64,10 +62,10 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w1
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: sub w10, w1, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
+; CHECK-NEXT: subs w11, w9, w1
+; CHECK-NEXT: csel w10, w11, w10, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -89,10 +87,10 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w10, w9, w0, gt
-; CHECK-NEXT: csel w9, w0, w9, gt
+; CHECK-NEXT: sub w10, w9, w0
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w9, w10
+; CHECK-NEXT: subs w9, w0, w9
+; CHECK-NEXT: csel w9, w9, w10, gt
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -115,10 +113,10 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: sub w11, w10, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: csel w10, w10, w11, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -144,12 +142,11 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, gt
-; CHECK-NEXT: csel x10, x0, x1, gt
+; CHECK-NEXT: csel x9, x10, x9, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -167,12 +164,11 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, hi
-; CHECK-NEXT: csel x10, x0, x1, hi
+; CHECK-NEXT: csel x9, x10, x9, hi
; CHECK-NEXT: cneg x8, x8, ls
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -195,10 +191,10 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x1
-; CHECK-NEXT: csel x10, x1, x9, gt
-; CHECK-NEXT: csel x11, x9, x1, gt
+; CHECK-NEXT: sub x10, x1, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x11, x10
+; CHECK-NEXT: subs x11, x9, x1
+; CHECK-NEXT: csel x10, x11, x10, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -220,10 +216,10 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x0, x9
-; CHECK-NEXT: csel x10, x9, x0, gt
-; CHECK-NEXT: csel x9, x0, x9, gt
+; CHECK-NEXT: sub x10, x9, x0
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x9, x10
+; CHECK-NEXT: subs x9, x0, x9
+; CHECK-NEXT: csel x9, x9, x10, gt
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -246,10 +242,10 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: csel x11, x10, x9, gt
-; CHECK-NEXT: csel x10, x9, x10, gt
+; CHECK-NEXT: sub x11, x10, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x10, x11
+; CHECK-NEXT: subs x10, x9, x10
+; CHECK-NEXT: csel x10, x10, x11, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -275,14 +271,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: sxth w10, w0
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -299,14 +294,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w9, w1, #0xffff
+; CHECK-NEXT: and w10, w0, #0xffff
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, uxth
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
@@ -325,15 +319,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w9, [x0]
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: ldrsh w10, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
-; CHECK-NEXT: ubfx w10, w10, #1, #15
-; CHECK-NEXT: madd w0, w10, w8, w9
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w10
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -353,12 +346,10 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w10, w0, gt
-; CHECK-NEXT: csel w10, w0, w10, gt
+; CHECK-NEXT: subs w9, w9, w10
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%a2 = load i16, ptr %a2_addr
@@ -379,12 +370,10 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldrsh w9, [x0]
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: cneg w10, w10, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
-; CHECK-NEXT: ubfx w10, w10, #1, #15
+; CHE...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You also need to add unit test coverage in SelectionDAGPatternMatchTest.cpp
@@ -609,23 +683,27 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) { | |||
} | |||
|
|||
template <typename LHS, typename RHS> | |||
inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) { | |||
return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R); | |||
inline auto m_SMin(const LHS &L, const RHS &R) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the discussion with @nikic on #108218, we need to name these variants m_SMinLike (etc.); these will match BOTH the ISD node and the MaxMin pattern. We then keep m_SMin (etc.) to match just the ISD node.
You will then need to update the ABD matchers in DAGCombiner.visitSUB to use the Like variants.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Unit test added.
@@ -683,25 +683,45 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) { | |||
} | |||
|
|||
template <typename LHS, typename RHS> | |||
inline auto m_SMin(const LHS &L, const RHS &R) { | |||
inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revert to using auto?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other raw binary matchers use an explicit return type, so I believe we could maintain consistency with them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A few minor comments - @mshockwave, any comments?
SDValue SMaxLike = DAG->getSelect(DL, MVT::i32, ICMP_GT, Op0, Op1); | ||
SDValue UMaxLike = DAG->getSelect(DL, MVT::i32, ICMP_UGT, Op0, Op1); | ||
SDValue SMinLike = DAG->getSelect(DL, MVT::i32, ICMP_LT, Op0, Op1); | ||
SDValue UMinLike = DAG->getSelect(DL, MVT::i32, ICMP_ULT, Op0, Op1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you add LE/GE test coverage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
assert(EO_SETCC.Size == 3); | ||
SDValue L = Cond->getOperand(EO_SETCC.FirstIndex); | ||
SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1); | ||
CondCodeSDNode *CondNode = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) auto *CondNode =
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
|
||
template <typename MatchContext> | ||
bool match(const MatchContext &Ctx, SDValue N) { | ||
if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about ISD::VSELECT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Friendly ping~ @RKSimon |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - @mshockwave any comments?
Thank you for your review! I'll merge it for now. Please feel free to ping me if you have any more comments, @mshockwave. |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/6796 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/6896 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/6812 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/6769 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/4991 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/11753 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/4719 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/9790 Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/10159 Here is the relevant piece of the build log for reference.
|
…ximum patterns in addition to the existing ISD nodes." (llvm#112203) This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. Reapply llvm#111774. Closes llvm#108218.
…terns in addition to the existing ISD nodes. (llvm#111774) Closes llvm#108218. This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch.
…imum patterns in addition to the existing ISD nodes." (llvm#112200) Reverts llvm#111774 This appears to be causing some tests to fail.
…ximum patterns in addition to the existing ISD nodes." (llvm#112203) This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. Reapply llvm#111774. Closes llvm#108218.
…terns in addition to the existing ISD nodes. (llvm#111774) Closes llvm#108218. This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch.
…imum patterns in addition to the existing ISD nodes." (llvm#112200) Reverts llvm#111774 This appears to be causing some tests to fail.
…ximum patterns in addition to the existing ISD nodes." (llvm#112203) This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. Reapply llvm#111774. Closes llvm#108218.
…terns in addition to the existing ISD nodes. (llvm#111774) Closes llvm#108218. This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch.
…imum patterns in addition to the existing ISD nodes." (llvm#112200) Reverts llvm#111774 This appears to be causing some tests to fail.
…ximum patterns in addition to the existing ISD nodes." (llvm#112203) This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch. Reapply llvm#111774. Closes llvm#108218.
Closes #108218.
This patch adds icmp+select patterns for integer min/max matchers in SDPatternMatch, similar to those in IR PatternMatch.