-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAG] visitORCommutative - fold build_pair(not(x),not(y)) -> not(build_pair(x,y)) style patterns #90050
Conversation
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes(Sorry, not an actual build_pair node just a similar pattern). For cases where we're concatenating 2 integers into a double width integer, see if both integer sources are NOT patterns. We could take this further and handle all logic ops with a constant operands, but I just wanted to handle the case reported on #89533 initially. Fixes #89533 Full diff: https://github.com/llvm/llvm-project/pull/90050.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa746f1c7b7b3b..e575c2ed3ca5fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7620,6 +7620,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
EVT VT = N0.getValueType();
+ unsigned BW = VT.getScalarSizeInBits();
SDLoc DL(N);
auto peekThroughResize = [](SDValue V) {
@@ -7689,6 +7690,26 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
return N0;
+ // Attempt to match a legalized build_pair-esque pattern:
+ // or(shl(aext(X),BW/2),zext(Y))
+ SDValue Lo, Hi;
+ if (sd_match(N0,
+ m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
+ sd_match(N1, m_ZExt(m_Value(Lo))) &&
+ Lo.getScalarValueSizeInBits() == (BW / 2) &&
+ Lo.getValueType() == Hi.getValueType()) {
+ // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
+ SDValue NotLo, NotHi;
+ if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
+ sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
+ Lo = DAG.getZExtOrTrunc(NotLo, DL, VT);
+ Hi = DAG.getZExtOrTrunc(NotHi, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+ DAG.getShiftAmountConstant(BW / 2, VT, DL));
+ return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index e0f438eb7cc8f7..ae66c5420638bc 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -3060,12 +3060,7 @@ define void @vec384_v3i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl 8(%rdi), %eax
; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %rdi, %rcx
+; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notl %eax
; SCALAR-NEXT: movl %eax, 8(%rsi)
; SCALAR-NEXT: movq %rcx, (%rsi)
@@ -3196,12 +3191,7 @@ define void @vec384_v3f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl 8(%rdi), %eax
; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %rdi, %rcx
+; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notl %eax
; SCALAR-NEXT: movl %eax, 8(%rsi)
; SCALAR-NEXT: movq %rcx, (%rsi)
@@ -4216,25 +4206,10 @@ define void @vec384_v6i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $32, %r9
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %r10, %rdi
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shlq $32, %r9
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r9, %rcx
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
@@ -4303,25 +4278,10 @@ define void @vec384_v6f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $32, %r9
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %r10, %rdi
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shlq $32, %r9
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r9, %rcx
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes(Sorry, not an actual build_pair node just a similar pattern). For cases where we're concatenating 2 integers into a double width integer, see if both integer sources are NOT patterns. We could take this further and handle all logic ops with a constant operands, but I just wanted to handle the case reported on #89533 initially. Fixes #89533 Full diff: https://github.com/llvm/llvm-project/pull/90050.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa746f1c7b7b3b..e575c2ed3ca5fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7620,6 +7620,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
EVT VT = N0.getValueType();
+ unsigned BW = VT.getScalarSizeInBits();
SDLoc DL(N);
auto peekThroughResize = [](SDValue V) {
@@ -7689,6 +7690,26 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
return N0;
+ // Attempt to match a legalized build_pair-esque pattern:
+ // or(shl(aext(X),BW/2),zext(Y))
+ SDValue Lo, Hi;
+ if (sd_match(N0,
+ m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
+ sd_match(N1, m_ZExt(m_Value(Lo))) &&
+ Lo.getScalarValueSizeInBits() == (BW / 2) &&
+ Lo.getValueType() == Hi.getValueType()) {
+ // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
+ SDValue NotLo, NotHi;
+ if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
+ sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
+ Lo = DAG.getZExtOrTrunc(NotLo, DL, VT);
+ Hi = DAG.getZExtOrTrunc(NotHi, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+ DAG.getShiftAmountConstant(BW / 2, VT, DL));
+ return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index e0f438eb7cc8f7..ae66c5420638bc 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -3060,12 +3060,7 @@ define void @vec384_v3i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl 8(%rdi), %eax
; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %rdi, %rcx
+; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notl %eax
; SCALAR-NEXT: movl %eax, 8(%rsi)
; SCALAR-NEXT: movq %rcx, (%rsi)
@@ -3196,12 +3191,7 @@ define void @vec384_v3f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movl 8(%rdi), %eax
; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %rdi, %rcx
+; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notl %eax
; SCALAR-NEXT: movl %eax, 8(%rsi)
; SCALAR-NEXT: movq %rcx, (%rsi)
@@ -4216,25 +4206,10 @@ define void @vec384_v6i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $32, %r9
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %r10, %rdi
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shlq $32, %r9
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r9, %rcx
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
@@ -4303,25 +4278,10 @@ define void @vec384_v6f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $32, %r9
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %r10, %rdi
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shlq $32, %r9
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r9, %rcx
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
05817ce
to
15d284e
Compare
…d_pair(x,y)) style patterns (Sorry, not an actual build_pair node just a similar pattern). For cases where we're concatenating 2 integers into a double width integer, see if both integer sources are NOT patterns. We could take this further and handle all logic ops with a constant operands, but I just wanted to handle the case reported on llvm#89533 initially. Fixes llvm#89533
(Sorry, not an actual build_pair node just a similar pattern).
For cases where we're concatenating 2 integers into a double width integer, see if both integer sources are NOT patterns.
We could take this further and handle all logic ops with a constant operands, but I just wanted to handle the case reported on #89533 initially.
Fixes #89533