-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Enable store clustering by default #73796
Conversation
e2742f4
to
e5eaf0c
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Reverse ping! |
After llvm#73789 enabled load clustering, do the same for store clustering.
e5eaf0c
to
2832ed3
Compare
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-globalisel Author: Alex Bradbury (asb) Changes: Builds on #73789, enabling store clustering by default using the same heuristic. It's not clear if this is desirable, but posting in case anyone has views. As it's stacked on other in-review PRs, you'll want to just look at the most recent commit to see how it impacts the in-tree tests. Patch is 1.51 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73796.diff 249 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2dcac1320417c2..0d05bd06eba2e4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -351,6 +351,8 @@ class RISCVPassConfig : public TargetPassConfig {
DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+ DAG->addMutation(createStoreClusterDAGMutation(
+ DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
return DAG;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
index 3df4aca40ec942..6c0e322a252c72 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
@@ -69,12 +69,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-NEXT: sd a2, 32(sp)
; RV64-NEXT: sd a3, 40(sp)
; RV64-NEXT: sd a4, 48(sp)
-; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: addi a1, sp, 24
; RV64-NEXT: sd a1, 8(sp)
; RV64-NEXT: lw a0, 4(a0)
; RV64-NEXT: lwu a1, 8(sp)
+; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: sd a6, 64(sp)
; RV64-NEXT: sd a7, 72(sp)
; RV64-NEXT: slli a0, a0, 32
@@ -129,12 +129,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-WITHFP-NEXT: sd a2, 16(s0)
; RV64-WITHFP-NEXT: sd a3, 24(s0)
; RV64-WITHFP-NEXT: sd a4, 32(s0)
-; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: addi a0, s0, -24
; RV64-WITHFP-NEXT: addi a1, s0, 8
; RV64-WITHFP-NEXT: sd a1, -24(s0)
; RV64-WITHFP-NEXT: lw a0, 4(a0)
; RV64-WITHFP-NEXT: lwu a1, -24(s0)
+; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: sd a6, 48(s0)
; RV64-WITHFP-NEXT: sd a7, 56(s0)
; RV64-WITHFP-NEXT: slli a0, a0, 32
@@ -844,11 +844,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32-LABEL: va3:
; ILP32: # %bb.0:
; ILP32-NEXT: addi sp, sp, -32
-; ILP32-NEXT: sw a3, 12(sp)
-; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: addi a0, sp, 12
; ILP32-NEXT: sw a0, 4(sp)
; ILP32-NEXT: lw a0, 4(sp)
+; ILP32-NEXT: sw a3, 12(sp)
+; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: sw a5, 20(sp)
; ILP32-NEXT: sw a6, 24(sp)
; ILP32-NEXT: sw a7, 28(sp)
@@ -868,11 +868,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-LABEL: va3:
; RV32D-ILP32: # %bb.0:
; RV32D-ILP32-NEXT: addi sp, sp, -48
-; RV32D-ILP32-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: addi a0, sp, 28
; RV32D-ILP32-NEXT: sw a0, 20(sp)
; RV32D-ILP32-NEXT: lw a0, 20(sp)
+; RV32D-ILP32-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: sw a5, 36(sp)
; RV32D-ILP32-NEXT: sw a6, 40(sp)
; RV32D-ILP32-NEXT: sw a7, 44(sp)
@@ -894,11 +894,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32F-LABEL: va3:
; RV32D-ILP32F: # %bb.0:
; RV32D-ILP32F-NEXT: addi sp, sp, -48
-; RV32D-ILP32F-NEXT: sw a3, 28(sp)
-; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: addi a0, sp, 28
; RV32D-ILP32F-NEXT: sw a0, 20(sp)
; RV32D-ILP32F-NEXT: lw a0, 20(sp)
+; RV32D-ILP32F-NEXT: sw a3, 28(sp)
+; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: sw a5, 36(sp)
; RV32D-ILP32F-NEXT: sw a6, 40(sp)
; RV32D-ILP32F-NEXT: sw a7, 44(sp)
@@ -920,11 +920,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32D-LABEL: va3:
; RV32D-ILP32D: # %bb.0:
; RV32D-ILP32D-NEXT: addi sp, sp, -48
-; RV32D-ILP32D-NEXT: sw a3, 28(sp)
-; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: addi a0, sp, 28
; RV32D-ILP32D-NEXT: sw a0, 20(sp)
; RV32D-ILP32D-NEXT: lw a0, 20(sp)
+; RV32D-ILP32D-NEXT: sw a3, 28(sp)
+; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: sw a5, 36(sp)
; RV32D-ILP32D-NEXT: sw a6, 40(sp)
; RV32D-ILP32D-NEXT: sw a7, 44(sp)
@@ -946,12 +946,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-LABEL: va3:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: sd a2, 16(sp)
-; RV64-NEXT: sd a3, 24(sp)
-; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: sd a2, 16(sp)
+; RV64-NEXT: sd a3, 24(sp)
+; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: sd a5, 40(sp)
; RV64-NEXT: sd a6, 48(sp)
; RV64-NEXT: sd a7, 56(sp)
@@ -970,11 +970,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: addi s0, sp, 24
-; RV32-WITHFP-NEXT: sw a3, 4(s0)
-; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: addi a0, s0, 4
; RV32-WITHFP-NEXT: sw a0, -12(s0)
; RV32-WITHFP-NEXT: lw a0, -12(s0)
+; RV32-WITHFP-NEXT: sw a3, 4(s0)
+; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: sw a5, 12(s0)
; RV32-WITHFP-NEXT: sw a6, 16(s0)
; RV32-WITHFP-NEXT: sw a7, 20(s0)
@@ -999,12 +999,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: addi s0, sp, 32
-; RV64-WITHFP-NEXT: sd a2, 0(s0)
-; RV64-WITHFP-NEXT: sd a3, 8(s0)
-; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: mv a0, s0
; RV64-WITHFP-NEXT: sd a0, -24(s0)
; RV64-WITHFP-NEXT: ld a0, -24(s0)
+; RV64-WITHFP-NEXT: sd a2, 0(s0)
+; RV64-WITHFP-NEXT: sd a3, 8(s0)
+; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: sd a5, 24(s0)
; RV64-WITHFP-NEXT: sd a6, 32(s0)
; RV64-WITHFP-NEXT: sd a7, 40(s0)
@@ -1622,9 +1622,6 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lui a0, 24414
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a4, 304(a0)
-; RV64-NEXT: lui a0, 24414
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: sd a5, 312(a0)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: lui a1, 24414
; RV64-NEXT: addiw a1, a1, 280
@@ -1634,6 +1631,9 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lwu a1, 8(sp)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: sd a5, 312(a2)
+; RV64-NEXT: lui a2, 24414
+; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a6, 320(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index b6064198935a61..e7fd87bd783876 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -705,8 +705,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -824,8 +824,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
-; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -952,8 +952,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1071,8 +1071,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
-; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1918,9 +1918,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a1, t2
; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
-; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
@@ -2005,9 +2005,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a1, t2
; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
-; RV32ZBB-NEXT: sw a6, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll
index 91b044902a5201..e639d4b30d4c94 100644
--- a/llvm/test/CodeGen/RISCV/abds.ll
+++ b/llvm/test/CodeGen/RISCV/abds.ll
@@ -599,9 +599,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB11_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -695,9 +695,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB11_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -799,9 +799,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB12_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -895,9 +895,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB12_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1188,9 +1188,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB17_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1284,9 +1284,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB17_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1579,9 +1579,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB22_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1675,9 +1675,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB22_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -2043,8 +2043,8 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a2, 8(a1)
@@ -2053,24 +2053,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a7, 4(a1)
; RV32I-NEXT: sltu a1, a2, a6
; RV32I-NEXT: sub t1, t1, t0
-; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sltu t0, a5, a4
; RV32I-NEXT: sub a1, t1, a1
; RV32I-NEXT: mv t1, t0
-; RV32I-NEXT: beq a7, a4, .LBB31_2
+; RV32I-NEXT: beq a7, a3, .LBB31_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a7, a4
+; RV32I-NEXT: sltu t1, a7, a3
; RV32I-NEXT: .LBB31_2:
; RV32I-NEXT: sub a2, a2, a6
; RV32I-NEXT: sltu a6, a2, t1
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a4, a7, a4
-; RV32I-NEXT: sub a4, a4, t0
-; RV32I-NEXT: sub a3, a5, a3
+; RV32I-NEXT: sub a3, a7, a3
+; RV32I-NEXT: sub a3, a3, t0
+; RV32I-NEXT: sub a4, a5, a4
; RV32I-NEXT: bgez a1, .LBB31_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: snez a6, a3
+; RV32I-NEXT: snez a5, a3
+; RV32I-NEXT: snez a6, a4
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: sltu t0, a7, a5
@@ -2079,12 +2079,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t0
; RV32I-NEXT: sub a2, a7, a5
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: add a4, a4, a6
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: add a3, a3, a6
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB31_4:
-; RV32I-NEXT: sw a4, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
@@ -2106,8 +2106,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_subnsw_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a6, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a2, 8(a1)
@@ -2116,24 +2116,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a7, 4(a1)
; RV32ZBB-NEXT: sltu a1, a2, a6
; RV32ZBB-NEXT: sub t1, t1, t0
-; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sltu t0, a5, a4
; RV32ZBB-NEXT: sub a1, t1, a1
; RV32ZBB-NEXT: mv t1, t0
-; RV32ZBB-NEXT: beq a7, a4, .LBB31_2
+; RV32ZBB-NEXT: beq a7, a3, .LBB31_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a7, a4
+; RV32ZBB-NEXT: sltu t1, a7, a3
; RV32ZBB-NEXT: .LBB31_2:
; RV32ZBB-NEXT: sub a2, a2, a6
; RV32ZBB-NEXT: sltu a6, a2, t1
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a4, a7, a4
-; RV32ZBB-NEXT: sub a4, a4, t0
-; RV32ZBB-NEXT: sub a3, a5, a3
+; RV32ZBB-NEXT: sub a3, a7, a3
+; RV32ZBB-NEXT: sub a3, a3, t0
+; RV32ZBB-NEXT: sub a4, a5, a4
; RV32ZBB-NEXT: bgez a1, .LBB31_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: snez a5, a4
-; RV32ZBB-NEXT: snez a6, a3
+; RV32ZBB-NEXT: snez a5, a3
+; RV32ZBB-NEXT: snez a6, a4
; RV32ZBB-NEXT: or a5, a6, a5
; RV32ZBB-NEXT: neg a7, a2
; RV32ZBB-NEXT: sltu t0, a7, a5
@@ -2142,12 +2142,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a1, a1, t0
; RV32ZBB-NEXT: sub a2, a7, a5
-; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: add a4, a4, a6
; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: add a3, a3, a6
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB31_4:
-; RV32ZBB-NEXT: sw a4, 4(a0)
-; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: ret
@@ -2174,8 +2174,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a2, 8(a1)
@@ -2184,24 +2184,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a7, 4(a1)
; RV32I-NEXT: sltu a1, a2, a6
; RV32I-NEXT: sub t1, t1, t0
-; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sltu t0, a5, a4
; RV32I-NEXT: sub a1, t1, a1
; RV32I-NEXT: mv t1, t0
-; RV32I-NEXT: beq a7, a4, .LBB32_2
+; RV32I-NEXT: beq a7, a3, .LBB32_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a7, a4
+; RV32I-NEXT: sltu t1, a7, a3
; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: sub a2, a2, a6
; RV32I-NEXT: sltu a6, a2, t1
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a4, a7, a4
-; RV32I-NEXT: sub a4, a4, t0
-; RV32I-NEXT: sub a3, a5, a3
+; RV32I-NEXT: sub a3, a7, a3
+; RV32I-NEXT: sub a3, a3, t0
+; RV32I-NEXT: sub a4, a5, a4
; RV32I-NEXT: bgez a1, .LBB32_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: snez a6, a3
+; RV32I-NEXT: snez a5, a3
+; RV32I-NEXT: snez a6, a4
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: sltu t0, a7, a5
@@ -2210,12 +2210,12 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t0
; RV32I-NEXT: sub a2, a7, a5
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: add a4, a4, a6
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: add a3, a3, a6
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB32_4:
-; RV32I-NEXT: sw a4, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
@@ -2237,8 +2237,8 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_subnsw_i128_undef:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a6, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a2, 8(a1)
@@ -2247,24 +2247,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a7, 4(a1)
; RV32ZBB-NEXT: sltu a1, a2, a6
; RV32ZBB-NEXT: sub t1, t1, t0
-; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sltu t0, a5, a4
; RV32ZBB-NEXT: sub a1, t1, a1
; RV32ZBB-NEXT: mv t1, t0
-; RV32ZBB-NEXT: beq a7, a4, .LBB32_2
+; RV32ZBB-NEXT: beq a7, a3, .LBB32_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a7, a4
+; RV32ZBB-NEXT: sltu t1, a7, a3
; RV32ZBB-NEXT: .LBB32_2:
; RV32ZBB-NEXT: sub a2, a2, a6
; RV32ZBB-NEXT: sltu a6, a2, t1
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a4, a7, a4
-; RV32ZBB-...
[truncated]
|
Thanks for the ping - I've gone ahead and rebased this, and also gone back and dug out my old notes on this. I think this should be fine to go ahead with on the same basis as the previous load clustering patch. And this then matches AArch64. I would like to better understand the comments in #111504 - I'll follow up there. Though if it's "just" a further tweak on top of this, the best path is probably to land this and then review the other patch. (EDIT: looking more closely at the other patch, and after the discussion in the sync-up call, I think it's definitely not a blocker on this PR.) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I have skimmed (most of) the tests, and see nothing concerning.
I do see a couple of possibilities for store merging, but that's an entirely different topic. For later consideration, the two interesting sub-cases I noticed are: when both stores are storing zero, and when we have zbkb.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/6740 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/6756 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/4648 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/11638 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/7023 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/9706 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/10067 Here is the relevant piece of the build log for the reference
|
Builds on llvm#73789, enabling store clustering by default using the same heuristic.
Builds on llvm#73789, enabling store clustering by default using the same heuristic.
Builds on #73789, enabling store clustering by default using the same heuristic.