From 639c1f22d4c64462c6a459d4a569cda13ae0fde7 Mon Sep 17 00:00:00 2001
From: Alex Bradbury
Date: Wed, 25 Sep 2024 10:39:00 +0100
Subject: [PATCH] [RISCV] Enable load clustering by default

This just flips the default for the option. A later patch may remove it
altogether.
---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +-
 llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll | 44 +-
 llvm/test/CodeGen/RISCV/abds-neg.ll | 440 +-
 llvm/test/CodeGen/RISCV/abds.ll | 190 +-
 llvm/test/CodeGen/RISCV/abdu-neg.ll | 472 +-
 llvm/test/CodeGen/RISCV/abdu.ll | 884 ++--
 llvm/test/CodeGen/RISCV/add-before-shl.ll | 38 +-
 llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll | 104 +-
 llvm/test/CodeGen/RISCV/atomic-rmw.ll | 1040 ++--
 llvm/test/CodeGen/RISCV/atomic-signext.ll | 208 +-
 .../CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 112 +-
 .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 112 +-
 .../test/CodeGen/RISCV/callee-saved-fpr32s.ll | 256 +-
 .../test/CodeGen/RISCV/callee-saved-fpr64s.ll | 200 +-
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 480 +-
 ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 92 +-
 .../test/CodeGen/RISCV/calling-conv-ilp32e.ll | 200 +-
 .../calling-conv-lp64-lp64f-lp64d-common.ll | 46 +-
 llvm/test/CodeGen/RISCV/forced-atomics.ll | 144 +-
 llvm/test/CodeGen/RISCV/fpclamptosat.ll | 304 +-
 llvm/test/CodeGen/RISCV/legalize-fneg.ll | 10 +-
 llvm/test/CodeGen/RISCV/llvm.exp10.ll | 34 +-
 llvm/test/CodeGen/RISCV/llvm.frexp.ll | 112 +-
 llvm/test/CodeGen/RISCV/memcpy.ll | 62 +-
 .../CodeGen/RISCV/misched-load-clustering.ll | 10 +-
 llvm/test/CodeGen/RISCV/mul.ll | 134 +-
 llvm/test/CodeGen/RISCV/nontemporal.ll | 1200 ++---
 .../test/CodeGen/RISCV/overflow-intrinsics.ll | 2 +-
 llvm/test/CodeGen/RISCV/push-pop-popret.ll | 816 +--
 .../test/CodeGen/RISCV/reduction-formation.ll | 72 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll | 144 +-
 llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll | 64 +-
 .../rvv/fixed-vector-i8-index-cornercase.ll | 12 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 50 +-
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 942 ++--
 ...fixed-vectors-interleaved-access-zve32x.ll | 36 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 40 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll | 1692 +++---
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 1598 +++---
 .../fixed-vectors-strided-load-store-asm.ll | 16 +-
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 272 +-
 llvm/test/CodeGen/RISCV/scmp.ll | 2 +-
 llvm/test/CodeGen/RISCV/shifts.ll | 110 +-
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 136 +-
 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 638 +--
 llvm/test/CodeGen/RISCV/stack-store-check.ll | 32 +-
 llvm/test/CodeGen/RISCV/ucmp.ll | 2 +-
 .../RISCV/umulo-128-legalisation-lowering.ll | 96 +-
 .../CodeGen/RISCV/unaligned-load-store.ll | 72 +-
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 6 +-
 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 442 +-
 llvm/test/CodeGen/RISCV/vararg.ll | 50 +-
 ...lar-shift-by-byte-multiple-legalization.ll | 4424 ++++++++---------
 .../RISCV/wide-scalar-shift-legalization.ll | 2118 ++++----
 llvm/test/CodeGen/RISCV/xtheadmempair.ll | 14 +-
 56 files changed, 10419 insertions(+), 10413 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 6a72857b93b6c7..b9d35a924669f1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -96,7 +96,7 @@ static cl::opt
 static cl::opt EnableMISchedLoadClustering(
     "riscv-misched-load-clustering",
cl::Hidden, cl::desc("Enable load clustering in the machine scheduler"), - cl::init(false)); + cl::init(true)); static cl::opt EnableVSETVLIAfterRVVRegAlloc( "riscv-vsetvl-after-rvv-regalloc", cl::Hidden, diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index a49d4de6e9cf0d..01cab0d0e157bd 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -69,15 +69,15 @@ define i32 @va1(ptr %fmt, ...) { ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: addi a0, sp, 24 ; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: lwu a0, 8(sp) +; RV64-NEXT: lw a1, 12(sp) +; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: srli a2, a1, 32 ; RV64-NEXT: sw a1, 8(sp) @@ -128,15 +128,15 @@ define i32 @va1(ptr %fmt, ...) { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -24(s0) -; RV64-WITHFP-NEXT: lw a0, -20(s0) -; RV64-WITHFP-NEXT: lwu a1, -24(s0) +; RV64-WITHFP-NEXT: lwu a0, -24(s0) +; RV64-WITHFP-NEXT: lw a1, -20(s0) +; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: slli a0, a0, 32 -; RV64-WITHFP-NEXT: or a0, a0, a1 +; RV64-WITHFP-NEXT: slli a1, a1, 32 +; RV64-WITHFP-NEXT: or a0, a1, a0 ; RV64-WITHFP-NEXT: addi a1, a0, 4 ; RV64-WITHFP-NEXT: srli a2, a1, 32 ; RV64-WITHFP-NEXT: sw a1, -24(s0) @@ -1609,22 +1609,22 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: sd a4, 304(a0) ; RV64-NEXT: lui a0, 24414 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a5, 312(a0) -; RV64-NEXT: lui a0, 24414 ; RV64-NEXT: addiw a0, a0, 280 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: lwu a0, 8(sp) +; RV64-NEXT: lw a1, 12(sp) +; RV64-NEXT: lui a2, 24414 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: sd a5, 312(a2) ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: sd a6, 320(a2) ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: sd a7, 328(a2) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: srli a2, a1, 32 ; RV64-NEXT: sw a1, 8(sp) @@ -1692,15 +1692,15 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: addi a1, s0, 8 ; RV64-WITHFP-NEXT: sd a1, 0(a0) -; RV64-WITHFP-NEXT: lw a1, 4(a0) -; RV64-WITHFP-NEXT: lwu a2, 0(a0) +; RV64-WITHFP-NEXT: lwu a1, 0(a0) +; RV64-WITHFP-NEXT: lw a2, 4(a0) +; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: slli a1, a1, 32 -; RV64-WITHFP-NEXT: or a1, a1, a2 +; RV64-WITHFP-NEXT: slli a2, a2, 32 +; RV64-WITHFP-NEXT: or a1, a2, a1 ; RV64-WITHFP-NEXT: addi a2, a1, 4 ; RV64-WITHFP-NEXT: srli a3, a2, 32 ; RV64-WITHFP-NEXT: sw a2, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index 168615983d9709..b6064198935a61 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -622,23 +622,23 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 0(a2) -; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) -; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: lw a2, 4(a2) ; RV32I-NEXT: sltu t3, a7, a6 ; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: beq t1, t2, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t2, t1 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sltu a2, a4, a3 -; RV32I-NEXT: sltu t6, a1, a5 -; RV32I-NEXT: mv t0, a2 -; RV32I-NEXT: beq a5, a1, .LBB11_4 +; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sltu t6, a2, a4 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beq a4, a2, .LBB11_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: .LBB11_4: @@ -651,16 +651,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t0, t4 ; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: mv t5, a2 -; RV32I-NEXT: beq a1, a5, .LBB11_8 +; RV32I-NEXT: mv t5, a5 +; RV32I-NEXT: beq a2, a4, .LBB11_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t5, t6 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: sltu t4, a3, a4 +; RV32I-NEXT: sltu t4, a3, a1 ; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: beq a5, a1, .LBB11_10 +; RV32I-NEXT: beq a4, a2, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a5, a1 +; RV32I-NEXT: sltu t6, a4, a2 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: bnez t0, .LBB11_12 ; RV32I-NEXT: # %bb.11: @@ -684,29 +684,29 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: add a7, a7, t1 ; RV32I-NEXT: bnez t0, .LBB11_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: j .LBB11_16 ; RV32I-NEXT: .LBB11_15: -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: sub a2, a4, t4 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: .LBB11_16: -; RV32I-NEXT: or a2, a3, a1 -; RV32I-NEXT: snez a2, a2 +; RV32I-NEXT: or a3, a1, a2 +; RV32I-NEXT: snez a3, a3 ; RV32I-NEXT: neg a4, a6 -; RV32I-NEXT: sltu a5, a4, a2 +; RV32I-NEXT: sltu a5, a4, a3 ; RV32I-NEXT: neg a6, a7 ; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: snez a6, a3 -; RV32I-NEXT: add a1, a1, a6 +; RV32I-NEXT: snez a6, a1 +; RV32I-NEXT: add a2, a2, a6 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a4, a4, a3 ; RV32I-NEXT: 
neg a1, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: neg a2, a3 -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -741,23 +741,23 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) -; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a1, 0(a2) +; RV32ZBB-NEXT: lw a2, 4(a2) ; RV32ZBB-NEXT: sltu t3, a7, a6 ; RV32ZBB-NEXT: mv t4, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t2, t1 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sltu a2, a4, a3 -; RV32ZBB-NEXT: sltu t6, a1, a5 -; RV32ZBB-NEXT: mv t0, a2 -; RV32ZBB-NEXT: beq a5, a1, .LBB11_4 +; RV32ZBB-NEXT: sltu a5, a1, a3 +; RV32ZBB-NEXT: sltu t6, a2, a4 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beq a4, a2, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t0, t6 ; RV32ZBB-NEXT: .LBB11_4: @@ -770,16 +770,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t0, t4 ; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: mv t5, a2 -; RV32ZBB-NEXT: beq a1, a5, .LBB11_8 +; RV32ZBB-NEXT: mv t5, a5 +; RV32ZBB-NEXT: beq a2, a4, .LBB11_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t5, t6 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: sltu t4, a3, a4 +; RV32ZBB-NEXT: sltu t4, a3, a1 ; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: beq a5, a1, .LBB11_10 +; RV32ZBB-NEXT: beq a4, a2, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a5, a1 +; RV32ZBB-NEXT: sltu t6, a4, a2 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: bnez t0, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: @@ -803,29 +803,29 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: add a7, a7, t1 ; RV32ZBB-NEXT: bnez t0, .LBB11_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sub a1, a1, a5 -; RV32ZBB-NEXT: sub a1, a1, a2 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a2, a2, a4 +; RV32ZBB-NEXT: sub a2, a2, a5 +; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: j .LBB11_16 ; RV32ZBB-NEXT: .LBB11_15: -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a4, a4, a2 +; RV32ZBB-NEXT: sub a2, a4, t4 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: .LBB11_16: -; RV32ZBB-NEXT: or a2, a3, a1 -; RV32ZBB-NEXT: snez a2, a2 +; RV32ZBB-NEXT: or a3, a1, a2 +; RV32ZBB-NEXT: snez a3, a3 ; RV32ZBB-NEXT: neg a4, a6 -; RV32ZBB-NEXT: sltu a5, a4, a2 +; RV32ZBB-NEXT: sltu a5, a4, a3 ; RV32ZBB-NEXT: neg a6, a7 ; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: snez a6, a3 -; RV32ZBB-NEXT: add a1, a1, a6 +; RV32ZBB-NEXT: snez a6, a1 +; RV32ZBB-NEXT: add a2, a2, a6 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a4, a4, a3 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: neg a2, a3 -; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -869,23 +869,23 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; 
RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 0(a2) -; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) -; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: lw a2, 4(a2) ; RV32I-NEXT: sltu t3, a7, a6 ; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: beq t1, t2, .LBB12_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t2, t1 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sltu a2, a4, a3 -; RV32I-NEXT: sltu t6, a1, a5 -; RV32I-NEXT: mv t0, a2 -; RV32I-NEXT: beq a5, a1, .LBB12_4 +; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sltu t6, a2, a4 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beq a4, a2, .LBB12_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: .LBB12_4: @@ -898,16 +898,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t0, t4 ; RV32I-NEXT: .LBB12_6: -; RV32I-NEXT: mv t5, a2 -; RV32I-NEXT: beq a1, a5, .LBB12_8 +; RV32I-NEXT: mv t5, a5 +; RV32I-NEXT: beq a2, a4, .LBB12_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t5, t6 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: sltu t4, a3, a4 +; RV32I-NEXT: sltu t4, a3, a1 ; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: beq a5, a1, .LBB12_10 +; RV32I-NEXT: beq a4, a2, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a5, a1 +; RV32I-NEXT: sltu t6, a4, a2 ; RV32I-NEXT: .LBB12_10: ; RV32I-NEXT: bnez t0, .LBB12_12 ; RV32I-NEXT: # %bb.11: @@ -931,29 +931,29 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: add a7, a7, t1 ; RV32I-NEXT: bnez t0, .LBB12_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: j .LBB12_16 ; RV32I-NEXT: .LBB12_15: -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: sub a2, a4, t4 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: .LBB12_16: -; RV32I-NEXT: or a2, a3, a1 -; RV32I-NEXT: snez a2, a2 +; RV32I-NEXT: or a3, a1, a2 +; RV32I-NEXT: snez a3, a3 ; RV32I-NEXT: neg a4, a6 -; RV32I-NEXT: sltu a5, a4, a2 +; RV32I-NEXT: sltu a5, a4, a3 ; RV32I-NEXT: neg a6, a7 ; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: snez a6, a3 -; RV32I-NEXT: add a1, a1, a6 +; RV32I-NEXT: snez a6, a1 +; RV32I-NEXT: add a2, a2, a6 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a4, a4, a3 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: neg a2, a3 -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -988,23 +988,23 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) -; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a1, 0(a2) +; RV32ZBB-NEXT: lw a2, 4(a2) ; RV32ZBB-NEXT: sltu t3, a7, a6 ; RV32ZBB-NEXT: mv t4, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t2, t1 ; 
RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sltu a2, a4, a3 -; RV32ZBB-NEXT: sltu t6, a1, a5 -; RV32ZBB-NEXT: mv t0, a2 -; RV32ZBB-NEXT: beq a5, a1, .LBB12_4 +; RV32ZBB-NEXT: sltu a5, a1, a3 +; RV32ZBB-NEXT: sltu t6, a2, a4 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beq a4, a2, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t0, t6 ; RV32ZBB-NEXT: .LBB12_4: @@ -1017,16 +1017,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t0, t4 ; RV32ZBB-NEXT: .LBB12_6: -; RV32ZBB-NEXT: mv t5, a2 -; RV32ZBB-NEXT: beq a1, a5, .LBB12_8 +; RV32ZBB-NEXT: mv t5, a5 +; RV32ZBB-NEXT: beq a2, a4, .LBB12_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t5, t6 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: sltu t4, a3, a4 +; RV32ZBB-NEXT: sltu t4, a3, a1 ; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: beq a5, a1, .LBB12_10 +; RV32ZBB-NEXT: beq a4, a2, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a5, a1 +; RV32ZBB-NEXT: sltu t6, a4, a2 ; RV32ZBB-NEXT: .LBB12_10: ; RV32ZBB-NEXT: bnez t0, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: @@ -1050,29 +1050,29 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: add a7, a7, t1 ; RV32ZBB-NEXT: bnez t0, .LBB12_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sub a1, a1, a5 -; RV32ZBB-NEXT: sub a1, a1, a2 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a2, a2, a4 +; RV32ZBB-NEXT: sub a2, a2, a5 +; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: j .LBB12_16 ; RV32ZBB-NEXT: .LBB12_15: -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a4, a4, a2 +; RV32ZBB-NEXT: sub a2, a4, t4 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: .LBB12_16: -; RV32ZBB-NEXT: or a2, a3, a1 -; RV32ZBB-NEXT: snez a2, a2 +; RV32ZBB-NEXT: or a3, a1, a2 +; RV32ZBB-NEXT: snez a3, a3 ; RV32ZBB-NEXT: neg a4, a6 -; RV32ZBB-NEXT: sltu a5, a4, a2 +; RV32ZBB-NEXT: sltu a5, a4, a3 ; RV32ZBB-NEXT: neg a6, a7 ; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: snez a6, a3 -; RV32ZBB-NEXT: add a1, a1, a6 +; RV32ZBB-NEXT: snez a6, a1 +; RV32ZBB-NEXT: add a2, a2, a6 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a4, a4, a3 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: neg a2, a3 -; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1383,10 +1383,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a6, 4(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: @@ -1510,10 +1510,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a6, 4(a2) -; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) ; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: @@ -1861,67 +1861,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) 
-; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a4, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a6, a5 ; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a2, .LBB22_2 +; RV32I-NEXT: beq t0, a7, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t0, a2 +; RV32I-NEXT: slt t4, t0, a7 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a4, a3 +; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a5, .LBB22_4 +; RV32I-NEXT: beq a1, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a5 +; RV32I-NEXT: sltu t3, a1, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a2 -; RV32I-NEXT: xor t6, a7, a6 +; RV32I-NEXT: xor t5, t0, a7 +; RV32I-NEXT: xor t6, a6, a5 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t6, t4 ; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a4 +; RV32I-NEXT: sltu t4, a3, a2 ; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a5, .LBB22_8 +; RV32I-NEXT: beq a1, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a5, a1 +; RV32I-NEXT: sltu t5, a4, a1 ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t1, a6, a7 -; RV32I-NEXT: sub a2, a2, t0 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a6, a6, a7 -; RV32I-NEXT: sltu a7, a6, t5 -; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sltu t1, a5, a6 +; RV32I-NEXT: sub a7, a7, t0 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sub a6, a5, a6 +; RV32I-NEXT: sltu a5, a6, t5 +; RV32I-NEXT: sub a5, a7, a5 ; RV32I-NEXT: sub a6, a6, t5 -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: sub a1, a4, t4 +; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a2, t0, a2 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: sub a7, t0, a7 +; RV32I-NEXT: sub a6, a6, a5 +; RV32I-NEXT: sub a5, a7, t1 ; RV32I-NEXT: sltu a7, a6, t3 -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a5, a5, a7 ; RV32I-NEXT: sub a6, a6, t3 ; RV32I-NEXT: sub a1, a1, t2 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a6, 8(a0) ; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1948,67 +1948,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw a2, 12(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a6, a5 ; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a2, .LBB22_2 +; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t0, a2 +; RV32ZBB-NEXT: slt t4, t0, a7 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a4, a3 +; RV32ZBB-NEXT: sltu 
t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_4 +; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a5 +; RV32ZBB-NEXT: sltu t3, a1, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a2 -; RV32ZBB-NEXT: xor t6, a7, a6 +; RV32ZBB-NEXT: xor t5, t0, a7 +; RV32ZBB-NEXT: xor t6, a6, a5 ; RV32ZBB-NEXT: or t5, t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t6, t4 ; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a4 +; RV32ZBB-NEXT: sltu t4, a3, a2 ; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_8 +; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a5, a1 +; RV32ZBB-NEXT: sltu t5, a4, a1 ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t1, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, t0 -; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sub a6, a6, a7 -; RV32ZBB-NEXT: sltu a7, a6, t5 -; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sltu t1, a5, a6 +; RV32ZBB-NEXT: sub a7, a7, t0 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sub a6, a5, a6 +; RV32ZBB-NEXT: sltu a5, a6, t5 +; RV32ZBB-NEXT: sub a5, a7, a5 ; RV32ZBB-NEXT: sub a6, a6, t5 -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a4, a4, a1 +; RV32ZBB-NEXT: sub a1, a4, t4 +; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a2, t0, a2 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a2, a2, t1 +; RV32ZBB-NEXT: sub a7, t0, a7 +; RV32ZBB-NEXT: sub a6, a6, a5 +; RV32ZBB-NEXT: sub a5, a7, t1 ; RV32ZBB-NEXT: sltu a7, a6, t3 -; RV32ZBB-NEXT: sub a1, a1, a5 -; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a5, a5, a7 ; RV32ZBB-NEXT: sub a6, a6, t3 ; RV32ZBB-NEXT: sub a1, a1, t2 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a6, 8(a0) ; RV32ZBB-NEXT: sw a1, 4(a0) -; RV32ZBB-NEXT: sw a3, 0(a0) -; RV32ZBB-NEXT: sw a2, 12(a0) +; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: @@ -2390,31 +2390,31 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a4, 4(a2) ; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a2, 4(a2) +; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: sltu t1, a6, a5 ; RV32I-NEXT: sub t0, t0, a7 -; RV32I-NEXT: sltu a7, a4, a3 +; RV32I-NEXT: sltu a7, a2, a3 ; RV32I-NEXT: sub t1, t0, t1 ; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: beq a1, a2, .LBB31_2 +; RV32I-NEXT: beq a1, a4, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t0, a1, a2 +; RV32I-NEXT: sltu t0, a1, a4 ; RV32I-NEXT: .LBB31_2: ; RV32I-NEXT: sub a5, a6, a5 ; RV32I-NEXT: sltu a6, a5, t0 ; RV32I-NEXT: sub a6, t1, a6 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub t1, a1, a7 -; RV32I-NEXT: sub a2, a5, t0 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a4, a5, t0 +; RV32I-NEXT: sub a3, a2, a3 ; RV32I-NEXT: srai a1, a6, 31 -; RV32I-NEXT: xor a2, a2, a1 +; RV32I-NEXT: xor a2, a4, a1 ; RV32I-NEXT: sltu a4, a1, a2 ; RV32I-NEXT: xor a5, a6, a1 ; RV32I-NEXT: sub a5, a1, a5 @@ -2458,31 
+2458,31 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) ; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a2, 4(a2) +; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) ; RV32ZBB-NEXT: sltu t1, a6, a5 ; RV32ZBB-NEXT: sub t0, t0, a7 -; RV32ZBB-NEXT: sltu a7, a4, a3 +; RV32ZBB-NEXT: sltu a7, a2, a3 ; RV32ZBB-NEXT: sub t1, t0, t1 ; RV32ZBB-NEXT: mv t0, a7 -; RV32ZBB-NEXT: beq a1, a2, .LBB31_2 +; RV32ZBB-NEXT: beq a1, a4, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t0, a1, a2 +; RV32ZBB-NEXT: sltu t0, a1, a4 ; RV32ZBB-NEXT: .LBB31_2: ; RV32ZBB-NEXT: sub a5, a6, a5 ; RV32ZBB-NEXT: sltu a6, a5, t0 ; RV32ZBB-NEXT: sub a6, t1, a6 -; RV32ZBB-NEXT: sub a1, a1, a2 +; RV32ZBB-NEXT: sub a1, a1, a4 ; RV32ZBB-NEXT: sub t1, a1, a7 -; RV32ZBB-NEXT: sub a2, a5, t0 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a4, a5, t0 +; RV32ZBB-NEXT: sub a3, a2, a3 ; RV32ZBB-NEXT: srai a1, a6, 31 -; RV32ZBB-NEXT: xor a2, a2, a1 +; RV32ZBB-NEXT: xor a2, a4, a1 ; RV32ZBB-NEXT: sltu a4, a1, a2 ; RV32ZBB-NEXT: xor a5, a6, a1 ; RV32ZBB-NEXT: sub a5, a1, a5 @@ -2532,31 +2532,31 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a4, 4(a2) ; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a2, 4(a2) +; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: sltu t1, a6, a5 ; RV32I-NEXT: sub t0, t0, a7 -; RV32I-NEXT: sltu a7, a4, a3 +; RV32I-NEXT: sltu a7, a2, a3 ; RV32I-NEXT: sub t1, t0, t1 ; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: beq a1, a2, .LBB32_2 +; RV32I-NEXT: beq a1, a4, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t0, a1, a2 +; RV32I-NEXT: sltu t0, a1, a4 ; RV32I-NEXT: .LBB32_2: ; RV32I-NEXT: sub a5, a6, a5 ; RV32I-NEXT: sltu a6, a5, t0 ; RV32I-NEXT: sub a6, t1, a6 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub t1, a1, a7 -; RV32I-NEXT: sub a2, a5, t0 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a4, a5, t0 +; RV32I-NEXT: sub a3, a2, a3 ; RV32I-NEXT: srai a1, a6, 31 -; RV32I-NEXT: xor a2, a2, a1 +; RV32I-NEXT: xor a2, a4, a1 ; RV32I-NEXT: sltu a4, a1, a2 ; RV32I-NEXT: xor a5, a6, a1 ; RV32I-NEXT: sub a5, a1, a5 @@ -2600,31 +2600,31 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) ; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a2, 4(a2) +; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) ; RV32ZBB-NEXT: sltu t1, a6, a5 ; RV32ZBB-NEXT: sub t0, t0, a7 -; RV32ZBB-NEXT: sltu a7, a4, a3 +; RV32ZBB-NEXT: sltu a7, a2, a3 ; RV32ZBB-NEXT: sub t1, t0, t1 ; RV32ZBB-NEXT: mv t0, a7 -; RV32ZBB-NEXT: beq a1, a2, .LBB32_2 +; RV32ZBB-NEXT: beq a1, a4, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t0, a1, a2 +; RV32ZBB-NEXT: sltu t0, a1, a4 ; RV32ZBB-NEXT: .LBB32_2: ; RV32ZBB-NEXT: sub a5, a6, a5 ; RV32ZBB-NEXT: sltu a6, a5, t0 ; RV32ZBB-NEXT: 
sub a6, t1, a6 -; RV32ZBB-NEXT: sub a1, a1, a2 +; RV32ZBB-NEXT: sub a1, a1, a4 ; RV32ZBB-NEXT: sub t1, a1, a7 -; RV32ZBB-NEXT: sub a2, a5, t0 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a4, a5, t0 +; RV32ZBB-NEXT: sub a3, a2, a3 ; RV32ZBB-NEXT: srai a1, a6, 31 -; RV32ZBB-NEXT: xor a2, a2, a1 +; RV32ZBB-NEXT: xor a2, a4, a1 ; RV32ZBB-NEXT: sltu a4, a1, a2 ; RV32ZBB-NEXT: xor a5, a6, a1 ; RV32ZBB-NEXT: sub a5, a1, a5 diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 919214b0e9a8dd..91b044902a5201 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -535,12 +535,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 @@ -631,12 +631,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 @@ -735,12 +735,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 @@ -831,12 +831,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 @@ -1124,12 +1124,12 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 @@ -1220,12 +1220,12 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 @@ -1515,12 +1515,12 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { 
; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 @@ -1611,12 +1611,12 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 @@ -2044,28 +2044,28 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 0(a1) -; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t2, 12(a1) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a6, 4(a1) -; RV32I-NEXT: sltu a1, t0, a7 -; RV32I-NEXT: sub a2, t2, t1 -; RV32I-NEXT: sltu t1, a5, a3 -; RV32I-NEXT: sub a1, a2, a1 -; RV32I-NEXT: mv a2, t1 -; RV32I-NEXT: beq a6, a4, .LBB31_2 +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw t0, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: lw a7, 4(a1) +; RV32I-NEXT: sltu a1, a2, a6 +; RV32I-NEXT: sub t1, t1, t0 +; RV32I-NEXT: sltu t0, a5, a3 +; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: mv t1, t0 +; RV32I-NEXT: beq a7, a4, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, a6, a4 +; RV32I-NEXT: sltu t1, a7, a4 ; RV32I-NEXT: .LBB31_2: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sltu t0, a7, a2 -; RV32I-NEXT: sub a1, a1, t0 -; RV32I-NEXT: sub a2, a7, a2 -; RV32I-NEXT: sub a4, a6, a4 -; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a2, a2, a6 +; RV32I-NEXT: sltu a6, a2, t1 +; RV32I-NEXT: sub a1, a1, a6 +; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: sub a4, a7, a4 +; RV32I-NEXT: sub a4, a4, t0 ; RV32I-NEXT: sub a3, a5, a3 ; RV32I-NEXT: bgez a1, .LBB31_4 ; RV32I-NEXT: # %bb.3: @@ -2107,28 +2107,28 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: lw t2, 12(a1) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a6, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a7 -; RV32ZBB-NEXT: sub a2, t2, t1 -; RV32ZBB-NEXT: sltu t1, a5, a3 -; RV32ZBB-NEXT: sub a1, a2, a1 -; RV32ZBB-NEXT: mv a2, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB31_2 +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw t0, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a5, 0(a1) +; RV32ZBB-NEXT: lw a7, 4(a1) +; RV32ZBB-NEXT: sltu a1, a2, a6 +; RV32ZBB-NEXT: sub t1, t1, t0 +; RV32ZBB-NEXT: sltu t0, a5, a3 +; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: mv t1, t0 +; RV32ZBB-NEXT: beq a7, a4, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a2, a6, a4 +; RV32ZBB-NEXT: sltu t1, a7, a4 ; RV32ZBB-NEXT: .LBB31_2: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sltu t0, a7, a2 -; RV32ZBB-NEXT: sub a1, a1, t0 -; RV32ZBB-NEXT: sub a2, a7, a2 -; RV32ZBB-NEXT: sub a4, a6, a4 -; RV32ZBB-NEXT: sub 
a4, a4, t1 +; RV32ZBB-NEXT: sub a2, a2, a6 +; RV32ZBB-NEXT: sltu a6, a2, t1 +; RV32ZBB-NEXT: sub a1, a1, a6 +; RV32ZBB-NEXT: sub a2, a2, t1 +; RV32ZBB-NEXT: sub a4, a7, a4 +; RV32ZBB-NEXT: sub a4, a4, t0 ; RV32ZBB-NEXT: sub a3, a5, a3 ; RV32ZBB-NEXT: bgez a1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: @@ -2175,28 +2175,28 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 0(a1) -; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t2, 12(a1) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a6, 4(a1) -; RV32I-NEXT: sltu a1, t0, a7 -; RV32I-NEXT: sub a2, t2, t1 -; RV32I-NEXT: sltu t1, a5, a3 -; RV32I-NEXT: sub a1, a2, a1 -; RV32I-NEXT: mv a2, t1 -; RV32I-NEXT: beq a6, a4, .LBB32_2 +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw t0, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: lw a7, 4(a1) +; RV32I-NEXT: sltu a1, a2, a6 +; RV32I-NEXT: sub t1, t1, t0 +; RV32I-NEXT: sltu t0, a5, a3 +; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: mv t1, t0 +; RV32I-NEXT: beq a7, a4, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, a6, a4 +; RV32I-NEXT: sltu t1, a7, a4 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sltu t0, a7, a2 -; RV32I-NEXT: sub a1, a1, t0 -; RV32I-NEXT: sub a2, a7, a2 -; RV32I-NEXT: sub a4, a6, a4 -; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a2, a2, a6 +; RV32I-NEXT: sltu a6, a2, t1 +; RV32I-NEXT: sub a1, a1, a6 +; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: sub a4, a7, a4 +; RV32I-NEXT: sub a4, a4, t0 ; RV32I-NEXT: sub a3, a5, a3 ; RV32I-NEXT: bgez a1, .LBB32_4 ; RV32I-NEXT: # %bb.3: @@ -2238,28 +2238,28 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: lw t2, 12(a1) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a6, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a7 -; RV32ZBB-NEXT: sub a2, t2, t1 -; RV32ZBB-NEXT: sltu t1, a5, a3 -; RV32ZBB-NEXT: sub a1, a2, a1 -; RV32ZBB-NEXT: mv a2, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB32_2 +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw t0, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a5, 0(a1) +; RV32ZBB-NEXT: lw a7, 4(a1) +; RV32ZBB-NEXT: sltu a1, a2, a6 +; RV32ZBB-NEXT: sub t1, t1, t0 +; RV32ZBB-NEXT: sltu t0, a5, a3 +; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: mv t1, t0 +; RV32ZBB-NEXT: beq a7, a4, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a2, a6, a4 +; RV32ZBB-NEXT: sltu t1, a7, a4 ; RV32ZBB-NEXT: .LBB32_2: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sltu t0, a7, a2 -; RV32ZBB-NEXT: sub a1, a1, t0 -; RV32ZBB-NEXT: sub a2, a7, a2 -; RV32ZBB-NEXT: sub a4, a6, a4 -; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a2, a2, a6 +; RV32ZBB-NEXT: sltu a6, a2, t1 +; RV32ZBB-NEXT: sub a1, a1, a6 +; RV32ZBB-NEXT: sub a2, a2, t1 +; RV32ZBB-NEXT: sub a4, a7, a4 +; RV32ZBB-NEXT: sub a4, a4, t0 ; RV32ZBB-NEXT: sub a3, a5, a3 ; RV32ZBB-NEXT: bgez a1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: @@ -2552,10 +2552,10 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; 
RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB38_2 ; RV32I-NEXT: # %bb.1: @@ -2647,12 +2647,12 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_select_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 87a06fc4403eb9..54075f41694392 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -624,83 +624,83 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw t1, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a7, 4(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 0(a1) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, t1 -; RV32I-NEXT: sub a2, a6, t2 -; RV32I-NEXT: sltu t2, a3, a5 +; RV32I-NEXT: sltu a1, a3, t1 +; RV32I-NEXT: sub a2, a5, a2 +; RV32I-NEXT: sltu t2, a7, a4 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: mv a2, t2 -; RV32I-NEXT: beq t0, a7, .LBB11_2 +; RV32I-NEXT: beq t0, a6, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, t0, a7 +; RV32I-NEXT: sltu a2, t0, a6 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub t1, a4, t1 +; RV32I-NEXT: sub t1, a3, t1 ; RV32I-NEXT: sltu t3, t1, a2 ; RV32I-NEXT: sub a1, a1, t3 ; RV32I-NEXT: sub a2, t1, a2 -; RV32I-NEXT: beq a1, a6, .LBB11_4 +; RV32I-NEXT: beq a1, a5, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a1 +; RV32I-NEXT: sltu t1, a5, a1 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a4, a2 +; RV32I-NEXT: sltu t1, a3, a2 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq a7, t0, .LBB11_7 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t2, a7, a4 +; RV32I-NEXT: beq a6, t0, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a3, t0, a7 +; RV32I-NEXT: sltu a4, t0, a6 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sltu a3, a3, a5 +; RV32I-NEXT: sltu a4, a7, t2 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a2, a4 -; RV32I-NEXT: or a4, a4, a6 -; RV32I-NEXT: beqz a4, .LBB11_10 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: xor a3, a2, a3 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a3, t1 +; RV32I-NEXT: mv a4, t1 ; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: neg t0, a3 -; RV32I-NEXT: xor a5, a5, t0 +; RV32I-NEXT: neg t0, a4 +; RV32I-NEXT: xor a5, t2, t0 ; RV32I-NEXT: sltu t2, a5, t0 -; RV32I-NEXT: xor t3, a7, t0 -; RV32I-NEXT: add a4, t3, a3 -; RV32I-NEXT: sub a4, a4, t2 -; RV32I-NEXT: snez t1, a4 -; RV32I-NEXT: add a5, a5, a3 -; RV32I-NEXT: snez a6, a5 -; RV32I-NEXT: or t1, a6, t1 -; RV32I-NEXT: beqz a7, .LBB11_12 +; RV32I-NEXT: xor t3, a6, t0 +; RV32I-NEXT: add a3, t3, a4 +; RV32I-NEXT: sub a3, 
a3, t2 +; RV32I-NEXT: snez t1, a3 +; RV32I-NEXT: add a5, a5, a4 +; RV32I-NEXT: snez a7, a5 +; RV32I-NEXT: or t1, a7, t1 +; RV32I-NEXT: beqz a6, .LBB11_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: sltu t2, t3, t0 ; RV32I-NEXT: .LBB11_12: ; RV32I-NEXT: xor a2, a2, t0 -; RV32I-NEXT: add a7, a2, a3 -; RV32I-NEXT: sub t3, a7, t2 +; RV32I-NEXT: add a6, a2, a4 +; RV32I-NEXT: sub t3, a6, t2 ; RV32I-NEXT: neg t4, t3 ; RV32I-NEXT: sltu t5, t4, t1 ; RV32I-NEXT: sltu a2, a2, t0 ; RV32I-NEXT: xor a1, a1, t0 -; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sltu a2, a7, t2 +; RV32I-NEXT: sltu a2, a6, t2 ; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: snez a2, t3 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sub a1, a1, t5 ; RV32I-NEXT: sub a2, t4, t1 -; RV32I-NEXT: add a4, a4, a6 -; RV32I-NEXT: neg a3, a4 +; RV32I-NEXT: add a3, a3, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: neg a4, a5 ; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) @@ -736,83 +736,83 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw t1, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw a7, 4(a2) +; RV32ZBB-NEXT: lw a2, 12(a2) +; RV32ZBB-NEXT: lw a3, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 0(a1) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, t1 -; RV32ZBB-NEXT: sub a2, a6, t2 -; RV32ZBB-NEXT: sltu t2, a3, a5 +; RV32ZBB-NEXT: sltu a1, a3, t1 +; RV32ZBB-NEXT: sub a2, a5, a2 +; RV32ZBB-NEXT: sltu t2, a7, a4 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: mv a2, t2 -; RV32ZBB-NEXT: beq t0, a7, .LBB11_2 +; RV32ZBB-NEXT: beq t0, a6, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a2, t0, a7 +; RV32ZBB-NEXT: sltu a2, t0, a6 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub t1, a4, t1 +; RV32ZBB-NEXT: sub t1, a3, t1 ; RV32ZBB-NEXT: sltu t3, t1, a2 ; RV32ZBB-NEXT: sub a1, a1, t3 ; RV32ZBB-NEXT: sub a2, t1, a2 -; RV32ZBB-NEXT: beq a1, a6, .LBB11_4 +; RV32ZBB-NEXT: beq a1, a5, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a1 +; RV32ZBB-NEXT: sltu t1, a5, a1 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu t1, a4, a2 +; RV32ZBB-NEXT: sltu t1, a3, a2 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq a7, t0, .LBB11_7 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t2, a7, a4 +; RV32ZBB-NEXT: beq a6, t0, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a3, t0, a7 +; RV32ZBB-NEXT: sltu a4, t0, a6 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: sltu a3, a3, a5 +; RV32ZBB-NEXT: sltu a4, a7, t2 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a6, a1, a6 -; RV32ZBB-NEXT: xor a4, a2, a4 -; RV32ZBB-NEXT: or a4, a4, a6 -; RV32ZBB-NEXT: beqz a4, .LBB11_10 +; RV32ZBB-NEXT: xor a5, a1, a5 +; RV32ZBB-NEXT: xor a3, a2, a3 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a3, t1 +; RV32ZBB-NEXT: mv a4, t1 ; RV32ZBB-NEXT: .LBB11_10: -; RV32ZBB-NEXT: neg t0, a3 -; RV32ZBB-NEXT: xor a5, a5, t0 +; RV32ZBB-NEXT: neg t0, a4 +; RV32ZBB-NEXT: xor a5, t2, t0 ; RV32ZBB-NEXT: sltu t2, a5, t0 -; RV32ZBB-NEXT: xor t3, a7, t0 -; RV32ZBB-NEXT: add a4, 
t3, a3 -; RV32ZBB-NEXT: sub a4, a4, t2 -; RV32ZBB-NEXT: snez t1, a4 -; RV32ZBB-NEXT: add a5, a5, a3 -; RV32ZBB-NEXT: snez a6, a5 -; RV32ZBB-NEXT: or t1, a6, t1 -; RV32ZBB-NEXT: beqz a7, .LBB11_12 +; RV32ZBB-NEXT: xor t3, a6, t0 +; RV32ZBB-NEXT: add a3, t3, a4 +; RV32ZBB-NEXT: sub a3, a3, t2 +; RV32ZBB-NEXT: snez t1, a3 +; RV32ZBB-NEXT: add a5, a5, a4 +; RV32ZBB-NEXT: snez a7, a5 +; RV32ZBB-NEXT: or t1, a7, t1 +; RV32ZBB-NEXT: beqz a6, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: ; RV32ZBB-NEXT: sltu t2, t3, t0 ; RV32ZBB-NEXT: .LBB11_12: ; RV32ZBB-NEXT: xor a2, a2, t0 -; RV32ZBB-NEXT: add a7, a2, a3 -; RV32ZBB-NEXT: sub t3, a7, t2 +; RV32ZBB-NEXT: add a6, a2, a4 +; RV32ZBB-NEXT: sub t3, a6, t2 ; RV32ZBB-NEXT: neg t4, t3 ; RV32ZBB-NEXT: sltu t5, t4, t1 ; RV32ZBB-NEXT: sltu a2, a2, t0 ; RV32ZBB-NEXT: xor a1, a1, t0 -; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: add a1, a1, a4 ; RV32ZBB-NEXT: sub a1, a1, a2 -; RV32ZBB-NEXT: sltu a2, a7, t2 +; RV32ZBB-NEXT: sltu a2, a6, t2 ; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: snez a2, t3 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sub a1, a1, t5 ; RV32ZBB-NEXT: sub a2, t4, t1 -; RV32ZBB-NEXT: add a4, a4, a6 -; RV32ZBB-NEXT: neg a3, a4 +; RV32ZBB-NEXT: add a3, a3, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: neg a4, a5 ; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a3, 4(a0) @@ -857,83 +857,83 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw t1, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a7, 4(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 0(a1) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, t1 -; RV32I-NEXT: sub a2, a6, t2 -; RV32I-NEXT: sltu t2, a3, a5 +; RV32I-NEXT: sltu a1, a3, t1 +; RV32I-NEXT: sub a2, a5, a2 +; RV32I-NEXT: sltu t2, a7, a4 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: mv a2, t2 -; RV32I-NEXT: beq t0, a7, .LBB12_2 +; RV32I-NEXT: beq t0, a6, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, t0, a7 +; RV32I-NEXT: sltu a2, t0, a6 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub t1, a4, t1 +; RV32I-NEXT: sub t1, a3, t1 ; RV32I-NEXT: sltu t3, t1, a2 ; RV32I-NEXT: sub a1, a1, t3 ; RV32I-NEXT: sub a2, t1, a2 -; RV32I-NEXT: beq a1, a6, .LBB12_4 +; RV32I-NEXT: beq a1, a5, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a1 +; RV32I-NEXT: sltu t1, a5, a1 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a4, a2 +; RV32I-NEXT: sltu t1, a3, a2 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq a7, t0, .LBB12_7 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t2, a7, a4 +; RV32I-NEXT: beq a6, t0, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a3, t0, a7 +; RV32I-NEXT: sltu a4, t0, a6 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: sltu a3, a3, a5 +; RV32I-NEXT: sltu a4, a7, t2 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a2, a4 -; RV32I-NEXT: or a4, a4, a6 -; RV32I-NEXT: beqz a4, .LBB12_10 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: xor a3, a2, a3 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; 
RV32I-NEXT: mv a3, t1 +; RV32I-NEXT: mv a4, t1 ; RV32I-NEXT: .LBB12_10: -; RV32I-NEXT: neg t0, a3 -; RV32I-NEXT: xor a5, a5, t0 +; RV32I-NEXT: neg t0, a4 +; RV32I-NEXT: xor a5, t2, t0 ; RV32I-NEXT: sltu t2, a5, t0 -; RV32I-NEXT: xor t3, a7, t0 -; RV32I-NEXT: add a4, t3, a3 -; RV32I-NEXT: sub a4, a4, t2 -; RV32I-NEXT: snez t1, a4 -; RV32I-NEXT: add a5, a5, a3 -; RV32I-NEXT: snez a6, a5 -; RV32I-NEXT: or t1, a6, t1 -; RV32I-NEXT: beqz a7, .LBB12_12 +; RV32I-NEXT: xor t3, a6, t0 +; RV32I-NEXT: add a3, t3, a4 +; RV32I-NEXT: sub a3, a3, t2 +; RV32I-NEXT: snez t1, a3 +; RV32I-NEXT: add a5, a5, a4 +; RV32I-NEXT: snez a7, a5 +; RV32I-NEXT: or t1, a7, t1 +; RV32I-NEXT: beqz a6, .LBB12_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: sltu t2, t3, t0 ; RV32I-NEXT: .LBB12_12: ; RV32I-NEXT: xor a2, a2, t0 -; RV32I-NEXT: add a7, a2, a3 -; RV32I-NEXT: sub t3, a7, t2 +; RV32I-NEXT: add a6, a2, a4 +; RV32I-NEXT: sub t3, a6, t2 ; RV32I-NEXT: neg t4, t3 ; RV32I-NEXT: sltu t5, t4, t1 ; RV32I-NEXT: sltu a2, a2, t0 ; RV32I-NEXT: xor a1, a1, t0 -; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sltu a2, a7, t2 +; RV32I-NEXT: sltu a2, a6, t2 ; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: snez a2, t3 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sub a1, a1, t5 ; RV32I-NEXT: sub a2, t4, t1 -; RV32I-NEXT: add a4, a4, a6 -; RV32I-NEXT: neg a3, a4 +; RV32I-NEXT: add a3, a3, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: neg a4, a5 ; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) @@ -969,83 +969,83 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw t1, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw a7, 4(a2) +; RV32ZBB-NEXT: lw a2, 12(a2) +; RV32ZBB-NEXT: lw a3, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 0(a1) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, t1 -; RV32ZBB-NEXT: sub a2, a6, t2 -; RV32ZBB-NEXT: sltu t2, a3, a5 +; RV32ZBB-NEXT: sltu a1, a3, t1 +; RV32ZBB-NEXT: sub a2, a5, a2 +; RV32ZBB-NEXT: sltu t2, a7, a4 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: mv a2, t2 -; RV32ZBB-NEXT: beq t0, a7, .LBB12_2 +; RV32ZBB-NEXT: beq t0, a6, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a2, t0, a7 +; RV32ZBB-NEXT: sltu a2, t0, a6 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sub t1, a4, t1 +; RV32ZBB-NEXT: sub t1, a3, t1 ; RV32ZBB-NEXT: sltu t3, t1, a2 ; RV32ZBB-NEXT: sub a1, a1, t3 ; RV32ZBB-NEXT: sub a2, t1, a2 -; RV32ZBB-NEXT: beq a1, a6, .LBB12_4 +; RV32ZBB-NEXT: beq a1, a5, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a1 +; RV32ZBB-NEXT: sltu t1, a5, a1 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a4, a2 +; RV32ZBB-NEXT: sltu t1, a3, a2 ; RV32ZBB-NEXT: .LBB12_5: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq a7, t0, .LBB12_7 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t2, a7, a4 +; RV32ZBB-NEXT: beq a6, t0, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a3, t0, a7 +; RV32ZBB-NEXT: sltu a4, t0, a6 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: -; RV32ZBB-NEXT: sltu a3, a3, a5 +; RV32ZBB-NEXT: sltu a4, a7, t2 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a6, a1, a6 
-; RV32ZBB-NEXT: xor a4, a2, a4 -; RV32ZBB-NEXT: or a4, a4, a6 -; RV32ZBB-NEXT: beqz a4, .LBB12_10 +; RV32ZBB-NEXT: xor a5, a1, a5 +; RV32ZBB-NEXT: xor a3, a2, a3 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a3, t1 +; RV32ZBB-NEXT: mv a4, t1 ; RV32ZBB-NEXT: .LBB12_10: -; RV32ZBB-NEXT: neg t0, a3 -; RV32ZBB-NEXT: xor a5, a5, t0 +; RV32ZBB-NEXT: neg t0, a4 +; RV32ZBB-NEXT: xor a5, t2, t0 ; RV32ZBB-NEXT: sltu t2, a5, t0 -; RV32ZBB-NEXT: xor t3, a7, t0 -; RV32ZBB-NEXT: add a4, t3, a3 -; RV32ZBB-NEXT: sub a4, a4, t2 -; RV32ZBB-NEXT: snez t1, a4 -; RV32ZBB-NEXT: add a5, a5, a3 -; RV32ZBB-NEXT: snez a6, a5 -; RV32ZBB-NEXT: or t1, a6, t1 -; RV32ZBB-NEXT: beqz a7, .LBB12_12 +; RV32ZBB-NEXT: xor t3, a6, t0 +; RV32ZBB-NEXT: add a3, t3, a4 +; RV32ZBB-NEXT: sub a3, a3, t2 +; RV32ZBB-NEXT: snez t1, a3 +; RV32ZBB-NEXT: add a5, a5, a4 +; RV32ZBB-NEXT: snez a7, a5 +; RV32ZBB-NEXT: or t1, a7, t1 +; RV32ZBB-NEXT: beqz a6, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: ; RV32ZBB-NEXT: sltu t2, t3, t0 ; RV32ZBB-NEXT: .LBB12_12: ; RV32ZBB-NEXT: xor a2, a2, t0 -; RV32ZBB-NEXT: add a7, a2, a3 -; RV32ZBB-NEXT: sub t3, a7, t2 +; RV32ZBB-NEXT: add a6, a2, a4 +; RV32ZBB-NEXT: sub t3, a6, t2 ; RV32ZBB-NEXT: neg t4, t3 ; RV32ZBB-NEXT: sltu t5, t4, t1 ; RV32ZBB-NEXT: sltu a2, a2, t0 ; RV32ZBB-NEXT: xor a1, a1, t0 -; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: add a1, a1, a4 ; RV32ZBB-NEXT: sub a1, a1, a2 -; RV32ZBB-NEXT: sltu a2, a7, t2 +; RV32ZBB-NEXT: sltu a2, a6, t2 ; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: snez a2, t3 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sub a1, a1, t5 ; RV32ZBB-NEXT: sub a2, t4, t1 -; RV32ZBB-NEXT: add a4, a4, a6 -; RV32ZBB-NEXT: neg a3, a4 +; RV32ZBB-NEXT: add a3, a3, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: neg a4, a5 ; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a3, 4(a0) @@ -1336,10 +1336,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a6, 4(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: @@ -1463,10 +1463,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a6, 4(a2) -; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) ; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: @@ -1798,67 +1798,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: lw a4, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a6, a5 ; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a2, .LBB22_2 +; RV32I-NEXT: beq t0, a7, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t4, t0, a2 +; RV32I-NEXT: sltu t4, t0, a7 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a4, a3 +; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq 
a1, a5, .LBB22_4 +; RV32I-NEXT: beq a1, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a5 +; RV32I-NEXT: sltu t3, a1, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a2 -; RV32I-NEXT: xor t6, a7, a6 +; RV32I-NEXT: xor t5, t0, a7 +; RV32I-NEXT: xor t6, a6, a5 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t6, t4 ; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a4 +; RV32I-NEXT: sltu t4, a3, a2 ; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a5, .LBB22_8 +; RV32I-NEXT: beq a1, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a5, a1 +; RV32I-NEXT: sltu t5, a4, a1 ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t1, a6, a7 -; RV32I-NEXT: sub a2, a2, t0 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a6, a6, a7 -; RV32I-NEXT: sltu a7, a6, t5 -; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sltu t1, a5, a6 +; RV32I-NEXT: sub a7, a7, t0 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sub a6, a5, a6 +; RV32I-NEXT: sltu a5, a6, t5 +; RV32I-NEXT: sub a5, a7, a5 ; RV32I-NEXT: sub a6, a6, t5 -; RV32I-NEXT: sub a5, a5, a1 -; RV32I-NEXT: sub a1, a5, t4 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: sub a1, a4, t4 +; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a2, t0, a2 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: sub a7, t0, a7 +; RV32I-NEXT: sub a6, a6, a5 +; RV32I-NEXT: sub a5, a7, t1 ; RV32I-NEXT: sltu a7, a6, t3 -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a5, a5, a7 ; RV32I-NEXT: sub a6, a6, t3 ; RV32I-NEXT: sub a1, a1, t2 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a6, 8(a0) ; RV32I-NEXT: sw a1, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1885,67 +1885,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 0(a1) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw a2, 12(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a6, a5 ; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a2, .LBB22_2 +; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t4, t0, a2 +; RV32ZBB-NEXT: sltu t4, t0, a7 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a4, a3 +; RV32ZBB-NEXT: sltu t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_4 +; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a5 +; RV32ZBB-NEXT: sltu t3, a1, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a2 -; RV32ZBB-NEXT: xor t6, a7, a6 +; RV32ZBB-NEXT: xor t5, t0, a7 +; RV32ZBB-NEXT: xor t6, a6, a5 ; RV32ZBB-NEXT: or t5, t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t6, t4 ; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a4 +; RV32ZBB-NEXT: sltu t4, a3, a2 ; RV32ZBB-NEXT: mv t5, t4 -; 
RV32ZBB-NEXT: beq a1, a5, .LBB22_8 +; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a5, a1 +; RV32ZBB-NEXT: sltu t5, a4, a1 ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t1, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, t0 -; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sub a6, a6, a7 -; RV32ZBB-NEXT: sltu a7, a6, t5 -; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sltu t1, a5, a6 +; RV32ZBB-NEXT: sub a7, a7, t0 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sub a6, a5, a6 +; RV32ZBB-NEXT: sltu a5, a6, t5 +; RV32ZBB-NEXT: sub a5, a7, a5 ; RV32ZBB-NEXT: sub a6, a6, t5 -; RV32ZBB-NEXT: sub a5, a5, a1 -; RV32ZBB-NEXT: sub a1, a5, t4 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a4, a4, a1 +; RV32ZBB-NEXT: sub a1, a4, t4 +; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a2, t0, a2 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a2, a2, t1 +; RV32ZBB-NEXT: sub a7, t0, a7 +; RV32ZBB-NEXT: sub a6, a6, a5 +; RV32ZBB-NEXT: sub a5, a7, t1 ; RV32ZBB-NEXT: sltu a7, a6, t3 -; RV32ZBB-NEXT: sub a1, a1, a5 -; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a5, a5, a7 ; RV32ZBB-NEXT: sub a6, a6, t3 ; RV32ZBB-NEXT: sub a1, a1, t2 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a6, 8(a0) ; RV32ZBB-NEXT: sw a1, 4(a0) -; RV32ZBB-NEXT: sw a3, 0(a0) -; RV32ZBB-NEXT: sw a2, 12(a0) +; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll index a9f933243f679a..a04a800157dbb1 100644 --- a/llvm/test/CodeGen/RISCV/abdu.ll +++ b/llvm/test/CodeGen/RISCV/abdu.ll @@ -540,75 +540,75 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw t0, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw t0, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu a2, a4, a7 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a3, a5 -; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t2, t0, a3 +; RV32I-NEXT: sub a7, a7, t1 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, t0, .LBB11_2 +; RV32I-NEXT: beq a1, a5, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, t0 +; RV32I-NEXT: sltu t1, a1, a5 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t3, a7, t1 -; RV32I-NEXT: sub a2, a2, t3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: beq a2, a6, .LBB11_4 +; RV32I-NEXT: sub t3, a2, a6 +; RV32I-NEXT: sltu a6, t3, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a7, t3, t1 +; RV32I-NEXT: beq a6, a4, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a2 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a2, a7 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: sub t0, a1, t0 -; RV32I-NEXT: sub t0, t0, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq t0, 
a1, .LBB11_7 +; RV32I-NEXT: sub a5, a1, a5 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a3, t0, a3 +; RV32I-NEXT: beq a5, a1, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, t0 +; RV32I-NEXT: sltu a1, a1, a5 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sltu a1, a3, a5 +; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a3, a2, a6 -; RV32I-NEXT: xor a4, a7, a4 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: beqz a3, .LBB11_10 +; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a2, a7, a2 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: beqz a2, .LBB11_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: xor a3, a7, a6 -; RV32I-NEXT: sltu a4, a3, a6 -; RV32I-NEXT: xor a2, a2, a6 -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: sub a4, a2, a4 -; RV32I-NEXT: xor a2, a5, a6 -; RV32I-NEXT: sltu a5, a2, a6 -; RV32I-NEXT: xor a7, t0, a6 -; RV32I-NEXT: mv t1, a5 -; RV32I-NEXT: beqz t0, .LBB11_12 +; RV32I-NEXT: neg t0, a1 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: sltu a4, a2, t0 +; RV32I-NEXT: xor a6, a6, t0 +; RV32I-NEXT: add a6, a6, a1 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: xor a7, a5, t0 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: beqz a5, .LBB11_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a7, t0 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: add a3, a3, a1 -; RV32I-NEXT: sltu a6, a3, t1 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a2, a2, t1 ; RV32I-NEXT: add a7, a7, a1 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sub a5, a7, a6 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; @@ -636,75 +636,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw t0, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw a4, 12(a1) +; RV32ZBB-NEXT: lw t0, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu a2, a4, a7 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a3, a5 -; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t2, t0, a3 +; RV32ZBB-NEXT: sub a7, a7, t1 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, t0, .LBB11_2 +; RV32ZBB-NEXT: beq a1, a5, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, t0 +; RV32ZBB-NEXT: sltu t1, a1, a5 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t3, a7, t1 -; RV32ZBB-NEXT: sub a2, a2, t3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: beq a2, a6, .LBB11_4 +; RV32ZBB-NEXT: sub t3, a2, a6 +; RV32ZBB-NEXT: sltu a6, t3, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a7, t3, t1 +; RV32ZBB-NEXT: beq a6, a4, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu 
t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a2, a7 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: sub t0, a1, t0 -; RV32ZBB-NEXT: sub t0, t0, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq t0, a1, .LBB11_7 +; RV32ZBB-NEXT: sub a5, a1, a5 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a3, t0, a3 +; RV32ZBB-NEXT: beq a5, a1, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: sltu a1, a1, a5 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: sltu a1, a3, a5 +; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a3, a2, a6 -; RV32ZBB-NEXT: xor a4, a7, a4 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: beqz a3, .LBB11_10 +; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a2, a7, a2 +; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: beqz a2, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB11_10: -; RV32ZBB-NEXT: neg a6, a1 -; RV32ZBB-NEXT: xor a3, a7, a6 -; RV32ZBB-NEXT: sltu a4, a3, a6 -; RV32ZBB-NEXT: xor a2, a2, a6 -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: sub a4, a2, a4 -; RV32ZBB-NEXT: xor a2, a5, a6 -; RV32ZBB-NEXT: sltu a5, a2, a6 -; RV32ZBB-NEXT: xor a7, t0, a6 -; RV32ZBB-NEXT: mv t1, a5 -; RV32ZBB-NEXT: beqz t0, .LBB11_12 +; RV32ZBB-NEXT: neg t0, a1 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: sltu a4, a2, t0 +; RV32ZBB-NEXT: xor a6, a6, t0 +; RV32ZBB-NEXT: add a6, a6, a1 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: xor a7, a5, t0 +; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: beqz a5, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: .LBB11_12: -; RV32ZBB-NEXT: add a3, a3, a1 -; RV32ZBB-NEXT: sltu a6, a3, t1 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a2, a2, t1 ; RV32ZBB-NEXT: add a7, a7, a1 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sub a5, a7, a6 +; RV32ZBB-NEXT: add a1, a3, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -740,75 +740,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw t0, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw t0, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu a2, a4, a7 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a3, a5 -; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t2, t0, a3 +; RV32I-NEXT: sub a7, a7, t1 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, t0, .LBB12_2 +; RV32I-NEXT: beq a1, a5, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, t0 +; RV32I-NEXT: sltu t1, a1, a5 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t3, a7, t1 -; RV32I-NEXT: sub a2, a2, t3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: beq a2, a6, .LBB12_4 +; RV32I-NEXT: 
sub t3, a2, a6 +; RV32I-NEXT: sltu a6, t3, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a7, t3, t1 +; RV32I-NEXT: beq a6, a4, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a2 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a2, a7 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: sub t0, a1, t0 -; RV32I-NEXT: sub t0, t0, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq t0, a1, .LBB12_7 +; RV32I-NEXT: sub a5, a1, a5 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a3, t0, a3 +; RV32I-NEXT: beq a5, a1, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, t0 +; RV32I-NEXT: sltu a1, a1, a5 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: sltu a1, a3, a5 +; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a3, a2, a6 -; RV32I-NEXT: xor a4, a7, a4 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: beqz a3, .LBB12_10 +; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a2, a7, a2 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: beqz a2, .LBB12_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB12_10: -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: xor a3, a7, a6 -; RV32I-NEXT: sltu a4, a3, a6 -; RV32I-NEXT: xor a2, a2, a6 -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: sub a4, a2, a4 -; RV32I-NEXT: xor a2, a5, a6 -; RV32I-NEXT: sltu a5, a2, a6 -; RV32I-NEXT: xor a7, t0, a6 -; RV32I-NEXT: mv t1, a5 -; RV32I-NEXT: beqz t0, .LBB12_12 +; RV32I-NEXT: neg t0, a1 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: sltu a4, a2, t0 +; RV32I-NEXT: xor a6, a6, t0 +; RV32I-NEXT: add a6, a6, a1 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: xor a7, a5, t0 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: beqz a5, .LBB12_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a7, t0 ; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: add a3, a3, a1 -; RV32I-NEXT: sltu a6, a3, t1 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a2, a2, t1 ; RV32I-NEXT: add a7, a7, a1 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sub a5, a7, a6 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; @@ -836,75 +836,75 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw t0, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw a4, 12(a1) +; RV32ZBB-NEXT: lw t0, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu a2, a4, a7 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a3, a5 -; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t2, t0, a3 +; RV32ZBB-NEXT: sub a7, a7, t1 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, t0, .LBB12_2 +; RV32ZBB-NEXT: beq a1, a5, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, t0 +; RV32ZBB-NEXT: sltu t1, a1, a5 ; RV32ZBB-NEXT: .LBB12_2: -; 
RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t3, a7, t1 -; RV32ZBB-NEXT: sub a2, a2, t3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: beq a2, a6, .LBB12_4 +; RV32ZBB-NEXT: sub t3, a2, a6 +; RV32ZBB-NEXT: sltu a6, t3, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a7, t3, t1 +; RV32ZBB-NEXT: beq a6, a4, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a2, a7 ; RV32ZBB-NEXT: .LBB12_5: -; RV32ZBB-NEXT: sub t0, a1, t0 -; RV32ZBB-NEXT: sub t0, t0, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq t0, a1, .LBB12_7 +; RV32ZBB-NEXT: sub a5, a1, a5 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a3, t0, a3 +; RV32ZBB-NEXT: beq a5, a1, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: sltu a1, a1, a5 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: -; RV32ZBB-NEXT: sltu a1, a3, a5 +; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a3, a2, a6 -; RV32ZBB-NEXT: xor a4, a7, a4 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: beqz a3, .LBB12_10 +; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a2, a7, a2 +; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: beqz a2, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB12_10: -; RV32ZBB-NEXT: neg a6, a1 -; RV32ZBB-NEXT: xor a3, a7, a6 -; RV32ZBB-NEXT: sltu a4, a3, a6 -; RV32ZBB-NEXT: xor a2, a2, a6 -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: sub a4, a2, a4 -; RV32ZBB-NEXT: xor a2, a5, a6 -; RV32ZBB-NEXT: sltu a5, a2, a6 -; RV32ZBB-NEXT: xor a7, t0, a6 -; RV32ZBB-NEXT: mv t1, a5 -; RV32ZBB-NEXT: beqz t0, .LBB12_12 +; RV32ZBB-NEXT: neg t0, a1 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: sltu a4, a2, t0 +; RV32ZBB-NEXT: xor a6, a6, t0 +; RV32ZBB-NEXT: add a6, a6, a1 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: xor a7, a5, t0 +; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: beqz a5, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: .LBB12_12: -; RV32ZBB-NEXT: add a3, a3, a1 -; RV32ZBB-NEXT: sltu a6, a3, t1 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a2, a2, t1 ; RV32ZBB-NEXT: add a7, a7, a1 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sub a5, a7, a6 +; RV32ZBB-NEXT: add a1, a3, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -1131,75 +1131,75 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw t0, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw t0, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu a2, a4, a7 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a3, a5 -; RV32I-NEXT: sub a2, t1, a2 +; 
RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t2, t0, a3 +; RV32I-NEXT: sub a7, a7, t1 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, t0, .LBB17_2 +; RV32I-NEXT: beq a1, a5, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, t0 +; RV32I-NEXT: sltu t1, a1, a5 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t3, a7, t1 -; RV32I-NEXT: sub a2, a2, t3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: beq a2, a6, .LBB17_4 +; RV32I-NEXT: sub t3, a2, a6 +; RV32I-NEXT: sltu a6, t3, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a7, t3, t1 +; RV32I-NEXT: beq a6, a4, .LBB17_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a2 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: j .LBB17_5 ; RV32I-NEXT: .LBB17_4: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a2, a7 ; RV32I-NEXT: .LBB17_5: -; RV32I-NEXT: sub t0, a1, t0 -; RV32I-NEXT: sub t0, t0, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq t0, a1, .LBB17_7 +; RV32I-NEXT: sub a5, a1, a5 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a3, t0, a3 +; RV32I-NEXT: beq a5, a1, .LBB17_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, t0 +; RV32I-NEXT: sltu a1, a1, a5 ; RV32I-NEXT: j .LBB17_8 ; RV32I-NEXT: .LBB17_7: -; RV32I-NEXT: sltu a1, a3, a5 +; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: xor a3, a2, a6 -; RV32I-NEXT: xor a4, a7, a4 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: beqz a3, .LBB17_10 +; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a2, a7, a2 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: beqz a2, .LBB17_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: xor a3, a7, a6 -; RV32I-NEXT: sltu a4, a3, a6 -; RV32I-NEXT: xor a2, a2, a6 -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: sub a4, a2, a4 -; RV32I-NEXT: xor a2, a5, a6 -; RV32I-NEXT: sltu a5, a2, a6 -; RV32I-NEXT: xor a7, t0, a6 -; RV32I-NEXT: mv t1, a5 -; RV32I-NEXT: beqz t0, .LBB17_12 +; RV32I-NEXT: neg t0, a1 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: sltu a4, a2, t0 +; RV32I-NEXT: xor a6, a6, t0 +; RV32I-NEXT: add a6, a6, a1 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: xor a7, a5, t0 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: beqz a5, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a7, t0 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: add a3, a3, a1 -; RV32I-NEXT: sltu a6, a3, t1 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a2, a2, t1 ; RV32I-NEXT: add a7, a7, a1 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sub a5, a7, a6 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; @@ -1227,75 +1227,75 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw t0, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw a4, 12(a1) +; RV32ZBB-NEXT: lw t0, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) 
-; RV32ZBB-NEXT: sltu a2, a4, a7 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a3, a5 -; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t2, t0, a3 +; RV32ZBB-NEXT: sub a7, a7, t1 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, t0, .LBB17_2 +; RV32ZBB-NEXT: beq a1, a5, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, t0 +; RV32ZBB-NEXT: sltu t1, a1, a5 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t3, a7, t1 -; RV32ZBB-NEXT: sub a2, a2, t3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: beq a2, a6, .LBB17_4 +; RV32ZBB-NEXT: sub t3, a2, a6 +; RV32ZBB-NEXT: sltu a6, t3, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a7, t3, t1 +; RV32ZBB-NEXT: beq a6, a4, .LBB17_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: j .LBB17_5 ; RV32ZBB-NEXT: .LBB17_4: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a2, a7 ; RV32ZBB-NEXT: .LBB17_5: -; RV32ZBB-NEXT: sub t0, a1, t0 -; RV32ZBB-NEXT: sub t0, t0, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq t0, a1, .LBB17_7 +; RV32ZBB-NEXT: sub a5, a1, a5 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a3, t0, a3 +; RV32ZBB-NEXT: beq a5, a1, .LBB17_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: sltu a1, a1, a5 ; RV32ZBB-NEXT: j .LBB17_8 ; RV32ZBB-NEXT: .LBB17_7: -; RV32ZBB-NEXT: sltu a1, a3, a5 +; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB17_8: -; RV32ZBB-NEXT: xor a3, a2, a6 -; RV32ZBB-NEXT: xor a4, a7, a4 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: beqz a3, .LBB17_10 +; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a2, a7, a2 +; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: beqz a2, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB17_10: -; RV32ZBB-NEXT: neg a6, a1 -; RV32ZBB-NEXT: xor a3, a7, a6 -; RV32ZBB-NEXT: sltu a4, a3, a6 -; RV32ZBB-NEXT: xor a2, a2, a6 -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: sub a4, a2, a4 -; RV32ZBB-NEXT: xor a2, a5, a6 -; RV32ZBB-NEXT: sltu a5, a2, a6 -; RV32ZBB-NEXT: xor a7, t0, a6 -; RV32ZBB-NEXT: mv t1, a5 -; RV32ZBB-NEXT: beqz t0, .LBB17_12 +; RV32ZBB-NEXT: neg t0, a1 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: sltu a4, a2, t0 +; RV32ZBB-NEXT: xor a6, a6, t0 +; RV32ZBB-NEXT: add a6, a6, a1 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: xor a7, a5, t0 +; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: beqz a5, .LBB17_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: .LBB17_12: -; RV32ZBB-NEXT: add a3, a3, a1 -; RV32ZBB-NEXT: sltu a6, a3, t1 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a2, a2, t1 ; RV32ZBB-NEXT: add a7, a7, a1 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sub a5, a7, a6 +; RV32ZBB-NEXT: add a1, a3, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -1524,75 +1524,75 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw t1, 
12(a2) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw t0, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw t0, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu a2, a4, a7 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a3, a5 -; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t2, t0, a3 +; RV32I-NEXT: sub a7, a7, t1 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, t0, .LBB22_2 +; RV32I-NEXT: beq a1, a5, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, t0 +; RV32I-NEXT: sltu t1, a1, a5 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t3, a7, t1 -; RV32I-NEXT: sub a2, a2, t3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: beq a2, a6, .LBB22_4 +; RV32I-NEXT: sub t3, a2, a6 +; RV32I-NEXT: sltu a6, t3, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a7, t3, t1 +; RV32I-NEXT: beq a6, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a2 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: j .LBB22_5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a2, a7 ; RV32I-NEXT: .LBB22_5: -; RV32I-NEXT: sub t0, a1, t0 -; RV32I-NEXT: sub t0, t0, t2 -; RV32I-NEXT: sub a5, a3, a5 -; RV32I-NEXT: beq t0, a1, .LBB22_7 +; RV32I-NEXT: sub a5, a1, a5 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a3, t0, a3 +; RV32I-NEXT: beq a5, a1, .LBB22_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, t0 +; RV32I-NEXT: sltu a1, a1, a5 ; RV32I-NEXT: j .LBB22_8 ; RV32I-NEXT: .LBB22_7: -; RV32I-NEXT: sltu a1, a3, a5 +; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: xor a3, a2, a6 -; RV32I-NEXT: xor a4, a7, a4 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: beqz a3, .LBB22_10 +; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a2, a7, a2 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: beqz a2, .LBB22_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: xor a3, a7, a6 -; RV32I-NEXT: sltu a4, a3, a6 -; RV32I-NEXT: xor a2, a2, a6 -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: sub a4, a2, a4 -; RV32I-NEXT: xor a2, a5, a6 -; RV32I-NEXT: sltu a5, a2, a6 -; RV32I-NEXT: xor a7, t0, a6 -; RV32I-NEXT: mv t1, a5 -; RV32I-NEXT: beqz t0, .LBB22_12 +; RV32I-NEXT: neg t0, a1 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: sltu a4, a2, t0 +; RV32I-NEXT: xor a6, a6, t0 +; RV32I-NEXT: add a6, a6, a1 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: xor a7, a5, t0 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: beqz a5, .LBB22_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu t1, a7, a6 +; RV32I-NEXT: sltu t1, a7, t0 ; RV32I-NEXT: .LBB22_12: -; RV32I-NEXT: add a3, a3, a1 -; RV32I-NEXT: sltu a6, a3, t1 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a2, a2, t1 ; RV32I-NEXT: add a7, a7, a1 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sub a5, a7, a6 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; @@ -1620,75 +1620,75 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: 
# %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw t0, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw a4, 12(a1) +; RV32ZBB-NEXT: lw t0, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu a2, a4, a7 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a3, a5 -; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t2, t0, a3 +; RV32ZBB-NEXT: sub a7, a7, t1 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, t0, .LBB22_2 +; RV32ZBB-NEXT: beq a1, a5, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, t0 +; RV32ZBB-NEXT: sltu t1, a1, a5 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t3, a7, t1 -; RV32ZBB-NEXT: sub a2, a2, t3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: beq a2, a6, .LBB22_4 +; RV32ZBB-NEXT: sub t3, a2, a6 +; RV32ZBB-NEXT: sltu a6, t3, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a7, t3, t1 +; RV32ZBB-NEXT: beq a6, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: j .LBB22_5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a2, a7 ; RV32ZBB-NEXT: .LBB22_5: -; RV32ZBB-NEXT: sub t0, a1, t0 -; RV32ZBB-NEXT: sub t0, t0, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq t0, a1, .LBB22_7 +; RV32ZBB-NEXT: sub a5, a1, a5 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a3, t0, a3 +; RV32ZBB-NEXT: beq a5, a1, .LBB22_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: sltu a1, a1, a5 ; RV32ZBB-NEXT: j .LBB22_8 ; RV32ZBB-NEXT: .LBB22_7: -; RV32ZBB-NEXT: sltu a1, a3, a5 +; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: xor a3, a2, a6 -; RV32ZBB-NEXT: xor a4, a7, a4 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: beqz a3, .LBB22_10 +; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a2, a7, a2 +; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: beqz a2, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: neg a6, a1 -; RV32ZBB-NEXT: xor a3, a7, a6 -; RV32ZBB-NEXT: sltu a4, a3, a6 -; RV32ZBB-NEXT: xor a2, a2, a6 -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: sub a4, a2, a4 -; RV32ZBB-NEXT: xor a2, a5, a6 -; RV32ZBB-NEXT: sltu a5, a2, a6 -; RV32ZBB-NEXT: xor a7, t0, a6 -; RV32ZBB-NEXT: mv t1, a5 -; RV32ZBB-NEXT: beqz t0, .LBB22_12 +; RV32ZBB-NEXT: neg t0, a1 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: sltu a4, a2, t0 +; RV32ZBB-NEXT: xor a6, a6, t0 +; RV32ZBB-NEXT: add a6, a6, a1 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: xor a7, a5, t0 +; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: beqz a5, .LBB22_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: .LBB22_12: -; RV32ZBB-NEXT: add a3, a3, a1 -; RV32ZBB-NEXT: sltu a6, a3, t1 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a2, a2, t1 ; RV32ZBB-NEXT: add a7, a7, a1 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sub a5, a7, 
a6 +; RV32ZBB-NEXT: add a1, a3, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -1918,10 +1918,10 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB27_2 ; RV32I-NEXT: # %bb.1: @@ -2012,75 +2012,75 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_select_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw t0, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a2, 8(a1) +; RV32ZBB-NEXT: lw a4, 12(a1) +; RV32ZBB-NEXT: lw t0, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu a2, a4, a7 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a3, a5 -; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t2, t0, a3 +; RV32ZBB-NEXT: sub a7, a7, t1 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, t0, .LBB27_2 +; RV32ZBB-NEXT: beq a1, a5, .LBB27_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, t0 +; RV32ZBB-NEXT: sltu t1, a1, a5 ; RV32ZBB-NEXT: .LBB27_2: -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t3, a7, t1 -; RV32ZBB-NEXT: sub a2, a2, t3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: beq a2, a6, .LBB27_4 +; RV32ZBB-NEXT: sub t3, a2, a6 +; RV32ZBB-NEXT: sltu a6, t3, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a7, t3, t1 +; RV32ZBB-NEXT: beq a6, a4, .LBB27_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a2 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: j .LBB27_5 ; RV32ZBB-NEXT: .LBB27_4: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a2, a7 ; RV32ZBB-NEXT: .LBB27_5: -; RV32ZBB-NEXT: sub t0, a1, t0 -; RV32ZBB-NEXT: sub t0, t0, t2 -; RV32ZBB-NEXT: sub a5, a3, a5 -; RV32ZBB-NEXT: beq t0, a1, .LBB27_7 +; RV32ZBB-NEXT: sub a5, a1, a5 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a3, t0, a3 +; RV32ZBB-NEXT: beq a5, a1, .LBB27_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, t0 +; RV32ZBB-NEXT: sltu a1, a1, a5 ; RV32ZBB-NEXT: j .LBB27_8 ; RV32ZBB-NEXT: .LBB27_7: -; RV32ZBB-NEXT: sltu a1, a3, a5 +; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB27_8: -; RV32ZBB-NEXT: xor a3, a2, a6 -; RV32ZBB-NEXT: xor a4, a7, a4 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: beqz a3, .LBB27_10 +; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a2, a7, a2 +; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: beqz a2, .LBB27_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB27_10: -; RV32ZBB-NEXT: neg a6, a1 -; RV32ZBB-NEXT: xor a3, a7, a6 -; RV32ZBB-NEXT: sltu a4, a3, a6 -; RV32ZBB-NEXT: xor a2, a2, a6 -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: sub a4, a2, a4 -; RV32ZBB-NEXT: xor a2, a5, a6 -; RV32ZBB-NEXT: sltu a5, a2, a6 -; RV32ZBB-NEXT: xor a7, t0, a6 -; RV32ZBB-NEXT: mv t1, a5 -; RV32ZBB-NEXT: beqz t0, .LBB27_12 +; RV32ZBB-NEXT: neg t0, a1 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: sltu a4, a2, t0 +; RV32ZBB-NEXT: xor a6, a6, t0 +; 
RV32ZBB-NEXT: add a6, a6, a1 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: xor a7, a5, t0 +; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: beqz a5, .LBB27_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t1, a7, a6 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: .LBB27_12: -; RV32ZBB-NEXT: add a3, a3, a1 -; RV32ZBB-NEXT: sltu a6, a3, t1 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: add a2, a2, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a2, a2, t1 ; RV32ZBB-NEXT: add a7, a7, a1 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: sub a5, a7, a6 +; RV32ZBB-NEXT: add a1, a3, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index 274f1cef49aa95..823918f1c42e7a 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -167,17 +167,17 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw a1, 8(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srli a5, a2, 29 ; RV32I-NEXT: slli a6, a3, 3 ; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: srli a3, a3, 29 -; RV32I-NEXT: slli a6, a1, 3 +; RV32I-NEXT: slli a6, a4, 3 ; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: srli a1, a1, 29 -; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srli a4, a4, 29 +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a2, a2, 3 ; RV32I-NEXT: lui a4, 128 ; RV32I-NEXT: add a1, a1, a4 @@ -200,26 +200,26 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; ; RV32C-LABEL: add_wide_operand: ; RV32C: # %bb.0: -; RV32C-NEXT: lw a6, 4(a1) -; RV32C-NEXT: c.lw a3, 12(a1) -; RV32C-NEXT: c.lw a4, 0(a1) +; RV32C-NEXT: c.lw a2, 12(a1) +; RV32C-NEXT: lw a6, 0(a1) +; RV32C-NEXT: c.lw a3, 4(a1) ; RV32C-NEXT: c.lw a1, 8(a1) ; RV32C-NEXT: c.lui a5, 16 -; RV32C-NEXT: c.add a3, a5 -; RV32C-NEXT: c.slli a3, 3 +; RV32C-NEXT: c.add a2, a5 +; RV32C-NEXT: c.slli a2, 3 ; RV32C-NEXT: srli a5, a1, 29 -; RV32C-NEXT: c.or a3, a5 -; RV32C-NEXT: srli a5, a4, 29 -; RV32C-NEXT: slli a2, a6, 3 ; RV32C-NEXT: c.or a2, a5 ; RV32C-NEXT: srli a5, a6, 29 +; RV32C-NEXT: slli a4, a3, 3 +; RV32C-NEXT: c.or a4, a5 +; RV32C-NEXT: c.srli a3, 29 ; RV32C-NEXT: c.slli a1, 3 -; RV32C-NEXT: c.or a1, a5 -; RV32C-NEXT: c.slli a4, 3 -; RV32C-NEXT: c.sw a4, 0(a0) +; RV32C-NEXT: c.or a1, a3 +; RV32C-NEXT: c.slli a6, 3 +; RV32C-NEXT: sw a6, 0(a0) ; RV32C-NEXT: c.sw a1, 8(a0) -; RV32C-NEXT: c.sw a2, 4(a0) -; RV32C-NEXT: c.sw a3, 12(a0) +; RV32C-NEXT: c.sw a4, 4(a0) +; RV32C-NEXT: c.sw a2, 12(a0) ; RV32C-NEXT: c.jr ra ; ; RV64C-LABEL: add_wide_operand: diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll index 8d3fc96109262e..35a1227b86b3a6 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll @@ -192,37 +192,37 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 4(a0) -; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw 
a4, 0(a0) +; RV32-NEXT: lw a5, 4(a0) ; RV32-NEXT: mv s1, a2 ; RV32-NEXT: mv s2, a1 ; RV32-NEXT: j .LBB11_2 ; RV32-NEXT: .LBB11_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sw a5, 8(sp) -; RV32-NEXT: sw a4, 12(sp) +; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: sw a5, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a5, 12(sp) ; RV32-NEXT: bnez a0, .LBB11_6 ; RV32-NEXT: .LBB11_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a4, s1, .LBB11_4 +; RV32-NEXT: beq a5, s1, .LBB11_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: slt a0, s1, a4 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: slt a0, s1, a5 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: j .LBB11_5 ; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a5 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: .LBB11_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -268,37 +268,37 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 4(a0) -; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a5, 4(a0) ; RV32-NEXT: mv s1, a2 ; RV32-NEXT: mv s2, a1 ; RV32-NEXT: j .LBB13_2 ; RV32-NEXT: .LBB13_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sw a5, 8(sp) -; RV32-NEXT: sw a4, 12(sp) +; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: sw a5, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a5, 12(sp) ; RV32-NEXT: bnez a0, .LBB13_6 ; RV32-NEXT: .LBB13_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a4, s1, .LBB13_4 +; RV32-NEXT: beq a5, s1, .LBB13_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s1, a4 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s1, a5 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: j .LBB13_5 ; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a5 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: .LBB13_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 @@ -344,37 +344,37 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 4(a0) -; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a5, 4(a0) ; RV32-NEXT: mv s1, a2 ; RV32-NEXT: mv s2, a1 ; RV32-NEXT: j .LBB15_2 ; RV32-NEXT: .LBB15_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sw a5, 8(sp) -; RV32-NEXT: sw a4, 12(sp) +; 
RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: sw a5, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a5, 12(sp) ; RV32-NEXT: bnez a0, .LBB15_6 ; RV32-NEXT: .LBB15_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a4, s1, .LBB15_4 +; RV32-NEXT: beq a5, s1, .LBB15_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: slt a0, s1, a4 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: slt a0, s1, a5 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: j .LBB15_5 ; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a5 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: .LBB15_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 @@ -420,37 +420,37 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 4(a0) -; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a5, 4(a0) ; RV32-NEXT: mv s1, a2 ; RV32-NEXT: mv s2, a1 ; RV32-NEXT: j .LBB17_2 ; RV32-NEXT: .LBB17_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sw a5, 8(sp) -; RV32-NEXT: sw a4, 12(sp) +; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: sw a5, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 12(sp) -; RV32-NEXT: lw a5, 8(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a5, 12(sp) ; RV32-NEXT: bnez a0, .LBB17_6 ; RV32-NEXT: .LBB17_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a4, s1, .LBB17_4 +; RV32-NEXT: beq a5, s1, .LBB17_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s1, a4 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s1, a5 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: j .LBB17_5 ; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a5 -; RV32-NEXT: mv a2, a5 -; RV32-NEXT: mv a3, a4 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a3, a5 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: .LBB17_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index f50744fc3c1f32..469edacb391df6 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -26073,36 +26073,36 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB220_2 ; RV32I-NEXT: .LBB220_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; 
RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB220_7 ; RV32I-NEXT: .LBB220_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB220_4 +; RV32I-NEXT: beq a4, s1, .LBB220_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB220_5 ; RV32I-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB220_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB220_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 @@ -26110,8 +26110,8 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB220_1 ; RV32I-NEXT: .LBB220_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26127,36 +26127,36 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB220_2 ; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB220_7 ; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB220_4 +; RV32IA-NEXT: beq a4, s1, .LBB220_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB220_5 ; RV32IA-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB220_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB220_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 @@ -26164,8 +26164,8 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB220_1 ; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, 
a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26226,36 +26226,36 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB221_2 ; RV32I-NEXT: .LBB221_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB221_7 ; RV32I-NEXT: .LBB221_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB221_4 +; RV32I-NEXT: beq a4, s1, .LBB221_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB221_5 ; RV32I-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB221_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB221_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 @@ -26263,8 +26263,8 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB221_1 ; RV32I-NEXT: .LBB221_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26280,36 +26280,36 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB221_2 ; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB221_7 ; RV32IA-NEXT: .LBB221_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB221_4 +; RV32IA-NEXT: beq a4, s1, .LBB221_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; 
RV32IA-NEXT: j .LBB221_5 ; RV32IA-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB221_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB221_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 @@ -26317,8 +26317,8 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB221_1 ; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26384,36 +26384,36 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB222_2 ; RV32I-NEXT: .LBB222_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB222_7 ; RV32I-NEXT: .LBB222_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB222_4 +; RV32I-NEXT: beq a4, s1, .LBB222_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB222_5 ; RV32I-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB222_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB222_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 @@ -26421,8 +26421,8 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB222_1 ; RV32I-NEXT: .LBB222_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26438,36 +26438,36 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB222_2 ; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: sw 
a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB222_7 ; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB222_4 +; RV32IA-NEXT: beq a4, s1, .LBB222_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB222_5 ; RV32IA-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB222_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB222_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 @@ -26475,8 +26475,8 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB222_1 ; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26542,36 +26542,36 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB223_2 ; RV32I-NEXT: .LBB223_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB223_7 ; RV32I-NEXT: .LBB223_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB223_4 +; RV32I-NEXT: beq a4, s1, .LBB223_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB223_5 ; RV32I-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB223_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB223_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 @@ -26579,8 +26579,8 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB223_1 ; RV32I-NEXT: .LBB223_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, 
a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26596,36 +26596,36 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB223_2 ; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB223_7 ; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB223_4 +; RV32IA-NEXT: beq a4, s1, .LBB223_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB223_5 ; RV32IA-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB223_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB223_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 @@ -26633,8 +26633,8 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB223_1 ; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26700,36 +26700,36 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB224_2 ; RV32I-NEXT: .LBB224_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB224_7 ; RV32I-NEXT: .LBB224_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB224_4 +; RV32I-NEXT: beq a4, s1, .LBB224_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: slt 
a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB224_5 ; RV32I-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB224_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB224_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 @@ -26737,8 +26737,8 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB224_1 ; RV32I-NEXT: .LBB224_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26754,36 +26754,36 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB224_2 ; RV32IA-NEXT: .LBB224_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB224_7 ; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB224_4 +; RV32IA-NEXT: beq a4, s1, .LBB224_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB224_5 ; RV32IA-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB224_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB224_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 @@ -26791,8 +26791,8 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB224_1 ; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26858,36 +26858,36 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB225_2 ; RV32I-NEXT: .LBB225_1: # %atomicrmw.start 
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB225_7 ; RV32I-NEXT: .LBB225_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB225_4 +; RV32I-NEXT: beq a4, s1, .LBB225_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB225_5 ; RV32I-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB225_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB225_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 @@ -26895,8 +26895,8 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB225_1 ; RV32I-NEXT: .LBB225_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26912,36 +26912,36 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB225_2 ; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB225_7 ; RV32IA-NEXT: .LBB225_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB225_4 +; RV32IA-NEXT: beq a4, s1, .LBB225_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB225_5 ; RV32IA-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB225_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB225_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 @@ -26949,8 +26949,8 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB225_1 ; 
RV32IA-NEXT: .LBB225_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27011,36 +27011,36 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB226_2 ; RV32I-NEXT: .LBB226_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB226_7 ; RV32I-NEXT: .LBB226_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB226_4 +; RV32I-NEXT: beq a4, s1, .LBB226_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB226_5 ; RV32I-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB226_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB226_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 @@ -27048,8 +27048,8 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB226_1 ; RV32I-NEXT: .LBB226_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27065,36 +27065,36 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB226_2 ; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB226_7 ; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB226_4 +; RV32IA-NEXT: beq a4, s1, .LBB226_4 ; RV32IA-NEXT: # %bb.3: # 
%atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB226_5 ; RV32IA-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB226_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB226_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 @@ -27102,8 +27102,8 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB226_1 ; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27169,36 +27169,36 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB227_2 ; RV32I-NEXT: .LBB227_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB227_7 ; RV32I-NEXT: .LBB227_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB227_4 +; RV32I-NEXT: beq a4, s1, .LBB227_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB227_5 ; RV32I-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB227_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB227_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 @@ -27206,8 +27206,8 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB227_1 ; RV32I-NEXT: .LBB227_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27223,36 +27223,36 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; 
RV32IA-NEXT: j .LBB227_2 ; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB227_7 ; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB227_4 +; RV32IA-NEXT: beq a4, s1, .LBB227_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB227_5 ; RV32IA-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB227_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB227_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 @@ -27260,8 +27260,8 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB227_1 ; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27327,36 +27327,36 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB228_2 ; RV32I-NEXT: .LBB228_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB228_7 ; RV32I-NEXT: .LBB228_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB228_4 +; RV32I-NEXT: beq a4, s1, .LBB228_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB228_5 ; RV32I-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB228_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB228_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 @@ -27364,8 +27364,8 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { 
; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB228_1 ; RV32I-NEXT: .LBB228_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27381,36 +27381,36 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB228_2 ; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB228_7 ; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB228_4 +; RV32IA-NEXT: beq a4, s1, .LBB228_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB228_5 ; RV32IA-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB228_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB228_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 @@ -27418,8 +27418,8 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB228_1 ; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27485,36 +27485,36 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB229_2 ; RV32I-NEXT: .LBB229_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB229_7 ; RV32I-NEXT: .LBB229_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB229_4 +; 
RV32I-NEXT: beq a4, s1, .LBB229_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB229_5 ; RV32I-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB229_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB229_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 @@ -27522,8 +27522,8 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB229_1 ; RV32I-NEXT: .LBB229_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27539,36 +27539,36 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB229_2 ; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB229_7 ; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB229_4 +; RV32IA-NEXT: beq a4, s1, .LBB229_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB229_5 ; RV32IA-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB229_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB229_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 @@ -27576,8 +27576,8 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB229_1 ; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27643,36 +27643,36 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) 
+; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB230_2 ; RV32I-NEXT: .LBB230_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB230_7 ; RV32I-NEXT: .LBB230_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB230_4 +; RV32I-NEXT: beq a4, s1, .LBB230_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB230_5 ; RV32I-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB230_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB230_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 @@ -27680,8 +27680,8 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB230_1 ; RV32I-NEXT: .LBB230_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27697,36 +27697,36 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB230_2 ; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB230_7 ; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB230_4 +; RV32IA-NEXT: beq a4, s1, .LBB230_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB230_5 ; RV32IA-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB230_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB230_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 @@ 
-27734,8 +27734,8 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB230_1 ; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27796,36 +27796,36 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB231_2 ; RV32I-NEXT: .LBB231_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB231_7 ; RV32I-NEXT: .LBB231_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB231_4 +; RV32I-NEXT: beq a4, s1, .LBB231_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB231_5 ; RV32I-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB231_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB231_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 @@ -27833,8 +27833,8 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB231_1 ; RV32I-NEXT: .LBB231_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27850,36 +27850,36 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB231_2 ; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB231_7 ; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start ; RV32IA-NEXT: # 
=>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB231_4 +; RV32IA-NEXT: beq a4, s1, .LBB231_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB231_5 ; RV32IA-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB231_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB231_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 @@ -27887,8 +27887,8 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB231_1 ; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27954,36 +27954,36 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB232_2 ; RV32I-NEXT: .LBB232_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB232_7 ; RV32I-NEXT: .LBB232_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB232_4 +; RV32I-NEXT: beq a4, s1, .LBB232_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB232_5 ; RV32I-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB232_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB232_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 @@ -27991,8 +27991,8 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB232_1 ; RV32I-NEXT: .LBB232_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28008,36 +28008,36 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; 
RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB232_2 ; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB232_7 ; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB232_4 +; RV32IA-NEXT: beq a4, s1, .LBB232_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB232_5 ; RV32IA-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB232_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB232_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 @@ -28045,8 +28045,8 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB232_1 ; RV32IA-NEXT: .LBB232_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28112,36 +28112,36 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB233_2 ; RV32I-NEXT: .LBB233_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB233_7 ; RV32I-NEXT: .LBB233_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB233_4 +; RV32I-NEXT: beq a4, s1, .LBB233_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB233_5 ; RV32I-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB233_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB233_1 ; RV32I-NEXT: # %bb.6: # 
%atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 @@ -28149,8 +28149,8 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB233_1 ; RV32I-NEXT: .LBB233_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28166,36 +28166,36 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB233_2 ; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB233_7 ; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB233_4 +; RV32IA-NEXT: beq a4, s1, .LBB233_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB233_5 ; RV32IA-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB233_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB233_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 @@ -28203,8 +28203,8 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB233_1 ; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28270,36 +28270,36 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB234_2 ; RV32I-NEXT: .LBB234_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; 
RV32I-NEXT: bnez a0, .LBB234_7 ; RV32I-NEXT: .LBB234_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB234_4 +; RV32I-NEXT: beq a4, s1, .LBB234_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB234_5 ; RV32I-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB234_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB234_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 @@ -28307,8 +28307,8 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB234_1 ; RV32I-NEXT: .LBB234_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28324,36 +28324,36 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB234_2 ; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB234_7 ; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB234_4 +; RV32IA-NEXT: beq a4, s1, .LBB234_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB234_5 ; RV32IA-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB234_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB234_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 @@ -28361,8 +28361,8 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB234_1 ; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28428,36 +28428,36 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte 
Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB235_2 ; RV32I-NEXT: .LBB235_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB235_7 ; RV32I-NEXT: .LBB235_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB235_4 +; RV32I-NEXT: beq a4, s1, .LBB235_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB235_5 ; RV32I-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB235_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB235_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 @@ -28465,8 +28465,8 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB235_1 ; RV32I-NEXT: .LBB235_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28482,36 +28482,36 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB235_2 ; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB235_7 ; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB235_4 +; RV32IA-NEXT: beq a4, s1, .LBB235_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB235_5 ; RV32IA-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB235_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; 
RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB235_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 @@ -28519,8 +28519,8 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB235_1 ; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28581,36 +28581,36 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB236_2 ; RV32I-NEXT: .LBB236_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB236_7 ; RV32I-NEXT: .LBB236_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB236_4 +; RV32I-NEXT: beq a4, s1, .LBB236_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB236_5 ; RV32I-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB236_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB236_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 @@ -28618,8 +28618,8 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB236_1 ; RV32I-NEXT: .LBB236_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28635,36 +28635,36 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB236_2 ; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; 
RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB236_7 ; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB236_4 +; RV32IA-NEXT: beq a4, s1, .LBB236_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB236_5 ; RV32IA-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB236_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB236_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 @@ -28672,8 +28672,8 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB236_1 ; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28739,36 +28739,36 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB237_2 ; RV32I-NEXT: .LBB237_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB237_7 ; RV32I-NEXT: .LBB237_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB237_4 +; RV32I-NEXT: beq a4, s1, .LBB237_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB237_5 ; RV32I-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB237_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB237_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 @@ -28776,8 +28776,8 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB237_1 ; RV32I-NEXT: .LBB237_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28793,36 +28793,36 @@ define i64 @atomicrmw_umin_i64_release(ptr 
%a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB237_2 ; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB237_7 ; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB237_4 +; RV32IA-NEXT: beq a4, s1, .LBB237_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB237_5 ; RV32IA-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB237_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB237_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 @@ -28830,8 +28830,8 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB237_1 ; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28897,36 +28897,36 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB238_2 ; RV32I-NEXT: .LBB238_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB238_7 ; RV32I-NEXT: .LBB238_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB238_4 +; RV32I-NEXT: beq a4, s1, .LBB238_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB238_5 ; RV32I-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB238_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: 
Header=BB238_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB238_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 @@ -28934,8 +28934,8 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB238_1 ; RV32I-NEXT: .LBB238_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28951,36 +28951,36 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB238_2 ; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB238_7 ; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB238_4 +; RV32IA-NEXT: beq a4, s1, .LBB238_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB238_5 ; RV32IA-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB238_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB238_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 @@ -28988,8 +28988,8 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB238_1 ; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29055,36 +29055,36 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB239_2 ; RV32I-NEXT: .LBB239_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: 
mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB239_7 ; RV32I-NEXT: .LBB239_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB239_4 +; RV32I-NEXT: beq a4, s1, .LBB239_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB239_5 ; RV32I-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB239_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB239_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 @@ -29092,8 +29092,8 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB239_1 ; RV32I-NEXT: .LBB239_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29109,36 +29109,36 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB239_2 ; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB239_7 ; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB239_4 +; RV32IA-NEXT: beq a4, s1, .LBB239_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB239_5 ; RV32IA-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB239_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB239_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 @@ -29146,8 +29146,8 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB239_1 ; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; 
RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index ed0a160d3f58ad..06594e35be8703 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -3183,36 +3183,36 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB43_7 ; RV32I-NEXT: .LBB43_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB43_4 +; RV32I-NEXT: beq a4, s1, .LBB43_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB43_5 ; RV32I-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB43_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB43_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3220,8 +3220,8 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB43_1 ; RV32I-NEXT: .LBB43_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3237,36 +3237,36 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB43_2 ; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB43_7 ; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB43_4 +; RV32IA-NEXT: beq a4, s1, .LBB43_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; 
RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB43_5 ; RV32IA-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB43_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB43_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3274,8 +3274,8 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB43_1 ; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3336,36 +3336,36 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB44_7 ; RV32I-NEXT: .LBB44_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB44_4 +; RV32I-NEXT: beq a4, s1, .LBB44_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: slt a0, s1, a5 +; RV32I-NEXT: slt a0, s1, a4 ; RV32I-NEXT: j .LBB44_5 ; RV32I-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB44_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB44_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3373,8 +3373,8 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB44_1 ; RV32I-NEXT: .LBB44_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3390,36 +3390,36 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j 
.LBB44_2 ; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB44_7 ; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB44_4 +; RV32IA-NEXT: beq a4, s1, .LBB44_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: slt a0, s1, a5 +; RV32IA-NEXT: slt a0, s1, a4 ; RV32IA-NEXT: j .LBB44_5 ; RV32IA-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB44_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB44_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3427,8 +3427,8 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB44_1 ; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3489,36 +3489,36 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB45_7 ; RV32I-NEXT: .LBB45_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB45_4 +; RV32I-NEXT: beq a4, s1, .LBB45_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB45_5 ; RV32I-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB45_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: bnez a0, .LBB45_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3526,8 +3526,8 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv 
a3, s1 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3543,36 +3543,36 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB45_2 ; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB45_7 ; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB45_4 +; RV32IA-NEXT: beq a4, s1, .LBB45_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB45_5 ; RV32IA-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB45_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: bnez a0, .LBB45_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3580,8 +3580,8 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB45_1 ; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3642,36 +3642,36 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB46_7 ; RV32I-NEXT: .LBB46_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB46_4 +; RV32I-NEXT: beq 
a4, s1, .LBB46_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB46_5 ; RV32I-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB46_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: beqz a0, .LBB46_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3679,8 +3679,8 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3696,36 +3696,36 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB46_2 ; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB46_7 ; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB46_4 +; RV32IA-NEXT: beq a4, s1, .LBB46_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB46_5 ; RV32IA-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB46_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: mv a3, a4 ; RV32IA-NEXT: beqz a0, .LBB46_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3733,8 +3733,8 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: mv a3, s1 ; RV32IA-NEXT: j .LBB46_1 ; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index b5e892c0ff6aca..0d6ae3a51e2469 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -488,43 +488,43 @@ define i64 
@atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s1 +; RV32I-NEXT: sltu a0, a4, s1 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: and a1, a0, s2 -; RV32I-NEXT: sltu a2, a4, a1 +; RV32I-NEXT: sltu a2, a5, a1 ; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: sub a3, a5, a0 +; RV32I-NEXT: sub a3, a4, a0 ; RV32I-NEXT: sub a3, a3, a2 -; RV32I-NEXT: sub a2, a4, a1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sub a2, a5, a1 +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s1, .LBB3_1 +; RV32I-NEXT: bne a4, s1, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a0, a5, s2 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -545,43 +545,43 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s1 +; RV32IA-NEXT: sltu a0, a4, s1 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: neg a0, a0 ; RV32IA-NEXT: and a1, a0, s2 -; RV32IA-NEXT: sltu a2, a4, a1 +; RV32IA-NEXT: sltu a2, a5, a1 ; RV32IA-NEXT: and a0, a0, s1 -; RV32IA-NEXT: sub a3, a5, a0 +; RV32IA-NEXT: sub a3, a4, a0 ; RV32IA-NEXT: sub a3, a3, a2 -; RV32IA-NEXT: sub a2, a4, a1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sub a2, a5, a1 +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s1, .LBB3_1 +; RV32IA-NEXT: bne a4, s1, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a0, a5, s2 ; RV32IA-NEXT: j 
.LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1102,42 +1102,42 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32I-NEXT: sltu a2, a5, a0 +; RV32I-NEXT: sltu a2, a4, a0 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1 ; RV32I-NEXT: addi a3, a2, -1 ; RV32I-NEXT: and a2, a3, a1 ; RV32I-NEXT: and a3, a3, a0 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB7_5 ; RV32I-NEXT: .LBB7_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 -; RV32I-NEXT: sub a1, a5, s1 +; RV32I-NEXT: sltu a0, a5, s2 +; RV32I-NEXT: sub a1, a4, s1 ; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: sub a1, a4, s2 -; RV32I-NEXT: bne a0, a5, .LBB7_1 +; RV32I-NEXT: sub a1, a5, s2 +; RV32I-NEXT: bne a0, a4, .LBB7_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1 -; RV32I-NEXT: sltu a2, a4, a1 +; RV32I-NEXT: sltu a2, a5, a1 ; RV32I-NEXT: j .LBB7_2 ; RV32I-NEXT: .LBB7_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1158,42 +1158,42 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB7_3 ; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32IA-NEXT: sltu a2, a5, a0 +; RV32IA-NEXT: sltu a2, a4, a0 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1 ; RV32IA-NEXT: addi a3, a2, -1 ; RV32IA-NEXT: and a2, a3, a1 ; RV32IA-NEXT: and a3, a3, a0 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB7_5 ; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 -; RV32IA-NEXT: sub a1, a5, s1 +; RV32IA-NEXT: sltu a0, a5, s2 +; RV32IA-NEXT: sub a1, a4, s1 ; 
RV32IA-NEXT: sub a0, a1, a0 -; RV32IA-NEXT: sub a1, a4, s2 -; RV32IA-NEXT: bne a0, a5, .LBB7_1 +; RV32IA-NEXT: sub a1, a5, s2 +; RV32IA-NEXT: bne a0, a4, .LBB7_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1 -; RV32IA-NEXT: sltu a2, a4, a1 +; RV32IA-NEXT: sltu a2, a5, a1 ; RV32IA-NEXT: j .LBB7_2 ; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 634ed45044ee21..927e778c9dd9c2 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -468,41 +468,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s1 +; RV32I-NEXT: sltu a0, a4, s1 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: addi a1, a4, 1 +; RV32I-NEXT: addi a1, a5, 1 ; RV32I-NEXT: seqz a2, a1 -; RV32I-NEXT: add a3, a5, a2 +; RV32I-NEXT: add a3, a4, a2 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: and a2, a0, a1 ; RV32I-NEXT: and a3, a0, a3 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s1, .LBB3_1 +; RV32I-NEXT: bne a4, s1, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a0, a5, s2 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -523,41 +523,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s1 +; RV32IA-NEXT: sltu a0, a4, s1 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: addi a1, a4, 1 +; RV32IA-NEXT: addi a1, a5, 1 ; RV32IA-NEXT: seqz a2, a1 -; RV32IA-NEXT: add a3, a5, a2 +; RV32IA-NEXT: add a3, a4, a2 ; RV32IA-NEXT: neg a0, a0 ; RV32IA-NEXT: and a2, a0, a1 ; RV32IA-NEXT: and a3, a0, 
a3 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s1, .LBB3_1 +; RV32IA-NEXT: bne a4, s1, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a0, a5, s2 ; RV32IA-NEXT: j .LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1211,35 +1211,35 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a5, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: j .LBB7_2 ; RV32I-NEXT: .LBB7_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a5, 8(sp) +; RV32I-NEXT: lw a4, 12(sp) ; RV32I-NEXT: bnez a0, .LBB7_7 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s1, .LBB7_4 +; RV32I-NEXT: beq a4, s1, .LBB7_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: sltu a0, s1, a4 ; RV32I-NEXT: j .LBB7_5 ; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s2, a5 ; RV32I-NEXT: .LBB7_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: or a1, a4, a5 +; RV32I-NEXT: or a1, a5, a4 ; RV32I-NEXT: seqz a1, a1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: mv a2, s2 @@ -1247,13 +1247,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: bnez a0, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: seqz a0, a4 -; RV32I-NEXT: sub a3, a5, a0 -; RV32I-NEXT: addi a2, a4, -1 +; RV32I-NEXT: seqz a0, a5 +; RV32I-NEXT: sub a3, a4, a0 +; RV32I-NEXT: addi a2, a5, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1274,35 +1274,35 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a0 -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a5, 0(a0) +; RV32IA-NEXT: lw a4, 4(a0) ; 
RV32IA-NEXT: mv s1, a2 ; RV32IA-NEXT: mv s2, a1 ; RV32IA-NEXT: j .LBB7_2 ; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a5, 8(sp) +; RV32IA-NEXT: sw a4, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a5, 8(sp) +; RV32IA-NEXT: lw a4, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB7_7 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s1, .LBB7_4 +; RV32IA-NEXT: beq a4, s1, .LBB7_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: sltu a0, s1, a4 ; RV32IA-NEXT: j .LBB7_5 ; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s2, a5 ; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: or a1, a4, a5 +; RV32IA-NEXT: or a1, a5, a4 ; RV32IA-NEXT: seqz a1, a1 ; RV32IA-NEXT: or a0, a1, a0 ; RV32IA-NEXT: mv a2, s2 @@ -1310,13 +1310,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: bnez a0, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: seqz a0, a4 -; RV32IA-NEXT: sub a3, a5, a0 -; RV32IA-NEXT: addi a2, a4, -1 +; RV32IA-NEXT: seqz a0, a5 +; RV32IA-NEXT: sub a3, a4, a0 +; RV32IA-NEXT: addi a2, a5, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: mv a0, a5 +; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll index 2122b3fd91788b..337e9bc5845f94 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -53,22 +53,22 @@ define void @callee() nounwind { ; ILP32-NEXT: flw fs1, 84(a1) ; ILP32-NEXT: flw fs2, 88(a1) ; ILP32-NEXT: flw fs3, 92(a1) -; ILP32-NEXT: flw fs4, 96(a1) -; ILP32-NEXT: flw fs5, 100(a1) -; ILP32-NEXT: flw fs6, 104(a1) -; ILP32-NEXT: flw fs7, 108(a1) -; ILP32-NEXT: flw fs8, 124(a1) -; ILP32-NEXT: flw fs9, 120(a1) -; ILP32-NEXT: flw fs10, 116(a1) -; ILP32-NEXT: flw fs11, 112(a1) -; ILP32-NEXT: fsw fs8, 124(a1) -; ILP32-NEXT: fsw fs9, 120(a1) -; ILP32-NEXT: fsw fs10, 116(a1) -; ILP32-NEXT: fsw fs11, 112(a1) -; ILP32-NEXT: fsw fs7, 108(a1) -; ILP32-NEXT: fsw fs6, 104(a1) -; ILP32-NEXT: fsw fs5, 100(a1) -; ILP32-NEXT: fsw fs4, 96(a1) +; ILP32-NEXT: flw fs4, 112(a1) +; ILP32-NEXT: flw fs5, 116(a1) +; ILP32-NEXT: flw fs6, 120(a1) +; ILP32-NEXT: flw fs7, 124(a1) +; ILP32-NEXT: flw fs8, 96(a1) +; ILP32-NEXT: flw fs9, 100(a1) +; ILP32-NEXT: flw fs10, 104(a1) +; ILP32-NEXT: flw fs11, 108(a1) +; ILP32-NEXT: fsw fs7, 124(a1) +; ILP32-NEXT: fsw fs6, 120(a1) +; ILP32-NEXT: fsw fs5, 116(a1) +; ILP32-NEXT: fsw fs4, 112(a1) +; ILP32-NEXT: fsw fs11, 108(a1) +; ILP32-NEXT: fsw fs10, 104(a1) +; ILP32-NEXT: fsw fs9, 100(a1) +; ILP32-NEXT: fsw fs8, 96(a1) ; ILP32-NEXT: fsw fs3, 92(a1) ; ILP32-NEXT: fsw fs2, 88(a1) ; ILP32-NEXT: fsw fs1, 84(a1) @@ -123,22 +123,22 @@ define void @callee() 
nounwind { ; ILP32E-NEXT: flw fs1, 84(a1) ; ILP32E-NEXT: flw fs2, 88(a1) ; ILP32E-NEXT: flw fs3, 92(a1) -; ILP32E-NEXT: flw fs4, 96(a1) -; ILP32E-NEXT: flw fs5, 100(a1) -; ILP32E-NEXT: flw fs6, 104(a1) -; ILP32E-NEXT: flw fs7, 108(a1) -; ILP32E-NEXT: flw fs8, 124(a1) -; ILP32E-NEXT: flw fs9, 120(a1) -; ILP32E-NEXT: flw fs10, 116(a1) -; ILP32E-NEXT: flw fs11, 112(a1) -; ILP32E-NEXT: fsw fs8, 124(a1) -; ILP32E-NEXT: fsw fs9, 120(a1) -; ILP32E-NEXT: fsw fs10, 116(a1) -; ILP32E-NEXT: fsw fs11, 112(a1) -; ILP32E-NEXT: fsw fs7, 108(a1) -; ILP32E-NEXT: fsw fs6, 104(a1) -; ILP32E-NEXT: fsw fs5, 100(a1) -; ILP32E-NEXT: fsw fs4, 96(a1) +; ILP32E-NEXT: flw fs4, 112(a1) +; ILP32E-NEXT: flw fs5, 116(a1) +; ILP32E-NEXT: flw fs6, 120(a1) +; ILP32E-NEXT: flw fs7, 124(a1) +; ILP32E-NEXT: flw fs8, 96(a1) +; ILP32E-NEXT: flw fs9, 100(a1) +; ILP32E-NEXT: flw fs10, 104(a1) +; ILP32E-NEXT: flw fs11, 108(a1) +; ILP32E-NEXT: fsw fs7, 124(a1) +; ILP32E-NEXT: fsw fs6, 120(a1) +; ILP32E-NEXT: fsw fs5, 116(a1) +; ILP32E-NEXT: fsw fs4, 112(a1) +; ILP32E-NEXT: fsw fs11, 108(a1) +; ILP32E-NEXT: fsw fs10, 104(a1) +; ILP32E-NEXT: fsw fs9, 100(a1) +; ILP32E-NEXT: fsw fs8, 96(a1) ; ILP32E-NEXT: fsw fs3, 92(a1) ; ILP32E-NEXT: fsw fs2, 88(a1) ; ILP32E-NEXT: fsw fs1, 84(a1) @@ -193,22 +193,22 @@ define void @callee() nounwind { ; LP64-NEXT: flw fs1, 84(a1) ; LP64-NEXT: flw fs2, 88(a1) ; LP64-NEXT: flw fs3, 92(a1) -; LP64-NEXT: flw fs4, 96(a1) -; LP64-NEXT: flw fs5, 100(a1) -; LP64-NEXT: flw fs6, 104(a1) -; LP64-NEXT: flw fs7, 108(a1) -; LP64-NEXT: flw fs8, 124(a1) -; LP64-NEXT: flw fs9, 120(a1) -; LP64-NEXT: flw fs10, 116(a1) -; LP64-NEXT: flw fs11, 112(a1) -; LP64-NEXT: fsw fs8, 124(a1) -; LP64-NEXT: fsw fs9, 120(a1) -; LP64-NEXT: fsw fs10, 116(a1) -; LP64-NEXT: fsw fs11, 112(a1) -; LP64-NEXT: fsw fs7, 108(a1) -; LP64-NEXT: fsw fs6, 104(a1) -; LP64-NEXT: fsw fs5, 100(a1) -; LP64-NEXT: fsw fs4, 96(a1) +; LP64-NEXT: flw fs4, 112(a1) +; LP64-NEXT: flw fs5, 116(a1) +; LP64-NEXT: flw fs6, 120(a1) +; LP64-NEXT: flw fs7, 124(a1) +; LP64-NEXT: flw fs8, 96(a1) +; LP64-NEXT: flw fs9, 100(a1) +; LP64-NEXT: flw fs10, 104(a1) +; LP64-NEXT: flw fs11, 108(a1) +; LP64-NEXT: fsw fs7, 124(a1) +; LP64-NEXT: fsw fs6, 120(a1) +; LP64-NEXT: fsw fs5, 116(a1) +; LP64-NEXT: fsw fs4, 112(a1) +; LP64-NEXT: fsw fs11, 108(a1) +; LP64-NEXT: fsw fs10, 104(a1) +; LP64-NEXT: fsw fs9, 100(a1) +; LP64-NEXT: fsw fs8, 96(a1) ; LP64-NEXT: fsw fs3, 92(a1) ; LP64-NEXT: fsw fs2, 88(a1) ; LP64-NEXT: fsw fs1, 84(a1) @@ -263,22 +263,22 @@ define void @callee() nounwind { ; LP64E-NEXT: flw fs1, 84(a1) ; LP64E-NEXT: flw fs2, 88(a1) ; LP64E-NEXT: flw fs3, 92(a1) -; LP64E-NEXT: flw fs4, 96(a1) -; LP64E-NEXT: flw fs5, 100(a1) -; LP64E-NEXT: flw fs6, 104(a1) -; LP64E-NEXT: flw fs7, 108(a1) -; LP64E-NEXT: flw fs8, 124(a1) -; LP64E-NEXT: flw fs9, 120(a1) -; LP64E-NEXT: flw fs10, 116(a1) -; LP64E-NEXT: flw fs11, 112(a1) -; LP64E-NEXT: fsw fs8, 124(a1) -; LP64E-NEXT: fsw fs9, 120(a1) -; LP64E-NEXT: fsw fs10, 116(a1) -; LP64E-NEXT: fsw fs11, 112(a1) -; LP64E-NEXT: fsw fs7, 108(a1) -; LP64E-NEXT: fsw fs6, 104(a1) -; LP64E-NEXT: fsw fs5, 100(a1) -; LP64E-NEXT: fsw fs4, 96(a1) +; LP64E-NEXT: flw fs4, 112(a1) +; LP64E-NEXT: flw fs5, 116(a1) +; LP64E-NEXT: flw fs6, 120(a1) +; LP64E-NEXT: flw fs7, 124(a1) +; LP64E-NEXT: flw fs8, 96(a1) +; LP64E-NEXT: flw fs9, 100(a1) +; LP64E-NEXT: flw fs10, 104(a1) +; LP64E-NEXT: flw fs11, 108(a1) +; LP64E-NEXT: fsw fs7, 124(a1) +; LP64E-NEXT: fsw fs6, 120(a1) +; LP64E-NEXT: fsw fs5, 116(a1) +; LP64E-NEXT: fsw fs4, 112(a1) +; LP64E-NEXT: fsw fs11, 
108(a1) +; LP64E-NEXT: fsw fs10, 104(a1) +; LP64E-NEXT: fsw fs9, 100(a1) +; LP64E-NEXT: fsw fs8, 96(a1) ; LP64E-NEXT: fsw fs3, 92(a1) ; LP64E-NEXT: fsw fs2, 88(a1) ; LP64E-NEXT: fsw fs1, 84(a1) @@ -346,22 +346,22 @@ define void @callee() nounwind { ; ILP32F-NEXT: flw fs1, 84(a1) ; ILP32F-NEXT: flw fs2, 88(a1) ; ILP32F-NEXT: flw fs3, 92(a1) -; ILP32F-NEXT: flw fs4, 96(a1) -; ILP32F-NEXT: flw fs5, 100(a1) -; ILP32F-NEXT: flw fs6, 104(a1) -; ILP32F-NEXT: flw fs7, 108(a1) -; ILP32F-NEXT: flw fs8, 124(a1) -; ILP32F-NEXT: flw fs9, 120(a1) -; ILP32F-NEXT: flw fs10, 116(a1) -; ILP32F-NEXT: flw fs11, 112(a1) -; ILP32F-NEXT: fsw fs8, 124(a1) -; ILP32F-NEXT: fsw fs9, 120(a1) -; ILP32F-NEXT: fsw fs10, 116(a1) -; ILP32F-NEXT: fsw fs11, 112(a1) -; ILP32F-NEXT: fsw fs7, 108(a1) -; ILP32F-NEXT: fsw fs6, 104(a1) -; ILP32F-NEXT: fsw fs5, 100(a1) -; ILP32F-NEXT: fsw fs4, 96(a1) +; ILP32F-NEXT: flw fs4, 112(a1) +; ILP32F-NEXT: flw fs5, 116(a1) +; ILP32F-NEXT: flw fs6, 120(a1) +; ILP32F-NEXT: flw fs7, 124(a1) +; ILP32F-NEXT: flw fs8, 96(a1) +; ILP32F-NEXT: flw fs9, 100(a1) +; ILP32F-NEXT: flw fs10, 104(a1) +; ILP32F-NEXT: flw fs11, 108(a1) +; ILP32F-NEXT: fsw fs7, 124(a1) +; ILP32F-NEXT: fsw fs6, 120(a1) +; ILP32F-NEXT: fsw fs5, 116(a1) +; ILP32F-NEXT: fsw fs4, 112(a1) +; ILP32F-NEXT: fsw fs11, 108(a1) +; ILP32F-NEXT: fsw fs10, 104(a1) +; ILP32F-NEXT: fsw fs9, 100(a1) +; ILP32F-NEXT: fsw fs8, 96(a1) ; ILP32F-NEXT: fsw fs3, 92(a1) ; ILP32F-NEXT: fsw fs2, 88(a1) ; ILP32F-NEXT: fsw fs1, 84(a1) @@ -442,22 +442,22 @@ define void @callee() nounwind { ; LP64F-NEXT: flw fs1, 84(a1) ; LP64F-NEXT: flw fs2, 88(a1) ; LP64F-NEXT: flw fs3, 92(a1) -; LP64F-NEXT: flw fs4, 96(a1) -; LP64F-NEXT: flw fs5, 100(a1) -; LP64F-NEXT: flw fs6, 104(a1) -; LP64F-NEXT: flw fs7, 108(a1) -; LP64F-NEXT: flw fs8, 124(a1) -; LP64F-NEXT: flw fs9, 120(a1) -; LP64F-NEXT: flw fs10, 116(a1) -; LP64F-NEXT: flw fs11, 112(a1) -; LP64F-NEXT: fsw fs8, 124(a1) -; LP64F-NEXT: fsw fs9, 120(a1) -; LP64F-NEXT: fsw fs10, 116(a1) -; LP64F-NEXT: fsw fs11, 112(a1) -; LP64F-NEXT: fsw fs7, 108(a1) -; LP64F-NEXT: fsw fs6, 104(a1) -; LP64F-NEXT: fsw fs5, 100(a1) -; LP64F-NEXT: fsw fs4, 96(a1) +; LP64F-NEXT: flw fs4, 112(a1) +; LP64F-NEXT: flw fs5, 116(a1) +; LP64F-NEXT: flw fs6, 120(a1) +; LP64F-NEXT: flw fs7, 124(a1) +; LP64F-NEXT: flw fs8, 96(a1) +; LP64F-NEXT: flw fs9, 100(a1) +; LP64F-NEXT: flw fs10, 104(a1) +; LP64F-NEXT: flw fs11, 108(a1) +; LP64F-NEXT: fsw fs7, 124(a1) +; LP64F-NEXT: fsw fs6, 120(a1) +; LP64F-NEXT: fsw fs5, 116(a1) +; LP64F-NEXT: fsw fs4, 112(a1) +; LP64F-NEXT: fsw fs11, 108(a1) +; LP64F-NEXT: fsw fs10, 104(a1) +; LP64F-NEXT: fsw fs9, 100(a1) +; LP64F-NEXT: fsw fs8, 96(a1) ; LP64F-NEXT: fsw fs3, 92(a1) ; LP64F-NEXT: fsw fs2, 88(a1) ; LP64F-NEXT: fsw fs1, 84(a1) @@ -538,22 +538,22 @@ define void @callee() nounwind { ; ILP32D-NEXT: flw fs1, 84(a1) ; ILP32D-NEXT: flw fs2, 88(a1) ; ILP32D-NEXT: flw fs3, 92(a1) -; ILP32D-NEXT: flw fs4, 96(a1) -; ILP32D-NEXT: flw fs5, 100(a1) -; ILP32D-NEXT: flw fs6, 104(a1) -; ILP32D-NEXT: flw fs7, 108(a1) -; ILP32D-NEXT: flw fs8, 124(a1) -; ILP32D-NEXT: flw fs9, 120(a1) -; ILP32D-NEXT: flw fs10, 116(a1) -; ILP32D-NEXT: flw fs11, 112(a1) -; ILP32D-NEXT: fsw fs8, 124(a1) -; ILP32D-NEXT: fsw fs9, 120(a1) -; ILP32D-NEXT: fsw fs10, 116(a1) -; ILP32D-NEXT: fsw fs11, 112(a1) -; ILP32D-NEXT: fsw fs7, 108(a1) -; ILP32D-NEXT: fsw fs6, 104(a1) -; ILP32D-NEXT: fsw fs5, 100(a1) -; ILP32D-NEXT: fsw fs4, 96(a1) +; ILP32D-NEXT: flw fs4, 112(a1) +; ILP32D-NEXT: flw fs5, 116(a1) +; ILP32D-NEXT: flw fs6, 120(a1) +; ILP32D-NEXT: flw 
fs7, 124(a1) +; ILP32D-NEXT: flw fs8, 96(a1) +; ILP32D-NEXT: flw fs9, 100(a1) +; ILP32D-NEXT: flw fs10, 104(a1) +; ILP32D-NEXT: flw fs11, 108(a1) +; ILP32D-NEXT: fsw fs7, 124(a1) +; ILP32D-NEXT: fsw fs6, 120(a1) +; ILP32D-NEXT: fsw fs5, 116(a1) +; ILP32D-NEXT: fsw fs4, 112(a1) +; ILP32D-NEXT: fsw fs11, 108(a1) +; ILP32D-NEXT: fsw fs10, 104(a1) +; ILP32D-NEXT: fsw fs9, 100(a1) +; ILP32D-NEXT: fsw fs8, 96(a1) ; ILP32D-NEXT: fsw fs3, 92(a1) ; ILP32D-NEXT: fsw fs2, 88(a1) ; ILP32D-NEXT: fsw fs1, 84(a1) @@ -634,22 +634,22 @@ define void @callee() nounwind { ; LP64D-NEXT: flw fs1, 84(a1) ; LP64D-NEXT: flw fs2, 88(a1) ; LP64D-NEXT: flw fs3, 92(a1) -; LP64D-NEXT: flw fs4, 96(a1) -; LP64D-NEXT: flw fs5, 100(a1) -; LP64D-NEXT: flw fs6, 104(a1) -; LP64D-NEXT: flw fs7, 108(a1) -; LP64D-NEXT: flw fs8, 124(a1) -; LP64D-NEXT: flw fs9, 120(a1) -; LP64D-NEXT: flw fs10, 116(a1) -; LP64D-NEXT: flw fs11, 112(a1) -; LP64D-NEXT: fsw fs8, 124(a1) -; LP64D-NEXT: fsw fs9, 120(a1) -; LP64D-NEXT: fsw fs10, 116(a1) -; LP64D-NEXT: fsw fs11, 112(a1) -; LP64D-NEXT: fsw fs7, 108(a1) -; LP64D-NEXT: fsw fs6, 104(a1) -; LP64D-NEXT: fsw fs5, 100(a1) -; LP64D-NEXT: fsw fs4, 96(a1) +; LP64D-NEXT: flw fs4, 112(a1) +; LP64D-NEXT: flw fs5, 116(a1) +; LP64D-NEXT: flw fs6, 120(a1) +; LP64D-NEXT: flw fs7, 124(a1) +; LP64D-NEXT: flw fs8, 96(a1) +; LP64D-NEXT: flw fs9, 100(a1) +; LP64D-NEXT: flw fs10, 104(a1) +; LP64D-NEXT: flw fs11, 108(a1) +; LP64D-NEXT: fsw fs7, 124(a1) +; LP64D-NEXT: fsw fs6, 120(a1) +; LP64D-NEXT: fsw fs5, 116(a1) +; LP64D-NEXT: fsw fs4, 112(a1) +; LP64D-NEXT: fsw fs11, 108(a1) +; LP64D-NEXT: fsw fs10, 104(a1) +; LP64D-NEXT: fsw fs9, 100(a1) +; LP64D-NEXT: fsw fs8, 96(a1) ; LP64D-NEXT: fsw fs3, 92(a1) ; LP64D-NEXT: fsw fs2, 88(a1) ; LP64D-NEXT: fsw fs1, 84(a1) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll index 38e3c2d9256cdf..0501c700f57dfb 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -45,26 +45,26 @@ define void @callee() nounwind { ; ILP32-NEXT: fld ft11, 152(a1) ; ILP32-NEXT: fld fs0, 160(a1) ; ILP32-NEXT: fld fs1, 168(a1) -; ILP32-NEXT: fld fs2, 176(a1) -; ILP32-NEXT: fld fs3, 184(a1) -; ILP32-NEXT: fld fs4, 192(a1) -; ILP32-NEXT: fld fs5, 200(a1) -; ILP32-NEXT: fld fs6, 208(a1) -; ILP32-NEXT: fld fs7, 216(a1) -; ILP32-NEXT: fld fs8, 248(a1) -; ILP32-NEXT: fld fs9, 240(a1) -; ILP32-NEXT: fld fs10, 232(a1) -; ILP32-NEXT: fld fs11, 224(a1) -; ILP32-NEXT: fsd fs8, 248(a1) -; ILP32-NEXT: fsd fs9, 240(a1) -; ILP32-NEXT: fsd fs10, 232(a1) -; ILP32-NEXT: fsd fs11, 224(a1) -; ILP32-NEXT: fsd fs7, 216(a1) -; ILP32-NEXT: fsd fs6, 208(a1) -; ILP32-NEXT: fsd fs5, 200(a1) -; ILP32-NEXT: fsd fs4, 192(a1) -; ILP32-NEXT: fsd fs3, 184(a1) -; ILP32-NEXT: fsd fs2, 176(a1) +; ILP32-NEXT: fld fs2, 208(a1) +; ILP32-NEXT: fld fs3, 216(a1) +; ILP32-NEXT: fld fs4, 224(a1) +; ILP32-NEXT: fld fs5, 232(a1) +; ILP32-NEXT: fld fs6, 240(a1) +; ILP32-NEXT: fld fs7, 248(a1) +; ILP32-NEXT: fld fs8, 176(a1) +; ILP32-NEXT: fld fs9, 184(a1) +; ILP32-NEXT: fld fs10, 192(a1) +; ILP32-NEXT: fld fs11, 200(a1) +; ILP32-NEXT: fsd fs7, 248(a1) +; ILP32-NEXT: fsd fs6, 240(a1) +; ILP32-NEXT: fsd fs5, 232(a1) +; ILP32-NEXT: fsd fs4, 224(a1) +; ILP32-NEXT: fsd fs3, 216(a1) +; ILP32-NEXT: fsd fs2, 208(a1) +; ILP32-NEXT: fsd fs11, 200(a1) +; ILP32-NEXT: fsd fs10, 192(a1) +; ILP32-NEXT: fsd fs9, 184(a1) +; ILP32-NEXT: fsd fs8, 176(a1) ; ILP32-NEXT: fsd fs1, 168(a1) ; ILP32-NEXT: fsd fs0, 160(a1) ; ILP32-NEXT: fsd ft11, 152(a1) 
@@ -115,26 +115,26 @@ define void @callee() nounwind { ; LP64-NEXT: fld ft11, 152(a1) ; LP64-NEXT: fld fs0, 160(a1) ; LP64-NEXT: fld fs1, 168(a1) -; LP64-NEXT: fld fs2, 176(a1) -; LP64-NEXT: fld fs3, 184(a1) -; LP64-NEXT: fld fs4, 192(a1) -; LP64-NEXT: fld fs5, 200(a1) -; LP64-NEXT: fld fs6, 208(a1) -; LP64-NEXT: fld fs7, 216(a1) -; LP64-NEXT: fld fs8, 248(a1) -; LP64-NEXT: fld fs9, 240(a1) -; LP64-NEXT: fld fs10, 232(a1) -; LP64-NEXT: fld fs11, 224(a1) -; LP64-NEXT: fsd fs8, 248(a1) -; LP64-NEXT: fsd fs9, 240(a1) -; LP64-NEXT: fsd fs10, 232(a1) -; LP64-NEXT: fsd fs11, 224(a1) -; LP64-NEXT: fsd fs7, 216(a1) -; LP64-NEXT: fsd fs6, 208(a1) -; LP64-NEXT: fsd fs5, 200(a1) -; LP64-NEXT: fsd fs4, 192(a1) -; LP64-NEXT: fsd fs3, 184(a1) -; LP64-NEXT: fsd fs2, 176(a1) +; LP64-NEXT: fld fs2, 208(a1) +; LP64-NEXT: fld fs3, 216(a1) +; LP64-NEXT: fld fs4, 224(a1) +; LP64-NEXT: fld fs5, 232(a1) +; LP64-NEXT: fld fs6, 240(a1) +; LP64-NEXT: fld fs7, 248(a1) +; LP64-NEXT: fld fs8, 176(a1) +; LP64-NEXT: fld fs9, 184(a1) +; LP64-NEXT: fld fs10, 192(a1) +; LP64-NEXT: fld fs11, 200(a1) +; LP64-NEXT: fsd fs7, 248(a1) +; LP64-NEXT: fsd fs6, 240(a1) +; LP64-NEXT: fsd fs5, 232(a1) +; LP64-NEXT: fsd fs4, 224(a1) +; LP64-NEXT: fsd fs3, 216(a1) +; LP64-NEXT: fsd fs2, 208(a1) +; LP64-NEXT: fsd fs11, 200(a1) +; LP64-NEXT: fsd fs10, 192(a1) +; LP64-NEXT: fsd fs9, 184(a1) +; LP64-NEXT: fsd fs8, 176(a1) ; LP64-NEXT: fsd fs1, 168(a1) ; LP64-NEXT: fsd fs0, 160(a1) ; LP64-NEXT: fsd ft11, 152(a1) @@ -185,26 +185,26 @@ define void @callee() nounwind { ; LP64E-NEXT: fld ft11, 152(a1) ; LP64E-NEXT: fld fs0, 160(a1) ; LP64E-NEXT: fld fs1, 168(a1) -; LP64E-NEXT: fld fs2, 176(a1) -; LP64E-NEXT: fld fs3, 184(a1) -; LP64E-NEXT: fld fs4, 192(a1) -; LP64E-NEXT: fld fs5, 200(a1) -; LP64E-NEXT: fld fs6, 208(a1) -; LP64E-NEXT: fld fs7, 216(a1) -; LP64E-NEXT: fld fs8, 248(a1) -; LP64E-NEXT: fld fs9, 240(a1) -; LP64E-NEXT: fld fs10, 232(a1) -; LP64E-NEXT: fld fs11, 224(a1) -; LP64E-NEXT: fsd fs8, 248(a1) -; LP64E-NEXT: fsd fs9, 240(a1) -; LP64E-NEXT: fsd fs10, 232(a1) -; LP64E-NEXT: fsd fs11, 224(a1) -; LP64E-NEXT: fsd fs7, 216(a1) -; LP64E-NEXT: fsd fs6, 208(a1) -; LP64E-NEXT: fsd fs5, 200(a1) -; LP64E-NEXT: fsd fs4, 192(a1) -; LP64E-NEXT: fsd fs3, 184(a1) -; LP64E-NEXT: fsd fs2, 176(a1) +; LP64E-NEXT: fld fs2, 208(a1) +; LP64E-NEXT: fld fs3, 216(a1) +; LP64E-NEXT: fld fs4, 224(a1) +; LP64E-NEXT: fld fs5, 232(a1) +; LP64E-NEXT: fld fs6, 240(a1) +; LP64E-NEXT: fld fs7, 248(a1) +; LP64E-NEXT: fld fs8, 176(a1) +; LP64E-NEXT: fld fs9, 184(a1) +; LP64E-NEXT: fld fs10, 192(a1) +; LP64E-NEXT: fld fs11, 200(a1) +; LP64E-NEXT: fsd fs7, 248(a1) +; LP64E-NEXT: fsd fs6, 240(a1) +; LP64E-NEXT: fsd fs5, 232(a1) +; LP64E-NEXT: fsd fs4, 224(a1) +; LP64E-NEXT: fsd fs3, 216(a1) +; LP64E-NEXT: fsd fs2, 208(a1) +; LP64E-NEXT: fsd fs11, 200(a1) +; LP64E-NEXT: fsd fs10, 192(a1) +; LP64E-NEXT: fsd fs9, 184(a1) +; LP64E-NEXT: fsd fs8, 176(a1) ; LP64E-NEXT: fsd fs1, 168(a1) ; LP64E-NEXT: fsd fs0, 160(a1) ; LP64E-NEXT: fsd ft11, 152(a1) @@ -268,26 +268,26 @@ define void @callee() nounwind { ; ILP32D-NEXT: fld ft11, 152(a1) ; ILP32D-NEXT: fld fs0, 160(a1) ; ILP32D-NEXT: fld fs1, 168(a1) -; ILP32D-NEXT: fld fs2, 176(a1) -; ILP32D-NEXT: fld fs3, 184(a1) -; ILP32D-NEXT: fld fs4, 192(a1) -; ILP32D-NEXT: fld fs5, 200(a1) -; ILP32D-NEXT: fld fs6, 208(a1) -; ILP32D-NEXT: fld fs7, 216(a1) -; ILP32D-NEXT: fld fs8, 248(a1) -; ILP32D-NEXT: fld fs9, 240(a1) -; ILP32D-NEXT: fld fs10, 232(a1) -; ILP32D-NEXT: fld fs11, 224(a1) -; ILP32D-NEXT: fsd fs8, 248(a1) -; ILP32D-NEXT: fsd 
fs9, 240(a1) -; ILP32D-NEXT: fsd fs10, 232(a1) -; ILP32D-NEXT: fsd fs11, 224(a1) -; ILP32D-NEXT: fsd fs7, 216(a1) -; ILP32D-NEXT: fsd fs6, 208(a1) -; ILP32D-NEXT: fsd fs5, 200(a1) -; ILP32D-NEXT: fsd fs4, 192(a1) -; ILP32D-NEXT: fsd fs3, 184(a1) -; ILP32D-NEXT: fsd fs2, 176(a1) +; ILP32D-NEXT: fld fs2, 208(a1) +; ILP32D-NEXT: fld fs3, 216(a1) +; ILP32D-NEXT: fld fs4, 224(a1) +; ILP32D-NEXT: fld fs5, 232(a1) +; ILP32D-NEXT: fld fs6, 240(a1) +; ILP32D-NEXT: fld fs7, 248(a1) +; ILP32D-NEXT: fld fs8, 176(a1) +; ILP32D-NEXT: fld fs9, 184(a1) +; ILP32D-NEXT: fld fs10, 192(a1) +; ILP32D-NEXT: fld fs11, 200(a1) +; ILP32D-NEXT: fsd fs7, 248(a1) +; ILP32D-NEXT: fsd fs6, 240(a1) +; ILP32D-NEXT: fsd fs5, 232(a1) +; ILP32D-NEXT: fsd fs4, 224(a1) +; ILP32D-NEXT: fsd fs3, 216(a1) +; ILP32D-NEXT: fsd fs2, 208(a1) +; ILP32D-NEXT: fsd fs11, 200(a1) +; ILP32D-NEXT: fsd fs10, 192(a1) +; ILP32D-NEXT: fsd fs9, 184(a1) +; ILP32D-NEXT: fsd fs8, 176(a1) ; ILP32D-NEXT: fsd fs1, 168(a1) ; ILP32D-NEXT: fsd fs0, 160(a1) ; ILP32D-NEXT: fsd ft11, 152(a1) @@ -364,26 +364,26 @@ define void @callee() nounwind { ; LP64D-NEXT: fld ft11, 152(a1) ; LP64D-NEXT: fld fs0, 160(a1) ; LP64D-NEXT: fld fs1, 168(a1) -; LP64D-NEXT: fld fs2, 176(a1) -; LP64D-NEXT: fld fs3, 184(a1) -; LP64D-NEXT: fld fs4, 192(a1) -; LP64D-NEXT: fld fs5, 200(a1) -; LP64D-NEXT: fld fs6, 208(a1) -; LP64D-NEXT: fld fs7, 216(a1) -; LP64D-NEXT: fld fs8, 248(a1) -; LP64D-NEXT: fld fs9, 240(a1) -; LP64D-NEXT: fld fs10, 232(a1) -; LP64D-NEXT: fld fs11, 224(a1) -; LP64D-NEXT: fsd fs8, 248(a1) -; LP64D-NEXT: fsd fs9, 240(a1) -; LP64D-NEXT: fsd fs10, 232(a1) -; LP64D-NEXT: fsd fs11, 224(a1) -; LP64D-NEXT: fsd fs7, 216(a1) -; LP64D-NEXT: fsd fs6, 208(a1) -; LP64D-NEXT: fsd fs5, 200(a1) -; LP64D-NEXT: fsd fs4, 192(a1) -; LP64D-NEXT: fsd fs3, 184(a1) -; LP64D-NEXT: fsd fs2, 176(a1) +; LP64D-NEXT: fld fs2, 208(a1) +; LP64D-NEXT: fld fs3, 216(a1) +; LP64D-NEXT: fld fs4, 224(a1) +; LP64D-NEXT: fld fs5, 232(a1) +; LP64D-NEXT: fld fs6, 240(a1) +; LP64D-NEXT: fld fs7, 248(a1) +; LP64D-NEXT: fld fs8, 176(a1) +; LP64D-NEXT: fld fs9, 184(a1) +; LP64D-NEXT: fld fs10, 192(a1) +; LP64D-NEXT: fld fs11, 200(a1) +; LP64D-NEXT: fsd fs7, 248(a1) +; LP64D-NEXT: fsd fs6, 240(a1) +; LP64D-NEXT: fsd fs5, 232(a1) +; LP64D-NEXT: fsd fs4, 224(a1) +; LP64D-NEXT: fsd fs3, 216(a1) +; LP64D-NEXT: fsd fs2, 208(a1) +; LP64D-NEXT: fsd fs11, 200(a1) +; LP64D-NEXT: fsd fs10, 192(a1) +; LP64D-NEXT: fsd fs9, 184(a1) +; LP64D-NEXT: fsd fs8, 176(a1) ; LP64D-NEXT: fsd fs1, 168(a1) ; LP64D-NEXT: fsd fs0, 160(a1) ; LP64D-NEXT: fsd ft11, 152(a1) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll index 5e8ed4509b5357..6d2263f74062df 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -54,16 +54,16 @@ define void @callee() nounwind { ; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a6, %hi(var) -; RV32I-NEXT: lw a0, %lo(var)(a6) +; RV32I-NEXT: lui a7, %hi(var) +; RV32I-NEXT: lw a0, %lo(var)(a7) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+4)(a6) +; RV32I-NEXT: lw a0, %lo(var+4)(a7) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+8)(a6) +; RV32I-NEXT: lw a0, %lo(var+8)(a7) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+12)(a6) +; RV32I-NEXT: lw a0, %lo(var+12)(a7) ; RV32I-NEXT: sw 
a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a6, %lo(var) +; RV32I-NEXT: addi a5, a7, %lo(var) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -86,22 +86,22 @@ define void @callee() nounwind { ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 96(a5) -; RV32I-NEXT: lw ra, 100(a5) -; RV32I-NEXT: lw a7, 104(a5) -; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw s11, 112(a5) +; RV32I-NEXT: lw ra, 116(a5) +; RV32I-NEXT: lw a3, 120(a5) ; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a1, 120(a5) -; RV32I-NEXT: lw a2, 116(a5) -; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: lw a6, 96(a5) +; RV32I-NEXT: lw a4, 100(a5) +; RV32I-NEXT: lw a2, 104(a5) +; RV32I-NEXT: lw a1, 108(a5) ; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a1, 120(a5) -; RV32I-NEXT: sw a2, 116(a5) -; RV32I-NEXT: sw a3, 112(a5) -; RV32I-NEXT: sw a4, 108(a5) -; RV32I-NEXT: sw a7, 104(a5) -; RV32I-NEXT: sw ra, 100(a5) -; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw a3, 120(a5) +; RV32I-NEXT: sw ra, 116(a5) +; RV32I-NEXT: sw s11, 112(a5) +; RV32I-NEXT: sw a1, 108(a5) +; RV32I-NEXT: sw a2, 104(a5) +; RV32I-NEXT: sw a4, 100(a5) +; RV32I-NEXT: sw a6, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -125,13 +125,13 @@ define void @callee() nounwind { ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+12)(a6) +; RV32I-NEXT: sw a0, %lo(var+12)(a7) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+8)(a6) +; RV32I-NEXT: sw a0, %lo(var+8)(a7) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+4)(a6) +; RV32I-NEXT: sw a0, %lo(var+4)(a7) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var)(a6) +; RV32I-NEXT: sw a0, %lo(var)(a7) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -154,16 +154,16 @@ define void @callee() nounwind { ; RV32I-ILP32E-NEXT: sw ra, 32(sp) # 4-byte Folded Spill ; RV32I-ILP32E-NEXT: sw s0, 28(sp) # 4-byte Folded Spill ; RV32I-ILP32E-NEXT: sw s1, 24(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lui a6, %hi(var) -; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a6) +; RV32I-ILP32E-NEXT: lui a7, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7) ; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a6) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a7) ; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a6) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7) ; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a6) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7) ; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: addi a5, a6, %lo(var) +; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var) ; RV32I-ILP32E-NEXT: lw a0, 16(a5) ; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill ; RV32I-ILP32E-NEXT: lw a0, 20(a5) @@ -186,22 +186,22 @@ define void @callee() nounwind { ; RV32I-ILP32E-NEXT: lw s10, 84(a5) ; RV32I-ILP32E-NEXT: lw s11, 88(a5) ; RV32I-ILP32E-NEXT: lw s0, 92(a5) -; RV32I-ILP32E-NEXT: lw s1, 96(a5) -; RV32I-ILP32E-NEXT: lw ra, 100(a5) -; RV32I-ILP32E-NEXT: lw a7, 104(a5) -; RV32I-ILP32E-NEXT: lw a4, 108(a5) +; RV32I-ILP32E-NEXT: lw s1, 
112(a5) +; RV32I-ILP32E-NEXT: lw ra, 116(a5) +; RV32I-ILP32E-NEXT: lw a3, 120(a5) ; RV32I-ILP32E-NEXT: lw a0, 124(a5) -; RV32I-ILP32E-NEXT: lw a1, 120(a5) -; RV32I-ILP32E-NEXT: lw a2, 116(a5) -; RV32I-ILP32E-NEXT: lw a3, 112(a5) +; RV32I-ILP32E-NEXT: lw a6, 96(a5) +; RV32I-ILP32E-NEXT: lw a4, 100(a5) +; RV32I-ILP32E-NEXT: lw a2, 104(a5) +; RV32I-ILP32E-NEXT: lw a1, 108(a5) ; RV32I-ILP32E-NEXT: sw a0, 124(a5) -; RV32I-ILP32E-NEXT: sw a1, 120(a5) -; RV32I-ILP32E-NEXT: sw a2, 116(a5) -; RV32I-ILP32E-NEXT: sw a3, 112(a5) -; RV32I-ILP32E-NEXT: sw a4, 108(a5) -; RV32I-ILP32E-NEXT: sw a7, 104(a5) -; RV32I-ILP32E-NEXT: sw ra, 100(a5) -; RV32I-ILP32E-NEXT: sw s1, 96(a5) +; RV32I-ILP32E-NEXT: sw a3, 120(a5) +; RV32I-ILP32E-NEXT: sw ra, 116(a5) +; RV32I-ILP32E-NEXT: sw s1, 112(a5) +; RV32I-ILP32E-NEXT: sw a1, 108(a5) +; RV32I-ILP32E-NEXT: sw a2, 104(a5) +; RV32I-ILP32E-NEXT: sw a4, 100(a5) +; RV32I-ILP32E-NEXT: sw a6, 96(a5) ; RV32I-ILP32E-NEXT: sw s0, 92(a5) ; RV32I-ILP32E-NEXT: sw s11, 88(a5) ; RV32I-ILP32E-NEXT: sw s10, 84(a5) @@ -225,13 +225,13 @@ define void @callee() nounwind { ; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: sw a0, 16(a5) ; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a6) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7) ; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a6) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7) ; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a6) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7) ; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a6) +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7) ; RV32I-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: lw s1, 24(sp) # 4-byte Folded Reload @@ -255,16 +255,16 @@ define void @callee() nounwind { ; RV32I-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: addi s0, sp, 80 -; RV32I-WITH-FP-NEXT: lui a6, %hi(var) -; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(a6) +; RV32I-WITH-FP-NEXT: lui t0, %hi(var) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(t0) ; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(a6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0) ; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(a6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0) ; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(a6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0) ; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: addi a5, a6, %lo(var) +; RV32I-WITH-FP-NEXT: addi a5, t0, %lo(var) ; RV32I-WITH-FP-NEXT: lw a0, 16(a5) ; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: lw a0, 20(a5) @@ -288,22 +288,22 @@ define void @callee() nounwind { ; RV32I-WITH-FP-NEXT: lw s9, 84(a5) ; RV32I-WITH-FP-NEXT: lw s10, 88(a5) ; RV32I-WITH-FP-NEXT: lw s11, 92(a5) -; RV32I-WITH-FP-NEXT: lw ra, 96(a5) -; RV32I-WITH-FP-NEXT: lw t0, 100(a5) -; RV32I-WITH-FP-NEXT: lw a7, 104(a5) -; RV32I-WITH-FP-NEXT: lw a4, 108(a5) +; RV32I-WITH-FP-NEXT: lw ra, 112(a5) +; RV32I-WITH-FP-NEXT: lw a4, 116(a5) +; RV32I-WITH-FP-NEXT: lw a3, 120(a5) ; RV32I-WITH-FP-NEXT: lw a0, 124(a5) -; RV32I-WITH-FP-NEXT: lw a1, 120(a5) -; 
RV32I-WITH-FP-NEXT: lw a2, 116(a5) -; RV32I-WITH-FP-NEXT: lw a3, 112(a5) +; RV32I-WITH-FP-NEXT: lw a7, 96(a5) +; RV32I-WITH-FP-NEXT: lw a6, 100(a5) +; RV32I-WITH-FP-NEXT: lw a2, 104(a5) +; RV32I-WITH-FP-NEXT: lw a1, 108(a5) ; RV32I-WITH-FP-NEXT: sw a0, 124(a5) -; RV32I-WITH-FP-NEXT: sw a1, 120(a5) -; RV32I-WITH-FP-NEXT: sw a2, 116(a5) -; RV32I-WITH-FP-NEXT: sw a3, 112(a5) -; RV32I-WITH-FP-NEXT: sw a4, 108(a5) -; RV32I-WITH-FP-NEXT: sw a7, 104(a5) -; RV32I-WITH-FP-NEXT: sw t0, 100(a5) -; RV32I-WITH-FP-NEXT: sw ra, 96(a5) +; RV32I-WITH-FP-NEXT: sw a3, 120(a5) +; RV32I-WITH-FP-NEXT: sw a4, 116(a5) +; RV32I-WITH-FP-NEXT: sw ra, 112(a5) +; RV32I-WITH-FP-NEXT: sw a1, 108(a5) +; RV32I-WITH-FP-NEXT: sw a2, 104(a5) +; RV32I-WITH-FP-NEXT: sw a6, 100(a5) +; RV32I-WITH-FP-NEXT: sw a7, 96(a5) ; RV32I-WITH-FP-NEXT: sw s11, 92(a5) ; RV32I-WITH-FP-NEXT: sw s10, 88(a5) ; RV32I-WITH-FP-NEXT: sw s9, 84(a5) @@ -328,13 +328,13 @@ define void @callee() nounwind { ; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: sw a0, 16(a5) ; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(a6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0) ; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(a6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0) ; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(a6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0) ; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(a6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(t0) ; RV32I-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -354,16 +354,16 @@ define void @callee() nounwind { ; RV32IZCMP-LABEL: callee: ; RV32IZCMP: # %bb.0: ; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -96 -; RV32IZCMP-NEXT: lui a6, %hi(var) -; RV32IZCMP-NEXT: lw a0, %lo(var)(a6) +; RV32IZCMP-NEXT: lui t0, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(t0) ; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(t0) ; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(t0) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(t0) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, a6, %lo(var) +; RV32IZCMP-NEXT: addi a5, t0, %lo(var) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -386,22 +386,22 @@ define void @callee() nounwind { ; RV32IZCMP-NEXT: lw t3, 84(a5) ; RV32IZCMP-NEXT: lw t2, 88(a5) ; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw t0, 96(a5) -; RV32IZCMP-NEXT: lw s0, 100(a5) -; RV32IZCMP-NEXT: lw a7, 104(a5) -; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a7, 112(a5) +; RV32IZCMP-NEXT: lw s0, 116(a5) +; RV32IZCMP-NEXT: lw a3, 120(a5) ; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a1, 120(a5) -; RV32IZCMP-NEXT: lw a2, 116(a5) -; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: lw a6, 96(a5) +; RV32IZCMP-NEXT: lw a4, 100(a5) +; RV32IZCMP-NEXT: lw a2, 104(a5) +; RV32IZCMP-NEXT: lw a1, 108(a5) ; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a1, 120(a5) -; RV32IZCMP-NEXT: sw a2, 116(a5) -; 
RV32IZCMP-NEXT: sw a3, 112(a5) -; RV32IZCMP-NEXT: sw a4, 108(a5) -; RV32IZCMP-NEXT: sw a7, 104(a5) -; RV32IZCMP-NEXT: sw s0, 100(a5) -; RV32IZCMP-NEXT: sw t0, 96(a5) +; RV32IZCMP-NEXT: sw a3, 120(a5) +; RV32IZCMP-NEXT: sw s0, 116(a5) +; RV32IZCMP-NEXT: sw a7, 112(a5) +; RV32IZCMP-NEXT: sw a1, 108(a5) +; RV32IZCMP-NEXT: sw a2, 104(a5) +; RV32IZCMP-NEXT: sw a4, 100(a5) +; RV32IZCMP-NEXT: sw a6, 96(a5) ; RV32IZCMP-NEXT: sw t1, 92(a5) ; RV32IZCMP-NEXT: sw t2, 88(a5) ; RV32IZCMP-NEXT: sw t3, 84(a5) @@ -425,13 +425,13 @@ define void @callee() nounwind { ; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(t0) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(t0) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(t0) ; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var)(t0) ; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96 ; ; RV32IZCMP-WITH-FP-LABEL: callee: @@ -451,16 +451,16 @@ define void @callee() nounwind { ; RV32IZCMP-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill ; RV32IZCMP-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill ; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 80 -; RV32IZCMP-WITH-FP-NEXT: lui a6, %hi(var) -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a6) +; RV32IZCMP-WITH-FP-NEXT: lui t1, %hi(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: addi a5, a6, %lo(var) +; RV32IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var) ; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(a5) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill ; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(a5) @@ -484,22 +484,22 @@ define void @callee() nounwind { ; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a5) ; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a5) ; RV32IZCMP-WITH-FP-NEXT: lw s1, 92(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t1, 96(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t0, 100(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a7, 104(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a4, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t0, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a4, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a3, 120(a5) ; RV32IZCMP-WITH-FP-NEXT: lw a0, 124(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a1, 120(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a2, 116(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a3, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a7, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a6, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a2, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a1, 108(a5) ; RV32IZCMP-WITH-FP-NEXT: sw a0, 124(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a1, 120(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a2, 116(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a3, 112(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a4, 108(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a7, 104(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t0, 
100(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t1, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a3, 120(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a4, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t0, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a1, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a2, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a6, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a7, 96(a5) ; RV32IZCMP-WITH-FP-NEXT: sw s1, 92(a5) ; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a5) ; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a5) @@ -524,13 +524,13 @@ define void @callee() nounwind { ; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(a5) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1) ; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -563,16 +563,16 @@ define void @callee() nounwind { ; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a6, %hi(var) -; RV64I-NEXT: lw a0, %lo(var)(a6) +; RV64I-NEXT: lui a7, %hi(var) +; RV64I-NEXT: lw a0, %lo(var)(a7) ; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var+4)(a6) +; RV64I-NEXT: lw a0, %lo(var+4)(a7) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var+8)(a6) +; RV64I-NEXT: lw a0, %lo(var+8)(a7) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var+12)(a6) +; RV64I-NEXT: lw a0, %lo(var+12)(a7) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a6, %lo(var) +; RV64I-NEXT: addi a5, a7, %lo(var) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -595,22 +595,22 @@ define void @callee() nounwind { ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 96(a5) -; RV64I-NEXT: lw ra, 100(a5) -; RV64I-NEXT: lw a7, 104(a5) -; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw s11, 112(a5) +; RV64I-NEXT: lw ra, 116(a5) +; RV64I-NEXT: lw a3, 120(a5) ; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a1, 120(a5) -; RV64I-NEXT: lw a2, 116(a5) -; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: lw a6, 96(a5) +; RV64I-NEXT: lw a4, 100(a5) +; RV64I-NEXT: lw a2, 104(a5) +; RV64I-NEXT: lw a1, 108(a5) ; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a1, 120(a5) -; RV64I-NEXT: sw a2, 116(a5) -; RV64I-NEXT: sw a3, 112(a5) -; RV64I-NEXT: sw a4, 108(a5) -; RV64I-NEXT: sw a7, 104(a5) -; RV64I-NEXT: sw ra, 100(a5) -; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw a3, 120(a5) +; RV64I-NEXT: sw ra, 116(a5) +; RV64I-NEXT: sw s11, 112(a5) +; RV64I-NEXT: sw a1, 108(a5) +; RV64I-NEXT: sw a2, 104(a5) +; RV64I-NEXT: sw a4, 100(a5) +; RV64I-NEXT: sw a6, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -634,13 +634,13 @@ define 
void @callee() nounwind { ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+12)(a6) +; RV64I-NEXT: sw a0, %lo(var+12)(a7) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+8)(a6) +; RV64I-NEXT: sw a0, %lo(var+8)(a7) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+4)(a6) +; RV64I-NEXT: sw a0, %lo(var+4)(a7) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var)(a6) +; RV64I-NEXT: sw a0, %lo(var)(a7) ; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload @@ -663,16 +663,16 @@ define void @callee() nounwind { ; RV64I-LP64E-NEXT: sd ra, 64(sp) # 8-byte Folded Spill ; RV64I-LP64E-NEXT: sd s0, 56(sp) # 8-byte Folded Spill ; RV64I-LP64E-NEXT: sd s1, 48(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lui a6, %hi(var) -; RV64I-LP64E-NEXT: lw a0, %lo(var)(a6) +; RV64I-LP64E-NEXT: lui a7, %hi(var) +; RV64I-LP64E-NEXT: lw a0, %lo(var)(a7) ; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a6) +; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a7) ; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a6) +; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a7) ; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a6) +; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a7) ; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: addi a5, a6, %lo(var) +; RV64I-LP64E-NEXT: addi a5, a7, %lo(var) ; RV64I-LP64E-NEXT: lw a0, 16(a5) ; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64I-LP64E-NEXT: lw a0, 20(a5) @@ -695,22 +695,22 @@ define void @callee() nounwind { ; RV64I-LP64E-NEXT: lw s10, 84(a5) ; RV64I-LP64E-NEXT: lw s11, 88(a5) ; RV64I-LP64E-NEXT: lw s0, 92(a5) -; RV64I-LP64E-NEXT: lw s1, 96(a5) -; RV64I-LP64E-NEXT: lw ra, 100(a5) -; RV64I-LP64E-NEXT: lw a7, 104(a5) -; RV64I-LP64E-NEXT: lw a4, 108(a5) +; RV64I-LP64E-NEXT: lw s1, 112(a5) +; RV64I-LP64E-NEXT: lw ra, 116(a5) +; RV64I-LP64E-NEXT: lw a3, 120(a5) ; RV64I-LP64E-NEXT: lw a0, 124(a5) -; RV64I-LP64E-NEXT: lw a1, 120(a5) -; RV64I-LP64E-NEXT: lw a2, 116(a5) -; RV64I-LP64E-NEXT: lw a3, 112(a5) +; RV64I-LP64E-NEXT: lw a6, 96(a5) +; RV64I-LP64E-NEXT: lw a4, 100(a5) +; RV64I-LP64E-NEXT: lw a2, 104(a5) +; RV64I-LP64E-NEXT: lw a1, 108(a5) ; RV64I-LP64E-NEXT: sw a0, 124(a5) -; RV64I-LP64E-NEXT: sw a1, 120(a5) -; RV64I-LP64E-NEXT: sw a2, 116(a5) -; RV64I-LP64E-NEXT: sw a3, 112(a5) -; RV64I-LP64E-NEXT: sw a4, 108(a5) -; RV64I-LP64E-NEXT: sw a7, 104(a5) -; RV64I-LP64E-NEXT: sw ra, 100(a5) -; RV64I-LP64E-NEXT: sw s1, 96(a5) +; RV64I-LP64E-NEXT: sw a3, 120(a5) +; RV64I-LP64E-NEXT: sw ra, 116(a5) +; RV64I-LP64E-NEXT: sw s1, 112(a5) +; RV64I-LP64E-NEXT: sw a1, 108(a5) +; RV64I-LP64E-NEXT: sw a2, 104(a5) +; RV64I-LP64E-NEXT: sw a4, 100(a5) +; RV64I-LP64E-NEXT: sw a6, 96(a5) ; RV64I-LP64E-NEXT: sw s0, 92(a5) ; RV64I-LP64E-NEXT: sw s11, 88(a5) ; RV64I-LP64E-NEXT: sw s10, 84(a5) @@ -734,13 +734,13 @@ define void @callee() nounwind { ; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: sw a0, 16(a5) ; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a6) +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a7) ; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, 
%lo(var+8)(a6) +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a7) ; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a6) +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a7) ; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var)(a6) +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a7) ; RV64I-LP64E-NEXT: ld ra, 64(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: ld s0, 56(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: ld s1, 48(sp) # 8-byte Folded Reload @@ -764,16 +764,16 @@ define void @callee() nounwind { ; RV64I-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: sd s11, 56(sp) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: addi s0, sp, 160 -; RV64I-WITH-FP-NEXT: lui a6, %hi(var) -; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(a6) +; RV64I-WITH-FP-NEXT: lui t0, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(t0) ; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(a6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0) ; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(a6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0) ; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(a6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0) ; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: addi a5, a6, %lo(var) +; RV64I-WITH-FP-NEXT: addi a5, t0, %lo(var) ; RV64I-WITH-FP-NEXT: lw a0, 16(a5) ; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: lw a0, 20(a5) @@ -797,22 +797,22 @@ define void @callee() nounwind { ; RV64I-WITH-FP-NEXT: lw s9, 84(a5) ; RV64I-WITH-FP-NEXT: lw s10, 88(a5) ; RV64I-WITH-FP-NEXT: lw s11, 92(a5) -; RV64I-WITH-FP-NEXT: lw ra, 96(a5) -; RV64I-WITH-FP-NEXT: lw t0, 100(a5) -; RV64I-WITH-FP-NEXT: lw a7, 104(a5) -; RV64I-WITH-FP-NEXT: lw a4, 108(a5) +; RV64I-WITH-FP-NEXT: lw ra, 112(a5) +; RV64I-WITH-FP-NEXT: lw a4, 116(a5) +; RV64I-WITH-FP-NEXT: lw a3, 120(a5) ; RV64I-WITH-FP-NEXT: lw a0, 124(a5) -; RV64I-WITH-FP-NEXT: lw a1, 120(a5) -; RV64I-WITH-FP-NEXT: lw a2, 116(a5) -; RV64I-WITH-FP-NEXT: lw a3, 112(a5) +; RV64I-WITH-FP-NEXT: lw a7, 96(a5) +; RV64I-WITH-FP-NEXT: lw a6, 100(a5) +; RV64I-WITH-FP-NEXT: lw a2, 104(a5) +; RV64I-WITH-FP-NEXT: lw a1, 108(a5) ; RV64I-WITH-FP-NEXT: sw a0, 124(a5) -; RV64I-WITH-FP-NEXT: sw a1, 120(a5) -; RV64I-WITH-FP-NEXT: sw a2, 116(a5) -; RV64I-WITH-FP-NEXT: sw a3, 112(a5) -; RV64I-WITH-FP-NEXT: sw a4, 108(a5) -; RV64I-WITH-FP-NEXT: sw a7, 104(a5) -; RV64I-WITH-FP-NEXT: sw t0, 100(a5) -; RV64I-WITH-FP-NEXT: sw ra, 96(a5) +; RV64I-WITH-FP-NEXT: sw a3, 120(a5) +; RV64I-WITH-FP-NEXT: sw a4, 116(a5) +; RV64I-WITH-FP-NEXT: sw ra, 112(a5) +; RV64I-WITH-FP-NEXT: sw a1, 108(a5) +; RV64I-WITH-FP-NEXT: sw a2, 104(a5) +; RV64I-WITH-FP-NEXT: sw a6, 100(a5) +; RV64I-WITH-FP-NEXT: sw a7, 96(a5) ; RV64I-WITH-FP-NEXT: sw s11, 92(a5) ; RV64I-WITH-FP-NEXT: sw s10, 88(a5) ; RV64I-WITH-FP-NEXT: sw s9, 84(a5) @@ -837,13 +837,13 @@ define void @callee() nounwind { ; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: sw a0, 16(a5) ; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(a6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0) ; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(a6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0) ; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; 
RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(a6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0) ; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(a6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(t0) ; RV64I-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload @@ -863,16 +863,16 @@ define void @callee() nounwind { ; RV64IZCMP-LABEL: callee: ; RV64IZCMP: # %bb.0: ; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 -; RV64IZCMP-NEXT: lui a6, %hi(var) -; RV64IZCMP-NEXT: lw a0, %lo(var)(a6) +; RV64IZCMP-NEXT: lui t0, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(t0) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+4)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(t0) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(t0) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(t0) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, a6, %lo(var) +; RV64IZCMP-NEXT: addi a5, t0, %lo(var) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -895,22 +895,22 @@ define void @callee() nounwind { ; RV64IZCMP-NEXT: lw t3, 84(a5) ; RV64IZCMP-NEXT: lw t2, 88(a5) ; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw t0, 96(a5) -; RV64IZCMP-NEXT: lw s0, 100(a5) -; RV64IZCMP-NEXT: lw a7, 104(a5) -; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a7, 112(a5) +; RV64IZCMP-NEXT: lw s0, 116(a5) +; RV64IZCMP-NEXT: lw a3, 120(a5) ; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a1, 120(a5) -; RV64IZCMP-NEXT: lw a2, 116(a5) -; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: lw a6, 96(a5) +; RV64IZCMP-NEXT: lw a4, 100(a5) +; RV64IZCMP-NEXT: lw a2, 104(a5) +; RV64IZCMP-NEXT: lw a1, 108(a5) ; RV64IZCMP-NEXT: sw a0, 124(a5) -; RV64IZCMP-NEXT: sw a1, 120(a5) -; RV64IZCMP-NEXT: sw a2, 116(a5) -; RV64IZCMP-NEXT: sw a3, 112(a5) -; RV64IZCMP-NEXT: sw a4, 108(a5) -; RV64IZCMP-NEXT: sw a7, 104(a5) -; RV64IZCMP-NEXT: sw s0, 100(a5) -; RV64IZCMP-NEXT: sw t0, 96(a5) +; RV64IZCMP-NEXT: sw a3, 120(a5) +; RV64IZCMP-NEXT: sw s0, 116(a5) +; RV64IZCMP-NEXT: sw a7, 112(a5) +; RV64IZCMP-NEXT: sw a1, 108(a5) +; RV64IZCMP-NEXT: sw a2, 104(a5) +; RV64IZCMP-NEXT: sw a4, 100(a5) +; RV64IZCMP-NEXT: sw a6, 96(a5) ; RV64IZCMP-NEXT: sw t1, 92(a5) ; RV64IZCMP-NEXT: sw t2, 88(a5) ; RV64IZCMP-NEXT: sw t3, 84(a5) @@ -934,13 +934,13 @@ define void @callee() nounwind { ; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(t0) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(t0) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(t0) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var)(t0) ; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV64IZCMP-WITH-FP-LABEL: callee: @@ -960,16 +960,16 @@ define void @callee() nounwind { ; RV64IZCMP-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill ; RV64IZCMP-WITH-FP-NEXT: sd 
s11, 56(sp) # 8-byte Folded Spill ; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 160 -; RV64IZCMP-WITH-FP-NEXT: lui a6, %hi(var) -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a6) +; RV64IZCMP-WITH-FP-NEXT: lui t1, %hi(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a6) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a6) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a6) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: addi a5, a6, %lo(var) +; RV64IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var) ; RV64IZCMP-WITH-FP-NEXT: lw a0, 16(a5) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill ; RV64IZCMP-WITH-FP-NEXT: lw a0, 20(a5) @@ -993,22 +993,22 @@ define void @callee() nounwind { ; RV64IZCMP-WITH-FP-NEXT: lw t3, 84(a5) ; RV64IZCMP-WITH-FP-NEXT: lw t2, 88(a5) ; RV64IZCMP-WITH-FP-NEXT: lw s1, 92(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t1, 96(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t0, 100(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a7, 104(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a4, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t0, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a4, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a3, 120(a5) ; RV64IZCMP-WITH-FP-NEXT: lw a0, 124(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a1, 120(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a2, 116(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a3, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a7, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a6, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a2, 104(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a1, 108(a5) ; RV64IZCMP-WITH-FP-NEXT: sw a0, 124(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a1, 120(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a2, 116(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a3, 112(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a4, 108(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a7, 104(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t0, 100(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t1, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a3, 120(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a4, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t0, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a1, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a2, 104(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a6, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a7, 96(a5) ; RV64IZCMP-WITH-FP-NEXT: sw s1, 92(a5) ; RV64IZCMP-WITH-FP-NEXT: sw t2, 88(a5) ; RV64IZCMP-WITH-FP-NEXT: sw t3, 84(a5) @@ -1033,13 +1033,13 @@ define void @callee() nounwind { ; RV64IZCMP-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload ; RV64IZCMP-WITH-FP-NEXT: sw a0, 16(a5) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a6) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a6) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a6) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a6) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1) ; RV64IZCMP-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64IZCMP-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; 
RV64IZCMP-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index 231ed159ab2061..bb082b0314d599 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -190,21 +190,21 @@ define i32 @caller_many_scalars() nounwind { define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-LABEL: callee_large_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw a2, 0(a1) -; RV32I-FPELIM-NEXT: lw a3, 0(a0) -; RV32I-FPELIM-NEXT: lw a4, 4(a1) -; RV32I-FPELIM-NEXT: lw a5, 12(a1) -; RV32I-FPELIM-NEXT: lw a6, 12(a0) -; RV32I-FPELIM-NEXT: lw a7, 4(a0) +; RV32I-FPELIM-NEXT: lw a2, 0(a0) +; RV32I-FPELIM-NEXT: lw a3, 4(a0) +; RV32I-FPELIM-NEXT: lw a4, 12(a1) +; RV32I-FPELIM-NEXT: lw a5, 12(a0) +; RV32I-FPELIM-NEXT: lw a6, 0(a1) +; RV32I-FPELIM-NEXT: lw a7, 4(a1) ; RV32I-FPELIM-NEXT: lw a1, 8(a1) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) -; RV32I-FPELIM-NEXT: xor a5, a6, a5 -; RV32I-FPELIM-NEXT: xor a4, a7, a4 -; RV32I-FPELIM-NEXT: or a4, a4, a5 +; RV32I-FPELIM-NEXT: xor a4, a5, a4 +; RV32I-FPELIM-NEXT: xor a3, a3, a7 +; RV32I-FPELIM-NEXT: or a3, a3, a4 ; RV32I-FPELIM-NEXT: xor a0, a0, a1 -; RV32I-FPELIM-NEXT: xor a2, a3, a2 -; RV32I-FPELIM-NEXT: or a0, a2, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a4 +; RV32I-FPELIM-NEXT: xor a1, a2, a6 +; RV32I-FPELIM-NEXT: or a0, a1, a0 +; RV32I-FPELIM-NEXT: or a0, a0, a3 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -214,21 +214,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw a2, 0(a1) -; RV32I-WITHFP-NEXT: lw a3, 0(a0) -; RV32I-WITHFP-NEXT: lw a4, 4(a1) -; RV32I-WITHFP-NEXT: lw a5, 12(a1) -; RV32I-WITHFP-NEXT: lw a6, 12(a0) -; RV32I-WITHFP-NEXT: lw a7, 4(a0) +; RV32I-WITHFP-NEXT: lw a2, 0(a0) +; RV32I-WITHFP-NEXT: lw a3, 4(a0) +; RV32I-WITHFP-NEXT: lw a4, 12(a1) +; RV32I-WITHFP-NEXT: lw a5, 12(a0) +; RV32I-WITHFP-NEXT: lw a6, 0(a1) +; RV32I-WITHFP-NEXT: lw a7, 4(a1) ; RV32I-WITHFP-NEXT: lw a1, 8(a1) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) -; RV32I-WITHFP-NEXT: xor a5, a6, a5 -; RV32I-WITHFP-NEXT: xor a4, a7, a4 -; RV32I-WITHFP-NEXT: or a4, a4, a5 +; RV32I-WITHFP-NEXT: xor a4, a5, a4 +; RV32I-WITHFP-NEXT: xor a3, a3, a7 +; RV32I-WITHFP-NEXT: or a3, a3, a4 ; RV32I-WITHFP-NEXT: xor a0, a0, a1 -; RV32I-WITHFP-NEXT: xor a2, a3, a2 -; RV32I-WITHFP-NEXT: or a0, a2, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a4 +; RV32I-WITHFP-NEXT: xor a1, a2, a6 +; RV32I-WITHFP-NEXT: or a0, a1, a0 +; RV32I-WITHFP-NEXT: or a0, a0, a3 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -297,21 +297,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 4(sp) -; RV32I-FPELIM-NEXT: lw a1, 0(a0) -; RV32I-FPELIM-NEXT: lw a2, 0(a7) -; RV32I-FPELIM-NEXT: lw a3, 4(a0) -; RV32I-FPELIM-NEXT: lw a4, 12(a0) -; RV32I-FPELIM-NEXT: lw a5, 12(a7) -; RV32I-FPELIM-NEXT: lw a6, 4(a7) +; RV32I-FPELIM-NEXT: lw a1, 0(a7) +; RV32I-FPELIM-NEXT: lw a2, 4(a7) +; RV32I-FPELIM-NEXT: lw a3, 12(a0) +; RV32I-FPELIM-NEXT: lw a4, 12(a7) +; 
RV32I-FPELIM-NEXT: lw a5, 0(a0) +; RV32I-FPELIM-NEXT: lw a6, 4(a0) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) ; RV32I-FPELIM-NEXT: lw a7, 8(a7) -; RV32I-FPELIM-NEXT: xor a4, a5, a4 -; RV32I-FPELIM-NEXT: xor a3, a6, a3 -; RV32I-FPELIM-NEXT: or a3, a3, a4 +; RV32I-FPELIM-NEXT: xor a3, a4, a3 +; RV32I-FPELIM-NEXT: xor a2, a2, a6 +; RV32I-FPELIM-NEXT: or a2, a2, a3 ; RV32I-FPELIM-NEXT: xor a0, a7, a0 -; RV32I-FPELIM-NEXT: xor a1, a2, a1 +; RV32I-FPELIM-NEXT: xor a1, a1, a5 ; RV32I-FPELIM-NEXT: or a0, a1, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a3 +; RV32I-FPELIM-NEXT: or a0, a0, a2 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -322,21 +322,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 4(s0) -; RV32I-WITHFP-NEXT: lw a1, 0(a0) -; RV32I-WITHFP-NEXT: lw a2, 0(a7) -; RV32I-WITHFP-NEXT: lw a3, 4(a0) -; RV32I-WITHFP-NEXT: lw a4, 12(a0) -; RV32I-WITHFP-NEXT: lw a5, 12(a7) -; RV32I-WITHFP-NEXT: lw a6, 4(a7) +; RV32I-WITHFP-NEXT: lw a1, 0(a7) +; RV32I-WITHFP-NEXT: lw a2, 4(a7) +; RV32I-WITHFP-NEXT: lw a3, 12(a0) +; RV32I-WITHFP-NEXT: lw a4, 12(a7) +; RV32I-WITHFP-NEXT: lw a5, 0(a0) +; RV32I-WITHFP-NEXT: lw a6, 4(a0) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) ; RV32I-WITHFP-NEXT: lw a7, 8(a7) -; RV32I-WITHFP-NEXT: xor a4, a5, a4 -; RV32I-WITHFP-NEXT: xor a3, a6, a3 -; RV32I-WITHFP-NEXT: or a3, a3, a4 +; RV32I-WITHFP-NEXT: xor a3, a4, a3 +; RV32I-WITHFP-NEXT: xor a2, a2, a6 +; RV32I-WITHFP-NEXT: or a2, a2, a3 ; RV32I-WITHFP-NEXT: xor a0, a7, a0 -; RV32I-WITHFP-NEXT: xor a1, a2, a1 +; RV32I-WITHFP-NEXT: xor a1, a1, a5 ; RV32I-WITHFP-NEXT: or a0, a1, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a3 +; RV32I-WITHFP-NEXT: or a0, a0, a2 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index d08cf577b1bdd3..708cb00d1c45c6 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -1267,21 +1267,21 @@ define i32 @caller_many_scalars() { define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-FPELIM-LABEL: callee_large_scalars: ; ILP32E-FPELIM: # %bb.0: -; ILP32E-FPELIM-NEXT: lw a2, 0(a1) -; ILP32E-FPELIM-NEXT: lw a3, 0(a0) -; ILP32E-FPELIM-NEXT: lw a4, 4(a1) -; ILP32E-FPELIM-NEXT: lw a5, 12(a1) -; ILP32E-FPELIM-NEXT: lw a6, 12(a0) -; ILP32E-FPELIM-NEXT: lw a7, 4(a0) +; ILP32E-FPELIM-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a4, 12(a1) +; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a6, 0(a1) +; ILP32E-FPELIM-NEXT: lw a7, 4(a1) ; ILP32E-FPELIM-NEXT: lw a1, 8(a1) ; ILP32E-FPELIM-NEXT: lw a0, 8(a0) -; ILP32E-FPELIM-NEXT: xor a5, a6, a5 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-NEXT: xor a4, a5, a4 +; ILP32E-FPELIM-NEXT: xor a3, a3, a7 +; ILP32E-FPELIM-NEXT: or a3, a3, a4 ; ILP32E-FPELIM-NEXT: xor a0, a0, a1 -; ILP32E-FPELIM-NEXT: xor a2, a3, a2 -; ILP32E-FPELIM-NEXT: or a0, a2, a0 -; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: xor a1, a2, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a3 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret ; @@ -1295,21 +1295,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-NEXT: .cfi_offset s0, 
-8 ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-NEXT: lw a2, 0(a1) -; ILP32E-WITHFP-NEXT: lw a3, 0(a0) -; ILP32E-WITHFP-NEXT: lw a4, 4(a1) -; ILP32E-WITHFP-NEXT: lw a5, 12(a1) -; ILP32E-WITHFP-NEXT: lw a6, 12(a0) -; ILP32E-WITHFP-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a4, 12(a1) +; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a6, 0(a1) +; ILP32E-WITHFP-NEXT: lw a7, 4(a1) ; ILP32E-WITHFP-NEXT: lw a1, 8(a1) ; ILP32E-WITHFP-NEXT: lw a0, 8(a0) -; ILP32E-WITHFP-NEXT: xor a5, a6, a5 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-NEXT: xor a4, a5, a4 +; ILP32E-WITHFP-NEXT: xor a3, a3, a7 +; ILP32E-WITHFP-NEXT: or a3, a3, a4 ; ILP32E-WITHFP-NEXT: xor a0, a0, a1 -; ILP32E-WITHFP-NEXT: xor a2, a3, a2 -; ILP32E-WITHFP-NEXT: or a0, a2, a0 -; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: xor a1, a2, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a3 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload @@ -1318,21 +1318,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a5, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a3, a7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a3, a3, a4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a2, a6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; @@ -1344,21 +1344,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw 
a5, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a5, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a3, a7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a3, a3, a4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a2, a6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %b_bitcast = bitcast fp128 %b to i128 @@ -1492,23 +1492,23 @@ define i32 @caller_large_scalars() { define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) { ; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs: ; ILP32E-FPELIM: # %bb.0: -; ILP32E-FPELIM-NEXT: lw a0, 12(sp) -; ILP32E-FPELIM-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-NEXT: lw a0, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 12(sp) ; ILP32E-FPELIM-NEXT: lw a2, 0(a0) -; ILP32E-FPELIM-NEXT: lw a3, 0(a1) -; ILP32E-FPELIM-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-NEXT: lw a4, 12(a1) ; ILP32E-FPELIM-NEXT: lw a5, 12(a0) -; ILP32E-FPELIM-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-NEXT: lw a6, 0(a1) ; ILP32E-FPELIM-NEXT: lw a7, 4(a1) -; ILP32E-FPELIM-NEXT: lw a0, 8(a0) ; ILP32E-FPELIM-NEXT: lw a1, 8(a1) -; ILP32E-FPELIM-NEXT: xor a5, a6, a5 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: or a4, a4, a5 -; ILP32E-FPELIM-NEXT: xor a0, a1, a0 -; ILP32E-FPELIM-NEXT: xor a2, a3, a2 -; ILP32E-FPELIM-NEXT: or a0, a2, a0 -; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a4, a5, a4 +; ILP32E-FPELIM-NEXT: xor a3, a3, a7 +; ILP32E-FPELIM-NEXT: or a3, a3, a4 +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a2, a6 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a0, a0, a3 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret ; @@ -1522,23 +1522,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-NEXT: lw a0, 12(s0) -; ILP32E-WITHFP-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-NEXT: lw a0, 4(s0) +; ILP32E-WITHFP-NEXT: lw a1, 12(s0) ; ILP32E-WITHFP-NEXT: lw a2, 0(a0) -; ILP32E-WITHFP-NEXT: lw a3, 0(a1) -; ILP32E-WITHFP-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-NEXT: lw a4, 12(a1) ; ILP32E-WITHFP-NEXT: lw a5, 12(a0) -; ILP32E-WITHFP-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-NEXT: lw a6, 0(a1) ; ILP32E-WITHFP-NEXT: lw a7, 4(a1) -; ILP32E-WITHFP-NEXT: lw a0, 8(a0) ; ILP32E-WITHFP-NEXT: lw a1, 8(a1) -; ILP32E-WITHFP-NEXT: xor a5, a6, a5 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: or a4, a4, a5 -; ILP32E-WITHFP-NEXT: xor a0, a1, a0 -; ILP32E-WITHFP-NEXT: xor a2, a3, a2 -; ILP32E-WITHFP-NEXT: or a0, a2, a0 -; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a4, a5, a4 +; 
ILP32E-WITHFP-NEXT: xor a3, a3, a7 +; ILP32E-WITHFP-NEXT: or a3, a3, a4 +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a2, a6 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a0, a0, a3 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload @@ -1547,23 +1547,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 12(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 0(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a5, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a3, a7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a3, a3, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a2, a6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; @@ -1575,23 +1575,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 12(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 0(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a5, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a3, a7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a3, a3, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a2, a6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %j_bitcast = bitcast fp128 %j to i128 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index 67123466354c41..a0e1b002b7260d 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -106,21 +106,21 @@ define i32 @caller_many_scalars() nounwind { define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-LABEL: callee_large_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a2, 0(a1) -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: ld a4, 8(a1) -; RV64I-NEXT: ld a5, 24(a1) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: ld a7, 8(a0) +; RV64I-NEXT: ld a2, 0(a0) +; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a4, 24(a1) +; RV64I-NEXT: ld a5, 24(a0) +; RV64I-NEXT: ld a6, 0(a1) +; RV64I-NEXT: ld a7, 8(a1) ; RV64I-NEXT: ld a1, 16(a1) ; RV64I-NEXT: ld a0, 16(a0) -; RV64I-NEXT: xor a5, a6, a5 -; RV64I-NEXT: xor a4, a7, a4 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: xor a4, a5, a4 +; RV64I-NEXT: xor a3, a3, a7 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: xor a2, a3, a2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: xor a1, a2, a6 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %a, %b @@ -161,21 +161,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, ; RV64I-LABEL: callee_large_scalars_exhausted_regs: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 8(sp) -; RV64I-NEXT: ld a1, 0(a0) -; RV64I-NEXT: ld a2, 0(a7) -; RV64I-NEXT: ld a3, 8(a0) -; RV64I-NEXT: ld a4, 24(a0) -; RV64I-NEXT: ld a5, 24(a7) -; RV64I-NEXT: ld a6, 8(a7) +; RV64I-NEXT: ld a1, 0(a7) +; RV64I-NEXT: ld a2, 8(a7) +; RV64I-NEXT: ld a3, 24(a0) +; RV64I-NEXT: ld a4, 24(a7) +; RV64I-NEXT: ld a5, 0(a0) +; RV64I-NEXT: ld a6, 8(a0) ; RV64I-NEXT: ld a0, 16(a0) ; RV64I-NEXT: ld a7, 16(a7) -; RV64I-NEXT: xor a4, a5, a4 -; RV64I-NEXT: xor a3, a6, a3 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: xor a3, a4, a3 +; RV64I-NEXT: xor a2, a2, a6 +; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: xor a0, a7, a0 -; RV64I-NEXT: xor a1, a2, a1 +; RV64I-NEXT: xor a1, a1, a5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %h, %j diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index 35900f8a0717aa..603491bf3d3003 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -3348,8 +3348,8 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: j .LBB49_2 ; RV32-NEXT: .LBB49_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 @@ -3362,8 
+3362,8 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: bnez a0, .LBB49_6 ; RV32-NEXT: .LBB49_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3453,8 +3453,8 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: j .LBB50_2 ; RV32-NEXT: .LBB50_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 @@ -3467,8 +3467,8 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: bnez a0, .LBB50_6 ; RV32-NEXT: .LBB50_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3560,8 +3560,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: j .LBB51_2 ; RV32-NEXT: .LBB51_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 @@ -3574,8 +3574,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: bnez a0, .LBB51_4 ; RV32-NEXT: .LBB51_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3652,8 +3652,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: j .LBB52_2 ; RV32-NEXT: .LBB52_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 @@ -3666,8 +3666,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: bnez a0, .LBB52_4 ; RV32-NEXT: .LBB52_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3802,30 +3802,30 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw s1, 4(a0) -; RV32-NEXT: lw s2, 0(a0) +; RV32-NEXT: lw s1, 0(a0) +; RV32-NEXT: lw s2, 4(a0) ; RV32-NEXT: .LBB54_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: lui a3, 261888 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: li a2, 0 ; RV32-NEXT: call __adddf3 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw s2, 8(sp) -; RV32-NEXT: sw s1, 12(sp) +; RV32-NEXT: sw s1, 8(sp) +; RV32-NEXT: sw s2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw s1, 12(sp) -; RV32-NEXT: lw s2, 8(sp) +; RV32-NEXT: lw s1, 8(sp) +; 
RV32-NEXT: lw s2, 12(sp) ; RV32-NEXT: beqz a0, .LBB54_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3937,30 +3937,30 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw s1, 4(a0) -; RV32-NEXT: lw s2, 0(a0) +; RV32-NEXT: lw s1, 0(a0) +; RV32-NEXT: lw s2, 4(a0) ; RV32-NEXT: .LBB55_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: lui a3, 786176 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: li a2, 0 ; RV32-NEXT: call __adddf3 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw s2, 8(sp) -; RV32-NEXT: sw s1, 12(sp) +; RV32-NEXT: sw s1, 8(sp) +; RV32-NEXT: sw s2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw s1, 12(sp) -; RV32-NEXT: lw s2, 8(sp) +; RV32-NEXT: lw s1, 8(sp) +; RV32-NEXT: lw s2, 12(sp) ; RV32-NEXT: beqz a0, .LBB55_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -4072,30 +4072,30 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw s1, 4(a0) -; RV32-NEXT: lw s2, 0(a0) +; RV32-NEXT: lw s1, 0(a0) +; RV32-NEXT: lw s2, 4(a0) ; RV32-NEXT: .LBB56_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: lui a3, 261888 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: li a2, 0 ; RV32-NEXT: call fmin ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw s2, 8(sp) -; RV32-NEXT: sw s1, 12(sp) +; RV32-NEXT: sw s1, 8(sp) +; RV32-NEXT: sw s2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw s1, 12(sp) -; RV32-NEXT: lw s2, 8(sp) +; RV32-NEXT: lw s1, 8(sp) +; RV32-NEXT: lw s2, 12(sp) ; RV32-NEXT: beqz a0, .LBB56_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -4207,30 +4207,30 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw s1, 4(a0) -; RV32-NEXT: lw s2, 0(a0) +; RV32-NEXT: lw s1, 0(a0) +; RV32-NEXT: lw s2, 4(a0) ; RV32-NEXT: .LBB57_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: lui a3, 261888 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: li a2, 0 ; RV32-NEXT: call fmax ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw 
s2, 8(sp) -; RV32-NEXT: sw s1, 12(sp) +; RV32-NEXT: sw s1, 8(sp) +; RV32-NEXT: sw s2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw s1, 12(sp) -; RV32-NEXT: lw s2, 8(sp) +; RV32-NEXT: lw s1, 8(sp) +; RV32-NEXT: lw s2, 12(sp) ; RV32-NEXT: beqz a0, .LBB57_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -4346,8 +4346,8 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind { ; RV32-NEXT: li a4, 0 ; RV32-NEXT: li a5, 0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a0, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -4406,8 +4406,8 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: li a3, 0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw a0, 0(sp) +; RV32-NEXT: lw a1, 4(sp) ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -4531,25 +4531,25 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a1 -; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: lw a2, 8(s0) -; RV32-NEXT: lw a3, 4(s0) -; RV32-NEXT: lw a4, 0(s0) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 4(s0) +; RV32-NEXT: lw a3, 8(s0) +; RV32-NEXT: lw a4, 12(s0) ; RV32-NEXT: mv s1, a0 ; RV32-NEXT: .LBB62_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: addi a0, a4, 1 +; RV32-NEXT: addi a0, a1, 1 ; RV32-NEXT: seqz a5, a0 -; RV32-NEXT: add a5, a3, a5 +; RV32-NEXT: add a5, a2, a5 ; RV32-NEXT: or a6, a0, a5 ; RV32-NEXT: seqz a6, a6 -; RV32-NEXT: add a6, a2, a6 -; RV32-NEXT: sltu a7, a6, a2 -; RV32-NEXT: add a7, a1, a7 -; RV32-NEXT: sw a4, 16(sp) -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: add a6, a3, a6 +; RV32-NEXT: sltu a7, a6, a3 +; RV32-NEXT: add a7, a4, a7 +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a3, 24(sp) +; RV32-NEXT: sw a4, 28(sp) ; RV32-NEXT: sw a5, 4(sp) ; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: sw a6, 8(sp) @@ -4561,16 +4561,16 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a1, s0 ; RV32-NEXT: call __atomic_compare_exchange -; RV32-NEXT: lw a1, 28(sp) -; RV32-NEXT: lw a2, 24(sp) -; RV32-NEXT: lw a3, 20(sp) -; RV32-NEXT: lw a4, 16(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a3, 24(sp) +; RV32-NEXT: lw a4, 28(sp) ; RV32-NEXT: beqz a0, .LBB62_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: sw a4, 0(s1) -; RV32-NEXT: sw a3, 4(s1) -; RV32-NEXT: sw a2, 8(s1) -; RV32-NEXT: sw a1, 12(s1) +; RV32-NEXT: sw a1, 0(s1) +; RV32-NEXT: sw a2, 4(s1) +; RV32-NEXT: sw a3, 8(s1) +; RV32-NEXT: sw a4, 12(s1) ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -4639,8 +4639,8 @@ define i128 @cmpxchg128(ptr %p) nounwind { ; RV64-NEXT: li a5, 5 ; RV64-NEXT: li a3, 0 ; RV64-NEXT: call __atomic_compare_exchange_16 -; RV64-NEXT: ld a1, 8(sp) ; RV64-NEXT: ld a0, 0(sp) +; 
RV64-NEXT: ld a1, 8(sp) ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 32 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index deb5a6d4013d49..c44f4942e9e699 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1043,24 +1043,24 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: lw a3, 8(sp) ; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a4, 8(sp) -; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: addi a5, a3, -1 +; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a4, 20(sp) +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: addi a5, a0, -1 ; RV32IF-NEXT: beq a1, a5, .LBB18_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a0, a2 +; RV32IF-NEXT: or a7, a2, a4 ; RV32IF-NEXT: bnez a7, .LBB18_3 ; RV32IF-NEXT: j .LBB18_4 ; RV32IF-NEXT: .LBB18_2: -; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a0, a2 +; RV32IF-NEXT: sltiu a6, a3, -1 +; RV32IF-NEXT: or a7, a2, a4 ; RV32IF-NEXT: beqz a7, .LBB18_4 ; RV32IF-NEXT: .LBB18_3: # %entry -; RV32IF-NEXT: slti a6, a2, 0 +; RV32IF-NEXT: slti a6, a4, 0 ; RV32IF-NEXT: .LBB18_4: # %entry ; RV32IF-NEXT: addi a7, a6, -1 ; RV32IF-NEXT: neg t0, a6 @@ -1068,21 +1068,21 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB18_6: # %entry -; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: or a3, a7, a3 +; RV32IF-NEXT: and a4, t0, a4 ; RV32IF-NEXT: and a2, t0, a2 -; RV32IF-NEXT: and a5, t0, a0 -; RV32IF-NEXT: beq a1, a3, .LBB18_8 +; RV32IF-NEXT: beq a1, a0, .LBB18_8 ; RV32IF-NEXT: # %bb.7: # %entry -; RV32IF-NEXT: sltu a0, a3, a1 +; RV32IF-NEXT: sltu a0, a0, a1 ; RV32IF-NEXT: j .LBB18_9 ; RV32IF-NEXT: .LBB18_8: -; RV32IF-NEXT: snez a0, a4 +; RV32IF-NEXT: snez a0, a3 ; RV32IF-NEXT: .LBB18_9: # %entry -; RV32IF-NEXT: and a5, a5, a2 -; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a5, a3, .LBB18_11 +; RV32IF-NEXT: and a2, a2, a4 +; RV32IF-NEXT: li a5, -1 +; RV32IF-NEXT: beq a2, a5, .LBB18_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a0, a2, 0 +; RV32IF-NEXT: slti a0, a4, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB18_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB18_13 @@ -1090,7 +1090,7 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB18_13: # %entry ; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: and a0, a0, a4 +; RV32IF-NEXT: and a0, a0, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -1142,24 +1142,24 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a3, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a4, 8(sp) -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: addi a5, a3, -1 +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a4, 20(sp) +; RV32IFD-NEXT: lui a0, 524288 +; RV32IFD-NEXT: addi a5, a0, -1 ; RV32IFD-NEXT: beq a1, a5, .LBB18_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a0, a2 +; RV32IFD-NEXT: or a7, a2, a4 ; RV32IFD-NEXT: bnez a7, .LBB18_3 ; RV32IFD-NEXT: j .LBB18_4 ; RV32IFD-NEXT: .LBB18_2: -; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, 
a0, a2 +; RV32IFD-NEXT: sltiu a6, a3, -1 +; RV32IFD-NEXT: or a7, a2, a4 ; RV32IFD-NEXT: beqz a7, .LBB18_4 ; RV32IFD-NEXT: .LBB18_3: # %entry -; RV32IFD-NEXT: slti a6, a2, 0 +; RV32IFD-NEXT: slti a6, a4, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry ; RV32IFD-NEXT: addi a7, a6, -1 ; RV32IFD-NEXT: neg t0, a6 @@ -1167,21 +1167,21 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB18_6: # %entry -; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: or a3, a7, a3 +; RV32IFD-NEXT: and a4, t0, a4 ; RV32IFD-NEXT: and a2, t0, a2 -; RV32IFD-NEXT: and a5, t0, a0 -; RV32IFD-NEXT: beq a1, a3, .LBB18_8 +; RV32IFD-NEXT: beq a1, a0, .LBB18_8 ; RV32IFD-NEXT: # %bb.7: # %entry -; RV32IFD-NEXT: sltu a0, a3, a1 +; RV32IFD-NEXT: sltu a0, a0, a1 ; RV32IFD-NEXT: j .LBB18_9 ; RV32IFD-NEXT: .LBB18_8: -; RV32IFD-NEXT: snez a0, a4 +; RV32IFD-NEXT: snez a0, a3 ; RV32IFD-NEXT: .LBB18_9: # %entry -; RV32IFD-NEXT: and a5, a5, a2 -; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a5, a3, .LBB18_11 +; RV32IFD-NEXT: and a2, a2, a4 +; RV32IFD-NEXT: li a5, -1 +; RV32IFD-NEXT: beq a2, a5, .LBB18_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a0, a2, 0 +; RV32IFD-NEXT: slti a0, a4, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: .LBB18_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB18_13 @@ -1189,7 +1189,7 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB18_13: # %entry ; RV32IFD-NEXT: neg a0, a0 -; RV32IFD-NEXT: and a0, a0, a4 +; RV32IFD-NEXT: and a0, a0, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -1440,24 +1440,24 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a4, 20(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a5, a0, -1 ; RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: bnez a7, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: -; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: sltiu a6, a3, -1 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: beqz a7, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB21_4: # %entry ; RV32-NEXT: addi a7, a6, -1 ; RV32-NEXT: neg t0, a6 @@ -1465,21 +1465,21 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: or a3, a7, a3 +; RV32-NEXT: and a4, t0, a4 ; RV32-NEXT: and a2, t0, a2 -; RV32-NEXT: and a5, t0, a0 -; RV32-NEXT: beq a1, a3, .LBB21_8 +; RV32-NEXT: beq a1, a0, .LBB21_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: j .LBB21_9 ; RV32-NEXT: .LBB21_8: -; RV32-NEXT: snez a0, a4 +; RV32-NEXT: snez a0, a3 ; RV32-NEXT: .LBB21_9: # %entry -; RV32-NEXT: and a5, a5, a2 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a5, a3, .LBB21_11 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: li a5, -1 +; RV32-NEXT: beq a2, a5, .LBB21_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a0, a4, 0 ; RV32-NEXT: xori a0, a0, 1 ; 
RV32-NEXT: .LBB21_11: # %entry ; RV32-NEXT: bnez a0, .LBB21_13 @@ -1487,7 +1487,7 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB21_13: # %entry ; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -1657,24 +1657,24 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a4, 20(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a5, a0, -1 ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: bnez a7, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: -; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: sltiu a6, a3, -1 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: beqz a7, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB24_4: # %entry ; RV32-NEXT: addi a7, a6, -1 ; RV32-NEXT: neg t0, a6 @@ -1682,21 +1682,21 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: or a3, a7, a3 +; RV32-NEXT: and a4, t0, a4 ; RV32-NEXT: and a2, t0, a2 -; RV32-NEXT: and a5, t0, a0 -; RV32-NEXT: beq a1, a3, .LBB24_8 +; RV32-NEXT: beq a1, a0, .LBB24_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: j .LBB24_9 ; RV32-NEXT: .LBB24_8: -; RV32-NEXT: snez a0, a4 +; RV32-NEXT: snez a0, a3 ; RV32-NEXT: .LBB24_9: # %entry -; RV32-NEXT: and a5, a5, a2 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a5, a3, .LBB24_11 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: li a5, -1 +; RV32-NEXT: beq a2, a5, .LBB24_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a0, a4, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB24_11: # %entry ; RV32-NEXT: bnez a0, .LBB24_13 @@ -1704,7 +1704,7 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB24_13: # %entry ; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -2891,24 +2891,24 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: lw a3, 8(sp) ; RV32IF-NEXT: lw a1, 12(sp) -; RV32IF-NEXT: lw a4, 8(sp) -; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: addi a5, a3, -1 +; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a4, 20(sp) +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: addi a5, a0, -1 ; RV32IF-NEXT: beq a1, a5, .LBB45_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: sltu a6, a1, a5 -; RV32IF-NEXT: or a7, a0, a2 +; RV32IF-NEXT: or a7, a2, a4 ; RV32IF-NEXT: bnez a7, .LBB45_3 ; RV32IF-NEXT: j .LBB45_4 ; RV32IF-NEXT: .LBB45_2: -; RV32IF-NEXT: sltiu a6, a4, -1 -; RV32IF-NEXT: or a7, a0, a2 +; RV32IF-NEXT: sltiu a6, a3, -1 +; RV32IF-NEXT: or a7, a2, a4 ; RV32IF-NEXT: beqz a7, .LBB45_4 ; RV32IF-NEXT: .LBB45_3: # %entry -; RV32IF-NEXT: slti a6, 
a2, 0 +; RV32IF-NEXT: slti a6, a4, 0 ; RV32IF-NEXT: .LBB45_4: # %entry ; RV32IF-NEXT: addi a7, a6, -1 ; RV32IF-NEXT: neg t0, a6 @@ -2916,21 +2916,21 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: or a4, a7, a4 +; RV32IF-NEXT: or a3, a7, a3 +; RV32IF-NEXT: and a4, t0, a4 ; RV32IF-NEXT: and a2, t0, a2 -; RV32IF-NEXT: and a5, t0, a0 -; RV32IF-NEXT: beq a1, a3, .LBB45_8 +; RV32IF-NEXT: beq a1, a0, .LBB45_8 ; RV32IF-NEXT: # %bb.7: # %entry -; RV32IF-NEXT: sltu a0, a3, a1 +; RV32IF-NEXT: sltu a0, a0, a1 ; RV32IF-NEXT: j .LBB45_9 ; RV32IF-NEXT: .LBB45_8: -; RV32IF-NEXT: snez a0, a4 +; RV32IF-NEXT: snez a0, a3 ; RV32IF-NEXT: .LBB45_9: # %entry -; RV32IF-NEXT: and a5, a5, a2 -; RV32IF-NEXT: li a3, -1 -; RV32IF-NEXT: beq a5, a3, .LBB45_11 +; RV32IF-NEXT: and a2, a2, a4 +; RV32IF-NEXT: li a5, -1 +; RV32IF-NEXT: beq a2, a5, .LBB45_11 ; RV32IF-NEXT: # %bb.10: # %entry -; RV32IF-NEXT: slti a0, a2, 0 +; RV32IF-NEXT: slti a0, a4, 0 ; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: .LBB45_11: # %entry ; RV32IF-NEXT: bnez a0, .LBB45_13 @@ -2938,7 +2938,7 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: lui a1, 524288 ; RV32IF-NEXT: .LBB45_13: # %entry ; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: and a0, a0, a4 +; RV32IF-NEXT: and a0, a0, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret @@ -2990,24 +2990,24 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a3, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) -; RV32IFD-NEXT: lw a4, 8(sp) -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: addi a5, a3, -1 +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a4, 20(sp) +; RV32IFD-NEXT: lui a0, 524288 +; RV32IFD-NEXT: addi a5, a0, -1 ; RV32IFD-NEXT: beq a1, a5, .LBB45_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: sltu a6, a1, a5 -; RV32IFD-NEXT: or a7, a0, a2 +; RV32IFD-NEXT: or a7, a2, a4 ; RV32IFD-NEXT: bnez a7, .LBB45_3 ; RV32IFD-NEXT: j .LBB45_4 ; RV32IFD-NEXT: .LBB45_2: -; RV32IFD-NEXT: sltiu a6, a4, -1 -; RV32IFD-NEXT: or a7, a0, a2 +; RV32IFD-NEXT: sltiu a6, a3, -1 +; RV32IFD-NEXT: or a7, a2, a4 ; RV32IFD-NEXT: beqz a7, .LBB45_4 ; RV32IFD-NEXT: .LBB45_3: # %entry -; RV32IFD-NEXT: slti a6, a2, 0 +; RV32IFD-NEXT: slti a6, a4, 0 ; RV32IFD-NEXT: .LBB45_4: # %entry ; RV32IFD-NEXT: addi a7, a6, -1 ; RV32IFD-NEXT: neg t0, a6 @@ -3015,21 +3015,21 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: or a4, a7, a4 +; RV32IFD-NEXT: or a3, a7, a3 +; RV32IFD-NEXT: and a4, t0, a4 ; RV32IFD-NEXT: and a2, t0, a2 -; RV32IFD-NEXT: and a5, t0, a0 -; RV32IFD-NEXT: beq a1, a3, .LBB45_8 +; RV32IFD-NEXT: beq a1, a0, .LBB45_8 ; RV32IFD-NEXT: # %bb.7: # %entry -; RV32IFD-NEXT: sltu a0, a3, a1 +; RV32IFD-NEXT: sltu a0, a0, a1 ; RV32IFD-NEXT: j .LBB45_9 ; RV32IFD-NEXT: .LBB45_8: -; RV32IFD-NEXT: snez a0, a4 +; RV32IFD-NEXT: snez a0, a3 ; RV32IFD-NEXT: .LBB45_9: # %entry -; RV32IFD-NEXT: and a5, a5, a2 -; RV32IFD-NEXT: li a3, -1 -; RV32IFD-NEXT: beq a5, a3, .LBB45_11 +; RV32IFD-NEXT: and a2, a2, a4 +; RV32IFD-NEXT: li a5, -1 +; RV32IFD-NEXT: beq a2, a5, .LBB45_11 ; RV32IFD-NEXT: # %bb.10: # %entry -; RV32IFD-NEXT: slti a0, a2, 0 +; RV32IFD-NEXT: slti a0, a4, 0 ; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: 
.LBB45_11: # %entry ; RV32IFD-NEXT: bnez a0, .LBB45_13 @@ -3037,7 +3037,7 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: lui a1, 524288 ; RV32IFD-NEXT: .LBB45_13: # %entry ; RV32IFD-NEXT: neg a0, a0 -; RV32IFD-NEXT: and a0, a0, a4 +; RV32IFD-NEXT: and a0, a0, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret @@ -3246,24 +3246,24 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a4, 20(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a5, a0, -1 ; RV32-NEXT: beq a1, a5, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: bnez a7, .LBB48_3 ; RV32-NEXT: j .LBB48_4 ; RV32-NEXT: .LBB48_2: -; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: sltiu a6, a3, -1 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: beqz a7, .LBB48_4 ; RV32-NEXT: .LBB48_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB48_4: # %entry ; RV32-NEXT: addi a7, a6, -1 ; RV32-NEXT: neg t0, a6 @@ -3271,21 +3271,21 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: or a3, a7, a3 +; RV32-NEXT: and a4, t0, a4 ; RV32-NEXT: and a2, t0, a2 -; RV32-NEXT: and a5, t0, a0 -; RV32-NEXT: beq a1, a3, .LBB48_8 +; RV32-NEXT: beq a1, a0, .LBB48_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: j .LBB48_9 ; RV32-NEXT: .LBB48_8: -; RV32-NEXT: snez a0, a4 +; RV32-NEXT: snez a0, a3 ; RV32-NEXT: .LBB48_9: # %entry -; RV32-NEXT: and a5, a5, a2 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a5, a3, .LBB48_11 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: li a5, -1 +; RV32-NEXT: beq a2, a5, .LBB48_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a0, a4, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB48_11: # %entry ; RV32-NEXT: bnez a0, .LBB48_13 @@ -3293,7 +3293,7 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB48_13: # %entry ; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret @@ -3437,24 +3437,24 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a2, 20(sp) +; RV32-NEXT: lw a3, 8(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: addi a5, a3, -1 +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a4, 20(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a5, a0, -1 ; RV32-NEXT: beq a1, a5, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: sltu a6, a1, a5 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: bnez a7, .LBB51_3 ; RV32-NEXT: j .LBB51_4 ; RV32-NEXT: .LBB51_2: -; RV32-NEXT: sltiu a6, a4, -1 -; RV32-NEXT: or a7, a0, a2 +; RV32-NEXT: sltiu a6, a3, -1 +; RV32-NEXT: or a7, a2, a4 ; RV32-NEXT: beqz a7, .LBB51_4 ; RV32-NEXT: .LBB51_3: # %entry -; RV32-NEXT: slti a6, a2, 0 +; 
RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB51_4: # %entry ; RV32-NEXT: addi a7, a6, -1 ; RV32-NEXT: neg t0, a6 @@ -3462,21 +3462,21 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: or a4, a7, a4 +; RV32-NEXT: or a3, a7, a3 +; RV32-NEXT: and a4, t0, a4 ; RV32-NEXT: and a2, t0, a2 -; RV32-NEXT: and a5, t0, a0 -; RV32-NEXT: beq a1, a3, .LBB51_8 +; RV32-NEXT: beq a1, a0, .LBB51_8 ; RV32-NEXT: # %bb.7: # %entry -; RV32-NEXT: sltu a0, a3, a1 +; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: j .LBB51_9 ; RV32-NEXT: .LBB51_8: -; RV32-NEXT: snez a0, a4 +; RV32-NEXT: snez a0, a3 ; RV32-NEXT: .LBB51_9: # %entry -; RV32-NEXT: and a5, a5, a2 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beq a5, a3, .LBB51_11 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: li a5, -1 +; RV32-NEXT: beq a2, a5, .LBB51_11 ; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: slti a0, a2, 0 +; RV32-NEXT: slti a0, a4, 0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: .LBB51_11: # %entry ; RV32-NEXT: bnez a0, .LBB51_13 @@ -3484,7 +3484,7 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: .LBB51_13: # %entry ; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll index 13d03c5217fb1b..dfd62e8d5f9f56 100644 --- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll +++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll @@ -56,16 +56,16 @@ entry: define void @test3(ptr %a, ptr %b) nounwind { ; RV32-LABEL: test3: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a3, 12(a1) +; RV32-NEXT: lw a2, 12(a1) +; RV32-NEXT: lw a3, 4(a1) ; RV32-NEXT: lw a4, 8(a1) ; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: lui a5, 524288 -; RV32-NEXT: xor a3, a3, a5 +; RV32-NEXT: xor a2, a2, a5 ; RV32-NEXT: sw a4, 8(a0) ; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: sw a2, 4(a0) -; RV32-NEXT: sw a3, 12(a0) +; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a2, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test3: diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll index 6fde86733b07f7..0941f6a73da280 100644 --- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll +++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll @@ -222,32 +222,32 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; RV64IFD-NEXT: .cfi_offset s1, -24 ; RV64IFD-NEXT: .cfi_offset s2, -32 ; RV64IFD-NEXT: .cfi_offset fs0, -40 -; RV64IFD-NEXT: lhu s1, 16(a1) -; RV64IFD-NEXT: lhu s2, 0(a1) -; RV64IFD-NEXT: lhu a1, 8(a1) +; RV64IFD-NEXT: lhu s1, 0(a1) +; RV64IFD-NEXT: lhu a2, 8(a1) +; RV64IFD-NEXT: lhu s2, 16(a1) ; RV64IFD-NEXT: mv s0, a0 -; RV64IFD-NEXT: fmv.w.x fa0, a1 +; RV64IFD-NEXT: fmv.w.x fa0, a2 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.s fs0, fa0 -; RV64IFD-NEXT: fmv.w.x fa0, s2 +; RV64IFD-NEXT: fmv.w.x fa0, s1 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: fmv.x.w a0, fs0 -; RV64IFD-NEXT: slli s2, a0, 16 +; RV64IFD-NEXT: slli s1, a0, 16 ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.x.w a0, fa0 ; RV64IFD-NEXT: slli a0, a0, 48 ; RV64IFD-NEXT: srli a0, a0, 48 -; RV64IFD-NEXT: or s2, a0, s2 -; RV64IFD-NEXT: fmv.w.x fa0, s1 +; RV64IFD-NEXT: or s1, a0, s1 +; RV64IFD-NEXT: fmv.w.x fa0, s2 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; 
RV64IFD-NEXT: fmv.x.w a0, fa0 ; RV64IFD-NEXT: sh a0, 4(s0) -; RV64IFD-NEXT: sw s2, 0(s0) +; RV64IFD-NEXT: sw s1, 0(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -349,27 +349,27 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; RV64IFD-NEXT: .cfi_offset fs0, -48 ; RV64IFD-NEXT: .cfi_offset fs1, -56 ; RV64IFD-NEXT: .cfi_offset fs2, -64 -; RV64IFD-NEXT: lhu s1, 24(a1) -; RV64IFD-NEXT: lhu s2, 0(a1) -; RV64IFD-NEXT: lhu s3, 8(a1) -; RV64IFD-NEXT: lhu a1, 16(a1) +; RV64IFD-NEXT: lhu s1, 0(a1) +; RV64IFD-NEXT: lhu s2, 8(a1) +; RV64IFD-NEXT: lhu a2, 16(a1) +; RV64IFD-NEXT: lhu s3, 24(a1) ; RV64IFD-NEXT: mv s0, a0 -; RV64IFD-NEXT: fmv.w.x fa0, a1 +; RV64IFD-NEXT: fmv.w.x fa0, a2 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.s fs0, fa0 -; RV64IFD-NEXT: fmv.w.x fa0, s3 +; RV64IFD-NEXT: fmv.w.x fa0, s2 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.s fs1, fa0 -; RV64IFD-NEXT: fmv.w.x fa0, s2 +; RV64IFD-NEXT: fmv.w.x fa0, s1 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.s fs2, fa0 -; RV64IFD-NEXT: fmv.w.x fa0, s1 +; RV64IFD-NEXT: fmv.w.x fa0, s3 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: fmv.x.w s1, fs2 diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll index 30f9dd1e516585..442b0cf5b4a856 100644 --- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll +++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll @@ -738,25 +738,25 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 12(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 4(a1) ; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw s0, 4(a1) +; RV32I-NEXT: lw s1, 8(a1) +; RV32I-NEXT: lw s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: addi a1, sp, 12 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call frexpf -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: addi a1, sp, 20 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: lw a1, 8(sp) ; RV32I-NEXT: lw a2, 12(sp) @@ -764,7 +764,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32I-NEXT: lw a4, 20(sp) ; RV32I-NEXT: sw a0, 12(s3) ; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw s2, 4(s3) +; RV32I-NEXT: sw s0, 4(s3) ; RV32I-NEXT: sw s4, 0(s3) ; RV32I-NEXT: sw a4, 28(s3) ; RV32I-NEXT: sw a3, 24(s3) @@ -788,25 +788,25 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s0, 24(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 8(a1) ; RV64I-NEXT: lw a2, 0(a1) +; RV64I-NEXT: lw s0, 8(a1) +; RV64I-NEXT: lw s1, 16(a1) +; RV64I-NEXT: lw s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: mv a0, a2 ; 
RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: addi a1, sp, 4 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call frexpf -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: lw a1, 0(sp) ; RV64I-NEXT: lw a2, 4(sp) @@ -814,7 +814,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV64I-NEXT: lw a4, 12(sp) ; RV64I-NEXT: sw a0, 12(s3) ; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw s2, 4(s3) +; RV64I-NEXT: sw s0, 4(s3) ; RV64I-NEXT: sw s4, 0(s3) ; RV64I-NEXT: sw a4, 28(s3) ; RV64I-NEXT: sw a3, 24(s3) @@ -1006,29 +1006,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 12(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 4(a1) ; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw s0, 4(a1) +; RV32I-NEXT: lw s1, 8(a1) +; RV32I-NEXT: lw s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: addi a1, sp, 12 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call frexpf -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: addi a1, sp, 20 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: sw a0, 12(s3) ; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw s2, 4(s3) +; RV32I-NEXT: sw s0, 4(s3) ; RV32I-NEXT: sw s4, 0(s3) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -1048,29 +1048,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s0, 24(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 8(a1) ; RV64I-NEXT: lw a2, 0(a1) +; RV64I-NEXT: lw s0, 8(a1) +; RV64I-NEXT: lw s1, 16(a1) +; RV64I-NEXT: lw s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: addi a1, sp, 4 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call frexpf -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: sw a0, 12(s3) ; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw s2, 4(s3) +; RV64I-NEXT: sw s0, 4(s3) ; RV64I-NEXT: sw s4, 0(s3) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -1254,22 +1254,22 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 12(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 4(a1) ; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw s0, 4(a1) +; 
RV32I-NEXT: lw s1, 8(a1) +; RV32I-NEXT: lw s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: addi a1, sp, 12 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 16 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 20 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 24 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw a1, 20(sp) @@ -1295,22 +1295,22 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s0, 24(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 8(a1) ; RV64I-NEXT: lw a2, 0(a1) +; RV64I-NEXT: lw s0, 8(a1) +; RV64I-NEXT: lw s1, 16(a1) +; RV64I-NEXT: lw s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 16 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 20 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: lw a0, 20(sp) ; RV64I-NEXT: lw a1, 16(sp) @@ -1584,16 +1584,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IFD-NEXT: addi a2, sp, 36 ; RV32IFD-NEXT: sw a3, 0(sp) ; RV32IFD-NEXT: call frexpl -; RV32IFD-NEXT: lw a0, 36(sp) +; RV32IFD-NEXT: lw a0, 24(sp) ; RV32IFD-NEXT: lw a1, 28(sp) -; RV32IFD-NEXT: lw a2, 24(sp) +; RV32IFD-NEXT: lw a2, 16(sp) ; RV32IFD-NEXT: lw a3, 20(sp) -; RV32IFD-NEXT: lw a4, 16(sp) +; RV32IFD-NEXT: lw a4, 36(sp) ; RV32IFD-NEXT: sw a1, 12(s0) -; RV32IFD-NEXT: sw a2, 8(s0) +; RV32IFD-NEXT: sw a0, 8(s0) ; RV32IFD-NEXT: sw a3, 4(s0) -; RV32IFD-NEXT: sw a4, 0(s0) -; RV32IFD-NEXT: sw a0, 16(s0) +; RV32IFD-NEXT: sw a2, 0(s0) +; RV32IFD-NEXT: sw a4, 16(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 48 @@ -1637,16 +1637,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi a2, sp, 36 ; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) ; RV32IZFINXZDINX-NEXT: call frexpl -; RV32IZFINXZDINX-NEXT: lw a0, 36(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 24(sp) ; RV32IZFINXZDINX-NEXT: lw a1, 28(sp) -; RV32IZFINXZDINX-NEXT: lw a2, 24(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 16(sp) ; RV32IZFINXZDINX-NEXT: lw a3, 20(sp) -; RV32IZFINXZDINX-NEXT: lw a4, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a4, 36(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(s0) -; RV32IZFINXZDINX-NEXT: sw a2, 8(s0) +; RV32IZFINXZDINX-NEXT: sw a0, 8(s0) ; RV32IZFINXZDINX-NEXT: sw a3, 4(s0) -; RV32IZFINXZDINX-NEXT: sw a4, 0(s0) -; RV32IZFINXZDINX-NEXT: sw a0, 16(s0) +; RV32IZFINXZDINX-NEXT: sw a2, 0(s0) +; RV32IZFINXZDINX-NEXT: sw a4, 16(s0) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 48 @@ -1690,16 +1690,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32I-NEXT: addi a2, sp, 36 ; RV32I-NEXT: sw a3, 0(sp) ; RV32I-NEXT: call frexpl -; RV32I-NEXT: lw a0, 36(sp) +; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw a1, 28(sp) -; RV32I-NEXT: lw a2, 24(sp) +; RV32I-NEXT: lw a2, 16(sp) ; RV32I-NEXT: lw a3, 20(sp) -; RV32I-NEXT: lw 
a4, 16(sp) +; RV32I-NEXT: lw a4, 36(sp) ; RV32I-NEXT: sw a1, 12(s0) -; RV32I-NEXT: sw a2, 8(s0) +; RV32I-NEXT: sw a0, 8(s0) ; RV32I-NEXT: sw a3, 4(s0) -; RV32I-NEXT: sw a4, 0(s0) -; RV32I-NEXT: sw a0, 16(s0) +; RV32I-NEXT: sw a2, 0(s0) +; RV32I-NEXT: sw a4, 16(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll index 02f582339d0b78..41c27d83defe61 100644 --- a/llvm/test/CodeGen/RISCV/memcpy.ll +++ b/llvm/test/CodeGen/RISCV/memcpy.ll @@ -25,16 +25,16 @@ define i32 @t0() { ; RV32: # %bb.0: # %entry ; RV32-NEXT: lui a0, %hi(src) ; RV32-NEXT: lw a1, %lo(src)(a0) -; RV32-NEXT: lui a2, %hi(dst) -; RV32-NEXT: sw a1, %lo(dst)(a2) ; RV32-NEXT: addi a0, a0, %lo(src) -; RV32-NEXT: lbu a1, 10(a0) +; RV32-NEXT: lw a2, 4(a0) ; RV32-NEXT: lh a3, 8(a0) -; RV32-NEXT: lw a0, 4(a0) -; RV32-NEXT: addi a2, a2, %lo(dst) -; RV32-NEXT: sb a1, 10(a2) -; RV32-NEXT: sh a3, 8(a2) -; RV32-NEXT: sw a0, 4(a2) +; RV32-NEXT: lbu a0, 10(a0) +; RV32-NEXT: lui a4, %hi(dst) +; RV32-NEXT: sw a1, %lo(dst)(a4) +; RV32-NEXT: addi a1, a4, %lo(dst) +; RV32-NEXT: sb a0, 10(a1) +; RV32-NEXT: sh a3, 8(a1) +; RV32-NEXT: sw a2, 4(a1) ; RV32-NEXT: li a0, 0 ; RV32-NEXT: ret ; @@ -42,14 +42,14 @@ define i32 @t0() { ; RV64: # %bb.0: # %entry ; RV64-NEXT: lui a0, %hi(src) ; RV64-NEXT: ld a1, %lo(src)(a0) -; RV64-NEXT: lui a2, %hi(dst) ; RV64-NEXT: addi a0, a0, %lo(src) -; RV64-NEXT: lbu a3, 10(a0) -; RV64-NEXT: lh a0, 8(a0) -; RV64-NEXT: sd a1, %lo(dst)(a2) -; RV64-NEXT: addi a1, a2, %lo(dst) -; RV64-NEXT: sb a3, 10(a1) -; RV64-NEXT: sh a0, 8(a1) +; RV64-NEXT: lh a2, 8(a0) +; RV64-NEXT: lbu a0, 10(a0) +; RV64-NEXT: lui a3, %hi(dst) +; RV64-NEXT: sd a1, %lo(dst)(a3) +; RV64-NEXT: addi a1, a3, %lo(dst) +; RV64-NEXT: sb a0, 10(a1) +; RV64-NEXT: sh a2, 8(a1) ; RV64-NEXT: li a0, 0 ; RV64-NEXT: ret ; @@ -57,14 +57,14 @@ define i32 @t0() { ; RV32-FAST: # %bb.0: # %entry ; RV32-FAST-NEXT: lui a0, %hi(src) ; RV32-FAST-NEXT: lw a1, %lo(src)(a0) -; RV32-FAST-NEXT: lui a2, %hi(dst) ; RV32-FAST-NEXT: addi a0, a0, %lo(src) -; RV32-FAST-NEXT: lw a3, 7(a0) -; RV32-FAST-NEXT: lw a0, 4(a0) -; RV32-FAST-NEXT: sw a1, %lo(dst)(a2) -; RV32-FAST-NEXT: addi a1, a2, %lo(dst) -; RV32-FAST-NEXT: sw a3, 7(a1) -; RV32-FAST-NEXT: sw a0, 4(a1) +; RV32-FAST-NEXT: lw a2, 4(a0) +; RV32-FAST-NEXT: lw a0, 7(a0) +; RV32-FAST-NEXT: lui a3, %hi(dst) +; RV32-FAST-NEXT: sw a1, %lo(dst)(a3) +; RV32-FAST-NEXT: addi a1, a3, %lo(dst) +; RV32-FAST-NEXT: sw a0, 7(a1) +; RV32-FAST-NEXT: sw a2, 4(a1) ; RV32-FAST-NEXT: li a0, 0 ; RV32-FAST-NEXT: ret ; @@ -166,16 +166,16 @@ define void @t2(ptr nocapture %C) nounwind { ; RV64-FAST-NEXT: lui a1, %hi(.L.str2) ; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1) ; RV64-FAST-NEXT: sd a2, 0(a0) -; RV64-FAST-NEXT: lui a2, 1156 -; RV64-FAST-NEXT: addi a2, a2, 332 ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2) -; RV64-FAST-NEXT: ld a3, 24(a1) -; RV64-FAST-NEXT: ld a4, 16(a1) -; RV64-FAST-NEXT: ld a1, 8(a1) -; RV64-FAST-NEXT: sw a2, 32(a0) -; RV64-FAST-NEXT: sd a3, 24(a0) -; RV64-FAST-NEXT: sd a4, 16(a0) -; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: ld a2, 8(a1) +; RV64-FAST-NEXT: ld a3, 16(a1) +; RV64-FAST-NEXT: ld a1, 24(a1) +; RV64-FAST-NEXT: lui a4, 1156 +; RV64-FAST-NEXT: addi a4, a4, 332 +; RV64-FAST-NEXT: sw a4, 32(a0) +; RV64-FAST-NEXT: sd a1, 24(a0) +; RV64-FAST-NEXT: sd a3, 16(a0) +; RV64-FAST-NEXT: sd a2, 8(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %C, 
ptr @.str2, i64 36, i1 false) diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index db41b262718141..cf290a0b8682da 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -1,12 +1,14 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=riscv32 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-clustering=false \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv64 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-clustering=false \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv32 -riscv-misched-load-clustering -verify-misched \ +; RUN: llc -mtriple=riscv32 -verify-misched \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s -; RUN: llc -mtriple=riscv64 -riscv-misched-load-clustering -verify-misched \ +; RUN: llc -mtriple=riscv64 -verify-misched \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 14f2777fdd06d2..e9b84b3cd97ed2 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1351,48 +1351,48 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 12(a1) -; RV32IM-NEXT: lw a3, 8(a1) -; RV32IM-NEXT: lw a4, 0(a1) -; RV32IM-NEXT: lw a1, 4(a1) +; RV32IM-NEXT: lw a2, 0(a1) +; RV32IM-NEXT: lw a3, 4(a1) +; RV32IM-NEXT: lw a4, 8(a1) +; RV32IM-NEXT: lw a1, 12(a1) ; RV32IM-NEXT: li a5, -15 ; RV32IM-NEXT: slli a5, a5, 8 -; RV32IM-NEXT: mulhu a6, a4, a5 -; RV32IM-NEXT: mul a7, a1, a5 +; RV32IM-NEXT: mulhu a6, a2, a5 +; RV32IM-NEXT: mul a7, a3, a5 ; RV32IM-NEXT: add a6, a7, a6 ; RV32IM-NEXT: sltu a7, a6, a7 -; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: mulhu t0, a3, a5 ; RV32IM-NEXT: add a7, t0, a7 -; RV32IM-NEXT: sub a6, a6, a4 -; RV32IM-NEXT: neg t0, a4 +; RV32IM-NEXT: sub a6, a6, a2 +; RV32IM-NEXT: neg t0, a2 ; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 -; RV32IM-NEXT: mulhu t3, a4, t2 +; RV32IM-NEXT: mulhu t3, a2, t2 ; RV32IM-NEXT: add t1, t3, t1 ; RV32IM-NEXT: add t1, a7, t1 -; RV32IM-NEXT: sub t4, t1, a1 -; RV32IM-NEXT: mul t5, a3, a5 -; RV32IM-NEXT: sub t5, t5, a4 +; RV32IM-NEXT: sub t4, t1, a3 +; RV32IM-NEXT: mul t5, a4, a5 +; RV32IM-NEXT: sub t5, t5, a2 ; RV32IM-NEXT: add t6, t4, t5 ; RV32IM-NEXT: sltu s0, t6, t4 -; RV32IM-NEXT: neg s1, a1 +; RV32IM-NEXT: neg s1, a3 ; RV32IM-NEXT: sltu t4, t4, s1 ; RV32IM-NEXT: sltu a7, t1, a7 -; RV32IM-NEXT: mulhu t1, a1, t2 +; RV32IM-NEXT: mulhu t1, a3, t2 ; RV32IM-NEXT: add a7, t1, a7 ; RV32IM-NEXT: add a7, a7, t4 ; RV32IM-NEXT: sltu t0, t5, t0 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: mulhu t1, a3, a5 -; RV32IM-NEXT: sub a3, t1, a3 -; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: mul a1, a1, a5 +; RV32IM-NEXT: mulhu t1, a4, a5 +; RV32IM-NEXT: sub a4, t1, a4 ; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: sub a1, t3, a1 -; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: add a3, a2, a3 +; RV32IM-NEXT: sub a3, t3, a3 +; RV32IM-NEXT: add a1, a3, a1 ; 
RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 -; RV32IM-NEXT: mul a2, a4, a5 +; RV32IM-NEXT: mul a2, a2, a5 ; RV32IM-NEXT: sw a2, 0(a0) ; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) @@ -1436,39 +1436,39 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-LABEL: muli128_m63: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: slli a3, a2, 6 -; RV32I-NEXT: sltu a5, a2, a3 +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: slli a1, a2, 6 +; RV32I-NEXT: sltu a4, a2, a1 ; RV32I-NEXT: srli a7, a2, 26 -; RV32I-NEXT: slli t0, a1, 6 +; RV32I-NEXT: slli t0, a3, 6 ; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: beq a1, a7, .LBB37_2 +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: beq a3, a7, .LBB37_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t0, a1, a7 +; RV32I-NEXT: sltu t0, a3, a7 ; RV32I-NEXT: .LBB37_2: -; RV32I-NEXT: srli t1, a1, 26 +; RV32I-NEXT: srli t1, a3, 26 ; RV32I-NEXT: slli t2, a6, 6 ; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: sub t2, a6, t1 ; RV32I-NEXT: sltu t3, t2, t0 ; RV32I-NEXT: sltu t1, a6, t1 ; RV32I-NEXT: srli a6, a6, 26 -; RV32I-NEXT: slli t4, a4, 6 +; RV32I-NEXT: slli t4, a5, 6 ; RV32I-NEXT: or a6, t4, a6 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: sub a4, a4, t1 -; RV32I-NEXT: sub a4, a4, t3 +; RV32I-NEXT: sub a5, a5, a6 +; RV32I-NEXT: sub a5, a5, t1 +; RV32I-NEXT: sub a5, a5, t3 ; RV32I-NEXT: sub a6, t2, t0 -; RV32I-NEXT: sub a1, a1, a7 -; RV32I-NEXT: sub a1, a1, a5 -; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a6, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m63: @@ -1476,52 +1476,52 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 12(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw a1, 8(a1) +; RV32IM-NEXT: lw a2, 0(a1) +; RV32IM-NEXT: lw a3, 4(a1) +; RV32IM-NEXT: lw a4, 8(a1) +; RV32IM-NEXT: lw a1, 12(a1) ; RV32IM-NEXT: li a5, -63 -; RV32IM-NEXT: mulhu a6, a3, a5 -; RV32IM-NEXT: slli a7, a4, 6 -; RV32IM-NEXT: sub a7, a4, a7 +; RV32IM-NEXT: mulhu a6, a2, a5 +; RV32IM-NEXT: slli a7, a3, 6 +; RV32IM-NEXT: sub a7, a3, a7 ; RV32IM-NEXT: add a6, a7, a6 ; RV32IM-NEXT: sltu a7, a6, a7 -; RV32IM-NEXT: mulhu t0, a4, a5 +; RV32IM-NEXT: mulhu t0, a3, a5 ; RV32IM-NEXT: add a7, t0, a7 -; RV32IM-NEXT: sub a6, a6, a3 -; RV32IM-NEXT: neg t0, a3 +; RV32IM-NEXT: sub a6, a6, a2 +; RV32IM-NEXT: neg t0, a2 ; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 -; RV32IM-NEXT: mulhu t3, a3, t2 +; RV32IM-NEXT: mulhu t3, a2, t2 ; RV32IM-NEXT: add t1, t3, t1 ; RV32IM-NEXT: add t1, a7, t1 -; RV32IM-NEXT: sub t4, t1, a4 -; RV32IM-NEXT: slli t5, a1, 6 -; RV32IM-NEXT: sub t6, a1, a3 +; RV32IM-NEXT: sub t4, t1, a3 +; RV32IM-NEXT: slli t5, a4, 6 +; RV32IM-NEXT: sub t6, a4, a2 ; RV32IM-NEXT: sub t5, t6, t5 ; RV32IM-NEXT: add t6, t4, t5 ; RV32IM-NEXT: sltu s0, t6, t4 -; RV32IM-NEXT: neg s1, a4 +; RV32IM-NEXT: neg s1, a3 ; RV32IM-NEXT: sltu t4, t4, s1 ; RV32IM-NEXT: sltu a7, t1, a7 -; RV32IM-NEXT: mulhu t1, a4, t2 +; RV32IM-NEXT: mulhu t1, a3, t2 ; RV32IM-NEXT: add a7, t1, a7 ; RV32IM-NEXT: add a7, a7, t4 ; 
RV32IM-NEXT: sltu t0, t5, t0 -; RV32IM-NEXT: slli t1, a2, 6 -; RV32IM-NEXT: sub a2, a2, t1 -; RV32IM-NEXT: mulhu a5, a1, a5 -; RV32IM-NEXT: sub a5, a5, a1 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: add a4, a3, a4 -; RV32IM-NEXT: sub a1, t3, a4 -; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: slli t1, a1, 6 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulhu a5, a4, a5 +; RV32IM-NEXT: sub a5, a5, a4 +; RV32IM-NEXT: add a1, a5, a1 +; RV32IM-NEXT: add a3, a2, a3 +; RV32IM-NEXT: sub a3, t3, a3 +; RV32IM-NEXT: add a1, a3, a1 ; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 -; RV32IM-NEXT: slli a2, a3, 6 -; RV32IM-NEXT: sub a3, a3, a2 -; RV32IM-NEXT: sw a3, 0(a0) +; RV32IM-NEXT: slli a3, a2, 6 +; RV32IM-NEXT: sub a2, a2, a3 +; RV32IM-NEXT: sw a2, 0(a0) ; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) diff --git a/llvm/test/CodeGen/RISCV/nontemporal.ll b/llvm/test/CodeGen/RISCV/nontemporal.ll index 4c5c36fc72d14d..55bd32e4857345 100644 --- a/llvm/test/CodeGen/RISCV/nontemporal.ll +++ b/llvm/test/CodeGen/RISCV/nontemporal.ll @@ -915,30 +915,30 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 64(a1) -; CHECK-RV64-NEXT: lbu t3, 72(a1) -; CHECK-RV64-NEXT: lbu t4, 80(a1) -; CHECK-RV64-NEXT: lbu t5, 88(a1) -; CHECK-RV64-NEXT: lbu t6, 120(a1) -; CHECK-RV64-NEXT: lbu s0, 112(a1) -; CHECK-RV64-NEXT: lbu s1, 104(a1) -; CHECK-RV64-NEXT: lbu a1, 96(a1) +; CHECK-RV64-NEXT: lbu t2, 96(a1) +; CHECK-RV64-NEXT: lbu t3, 104(a1) +; CHECK-RV64-NEXT: lbu t4, 112(a1) +; CHECK-RV64-NEXT: lbu t5, 120(a1) +; CHECK-RV64-NEXT: lbu t6, 64(a1) +; CHECK-RV64-NEXT: lbu s0, 72(a1) +; CHECK-RV64-NEXT: lbu s1, 80(a1) +; CHECK-RV64-NEXT: lbu a1, 88(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t6, 15(a0) +; CHECK-RV64-NEXT: sb t5, 15(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s0, 14(a0) +; CHECK-RV64-NEXT: sb t4, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s1, 13(a0) +; CHECK-RV64-NEXT: sb t3, 13(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a1, 12(a0) +; CHECK-RV64-NEXT: sb t2, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t5, 11(a0) +; CHECK-RV64-NEXT: sb a1, 11(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t4, 10(a0) +; CHECK-RV64-NEXT: sb s1, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t3, 9(a0) +; CHECK-RV64-NEXT: sb s0, 9(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t2, 8(a0) +; CHECK-RV64-NEXT: sb t6, 8(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.all @@ -976,30 +976,30 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 32(a1) -; CHECK-RV32-NEXT: lbu t3, 36(a1) -; CHECK-RV32-NEXT: lbu t4, 40(a1) -; CHECK-RV32-NEXT: lbu t5, 44(a1) -; CHECK-RV32-NEXT: lbu t6, 60(a1) -; CHECK-RV32-NEXT: lbu s0, 56(a1) -; CHECK-RV32-NEXT: lbu s1, 52(a1) -; CHECK-RV32-NEXT: lbu a1, 48(a1) +; CHECK-RV32-NEXT: lbu t2, 48(a1) +; CHECK-RV32-NEXT: lbu t3, 52(a1) +; CHECK-RV32-NEXT: lbu t4, 56(a1) +; CHECK-RV32-NEXT: lbu t5, 60(a1) +; CHECK-RV32-NEXT: lbu t6, 32(a1) +; CHECK-RV32-NEXT: lbu s0, 36(a1) +; CHECK-RV32-NEXT: lbu s1, 40(a1) +; CHECK-RV32-NEXT: lbu a1, 44(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t6, 15(a0) +; CHECK-RV32-NEXT: sb t5, 
15(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s0, 14(a0) +; CHECK-RV32-NEXT: sb t4, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s1, 13(a0) +; CHECK-RV32-NEXT: sb t3, 13(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a1, 12(a0) +; CHECK-RV32-NEXT: sb t2, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t5, 11(a0) +; CHECK-RV32-NEXT: sb a1, 11(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t4, 10(a0) +; CHECK-RV32-NEXT: sb s1, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t3, 9(a0) +; CHECK-RV32-NEXT: sb s0, 9(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t2, 8(a0) +; CHECK-RV32-NEXT: sb t6, 8(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.all @@ -1037,28 +1037,28 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu a3, 104(a1) +; CHECK-RV64C-NEXT: lbu a4, 112(a1) +; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu a3, 72(a1) -; CHECK-RV64C-NEXT: lbu a4, 80(a1) -; CHECK-RV64C-NEXT: lbu a5, 88(a1) -; CHECK-RV64C-NEXT: lbu a2, 120(a1) -; CHECK-RV64C-NEXT: lbu s0, 112(a1) -; CHECK-RV64C-NEXT: lbu s1, 104(a1) -; CHECK-RV64C-NEXT: lbu a1, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 72(a1) +; CHECK-RV64C-NEXT: lbu s1, 80(a1) +; CHECK-RV64C-NEXT: lbu a1, 88(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a2, 15(a0) +; CHECK-RV64C-NEXT: sb a5, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s0, 14(a0) +; CHECK-RV64C-NEXT: sb a4, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s1, 13(a0) +; CHECK-RV64C-NEXT: sb a3, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a1, 12(a0) +; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a5, 11(a0) +; CHECK-RV64C-NEXT: sb a1, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a4, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a3, 9(a0) +; CHECK-RV64C-NEXT: sb s0, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all @@ -1098,28 +1098,28 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu a3, 52(a1) +; CHECK-RV32C-NEXT: lbu a4, 56(a1) +; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu a3, 36(a1) -; CHECK-RV32C-NEXT: lbu a4, 40(a1) -; CHECK-RV32C-NEXT: lbu a5, 44(a1) -; CHECK-RV32C-NEXT: lbu a2, 60(a1) -; CHECK-RV32C-NEXT: lbu s0, 56(a1) -; CHECK-RV32C-NEXT: lbu s1, 52(a1) -; CHECK-RV32C-NEXT: lbu a1, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 36(a1) +; CHECK-RV32C-NEXT: lbu s1, 40(a1) +; CHECK-RV32C-NEXT: lbu a1, 44(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a2, 15(a0) +; CHECK-RV32C-NEXT: sb a5, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s0, 14(a0) +; CHECK-RV32C-NEXT: sb a4, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s1, 13(a0) +; CHECK-RV32C-NEXT: sb a3, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a1, 12(a0) +; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a5, 11(a0) +; CHECK-RV32C-NEXT: sb a1, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; 
CHECK-RV32C-NEXT: sb a4, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a3, 9(a0) +; CHECK-RV32C-NEXT: sb s0, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all @@ -1163,112 +1163,112 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 0(a1) -; CHECK-RV64-NEXT: lh a3, 8(a1) -; CHECK-RV64-NEXT: lh a4, 16(a1) -; CHECK-RV64-NEXT: lh a5, 24(a1) -; CHECK-RV64-NEXT: lh a6, 56(a1) -; CHECK-RV64-NEXT: lh a7, 48(a1) -; CHECK-RV64-NEXT: lh t0, 40(a1) -; CHECK-RV64-NEXT: lh a1, 32(a1) +; CHECK-RV64-NEXT: lh a2, 32(a1) +; CHECK-RV64-NEXT: lh a3, 40(a1) +; CHECK-RV64-NEXT: lh a4, 48(a1) +; CHECK-RV64-NEXT: lh a5, 56(a1) +; CHECK-RV64-NEXT: lh a6, 0(a1) +; CHECK-RV64-NEXT: lh a7, 8(a1) +; CHECK-RV64-NEXT: lh t0, 16(a1) +; CHECK-RV64-NEXT: lh a1, 24(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a6, 14(a0) +; CHECK-RV64-NEXT: sh a5, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a7, 12(a0) +; CHECK-RV64-NEXT: sh a4, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh t0, 10(a0) +; CHECK-RV64-NEXT: sh a3, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a1, 8(a0) +; CHECK-RV64-NEXT: sh a2, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a5, 6(a0) +; CHECK-RV64-NEXT: sh a1, 6(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a4, 4(a0) +; CHECK-RV64-NEXT: sh t0, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a3, 2(a0) +; CHECK-RV64-NEXT: sh a7, 2(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a2, 0(a0) +; CHECK-RV64-NEXT: sh a6, 0(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 0(a1) -; CHECK-RV32-NEXT: lh a3, 4(a1) -; CHECK-RV32-NEXT: lh a4, 8(a1) -; CHECK-RV32-NEXT: lh a5, 12(a1) -; CHECK-RV32-NEXT: lh a6, 28(a1) -; CHECK-RV32-NEXT: lh a7, 24(a1) -; CHECK-RV32-NEXT: lh t0, 20(a1) -; CHECK-RV32-NEXT: lh a1, 16(a1) +; CHECK-RV32-NEXT: lh a2, 16(a1) +; CHECK-RV32-NEXT: lh a3, 20(a1) +; CHECK-RV32-NEXT: lh a4, 24(a1) +; CHECK-RV32-NEXT: lh a5, 28(a1) +; CHECK-RV32-NEXT: lh a6, 0(a1) +; CHECK-RV32-NEXT: lh a7, 4(a1) +; CHECK-RV32-NEXT: lh t0, 8(a1) +; CHECK-RV32-NEXT: lh a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a6, 14(a0) +; CHECK-RV32-NEXT: sh a5, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a7, 12(a0) +; CHECK-RV32-NEXT: sh a4, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh t0, 10(a0) +; CHECK-RV32-NEXT: sh a3, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a1, 8(a0) +; CHECK-RV32-NEXT: sh a2, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a5, 6(a0) +; CHECK-RV32-NEXT: sh a1, 6(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a4, 4(a0) +; CHECK-RV32-NEXT: sh t0, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a3, 2(a0) +; CHECK-RV32-NEXT: sh a7, 2(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a2, 0(a0) +; CHECK-RV32-NEXT: sh a6, 0(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV64C: # %bb.0: +; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a3, 40(a1) +; CHECK-RV64C-NEXT: lh a4, 48(a1) +; CHECK-RV64C-NEXT: lh a5, 56(a1) ; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh a7, 8(a1) -; CHECK-RV64C-NEXT: lh t0, 16(a1) -; CHECK-RV64C-NEXT: lh a5, 24(a1) -; 
CHECK-RV64C-NEXT: lh a2, 56(a1) -; CHECK-RV64C-NEXT: lh a3, 48(a1) -; CHECK-RV64C-NEXT: lh a4, 40(a1) -; CHECK-RV64C-NEXT: lh a1, 32(a1) +; CHECK-RV64C-NEXT: lh t0, 8(a1) +; CHECK-RV64C-NEXT: lh a2, 16(a1) +; CHECK-RV64C-NEXT: lh a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a2, 14(a0) +; CHECK-RV64C-NEXT: sh a5, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a3, 12(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a4, 10(a0) +; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a1, 8(a0) +; CHECK-RV64C-NEXT: sh a7, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a5, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: sh a2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a7, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV32C: # %bb.0: +; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a3, 20(a1) +; CHECK-RV32C-NEXT: lh a4, 24(a1) +; CHECK-RV32C-NEXT: lh a5, 28(a1) ; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh a7, 4(a1) -; CHECK-RV32C-NEXT: lh t0, 8(a1) -; CHECK-RV32C-NEXT: lh a5, 12(a1) -; CHECK-RV32C-NEXT: lh a2, 28(a1) -; CHECK-RV32C-NEXT: lh a3, 24(a1) -; CHECK-RV32C-NEXT: lh a4, 20(a1) -; CHECK-RV32C-NEXT: lh a1, 16(a1) +; CHECK-RV32C-NEXT: lh t0, 4(a1) +; CHECK-RV32C-NEXT: lh a2, 8(a1) +; CHECK-RV32C-NEXT: lh a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a2, 14(a0) +; CHECK-RV32C-NEXT: sh a5, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a3, 12(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a4, 10(a0) +; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a1, 8(a0) +; CHECK-RV32C-NEXT: sh a7, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a5, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: sh a2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a7, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: ret @@ -2329,30 +2329,30 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 64(a1) -; CHECK-RV64-NEXT: lbu t3, 72(a1) -; CHECK-RV64-NEXT: lbu t4, 80(a1) -; CHECK-RV64-NEXT: lbu t5, 88(a1) -; CHECK-RV64-NEXT: lbu t6, 120(a1) -; CHECK-RV64-NEXT: lbu s0, 112(a1) -; CHECK-RV64-NEXT: lbu s1, 104(a1) -; CHECK-RV64-NEXT: lbu a1, 96(a1) +; CHECK-RV64-NEXT: lbu t2, 96(a1) +; CHECK-RV64-NEXT: lbu t3, 104(a1) +; CHECK-RV64-NEXT: lbu t4, 112(a1) +; CHECK-RV64-NEXT: lbu t5, 120(a1) +; CHECK-RV64-NEXT: lbu t6, 64(a1) +; CHECK-RV64-NEXT: lbu s0, 72(a1) +; CHECK-RV64-NEXT: lbu s1, 80(a1) +; CHECK-RV64-NEXT: lbu a1, 88(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t6, 15(a0) +; CHECK-RV64-NEXT: sb t5, 15(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb s0, 14(a0) +; CHECK-RV64-NEXT: sb t4, 14(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb s1, 13(a0) +; CHECK-RV64-NEXT: sb t3, 13(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a1, 12(a0) +; CHECK-RV64-NEXT: sb t2, 12(a0) ; CHECK-RV64-NEXT: 
ntl.p1 -; CHECK-RV64-NEXT: sb t5, 11(a0) +; CHECK-RV64-NEXT: sb a1, 11(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t4, 10(a0) +; CHECK-RV64-NEXT: sb s1, 10(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t3, 9(a0) +; CHECK-RV64-NEXT: sb s0, 9(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t2, 8(a0) +; CHECK-RV64-NEXT: sb t6, 8(a0) ; CHECK-RV64-NEXT: ntl.p1 ; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.p1 @@ -2390,30 +2390,30 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 32(a1) -; CHECK-RV32-NEXT: lbu t3, 36(a1) -; CHECK-RV32-NEXT: lbu t4, 40(a1) -; CHECK-RV32-NEXT: lbu t5, 44(a1) -; CHECK-RV32-NEXT: lbu t6, 60(a1) -; CHECK-RV32-NEXT: lbu s0, 56(a1) -; CHECK-RV32-NEXT: lbu s1, 52(a1) -; CHECK-RV32-NEXT: lbu a1, 48(a1) +; CHECK-RV32-NEXT: lbu t2, 48(a1) +; CHECK-RV32-NEXT: lbu t3, 52(a1) +; CHECK-RV32-NEXT: lbu t4, 56(a1) +; CHECK-RV32-NEXT: lbu t5, 60(a1) +; CHECK-RV32-NEXT: lbu t6, 32(a1) +; CHECK-RV32-NEXT: lbu s0, 36(a1) +; CHECK-RV32-NEXT: lbu s1, 40(a1) +; CHECK-RV32-NEXT: lbu a1, 44(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t6, 15(a0) +; CHECK-RV32-NEXT: sb t5, 15(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb s0, 14(a0) +; CHECK-RV32-NEXT: sb t4, 14(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb s1, 13(a0) +; CHECK-RV32-NEXT: sb t3, 13(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a1, 12(a0) +; CHECK-RV32-NEXT: sb t2, 12(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t5, 11(a0) +; CHECK-RV32-NEXT: sb a1, 11(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t4, 10(a0) +; CHECK-RV32-NEXT: sb s1, 10(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t3, 9(a0) +; CHECK-RV32-NEXT: sb s0, 9(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t2, 8(a0) +; CHECK-RV32-NEXT: sb t6, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 ; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.p1 @@ -2451,28 +2451,28 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu a3, 104(a1) +; CHECK-RV64C-NEXT: lbu a4, 112(a1) +; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu a3, 72(a1) -; CHECK-RV64C-NEXT: lbu a4, 80(a1) -; CHECK-RV64C-NEXT: lbu a5, 88(a1) -; CHECK-RV64C-NEXT: lbu a2, 120(a1) -; CHECK-RV64C-NEXT: lbu s0, 112(a1) -; CHECK-RV64C-NEXT: lbu s1, 104(a1) -; CHECK-RV64C-NEXT: lbu a1, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 72(a1) +; CHECK-RV64C-NEXT: lbu s1, 80(a1) +; CHECK-RV64C-NEXT: lbu a1, 88(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a2, 15(a0) +; CHECK-RV64C-NEXT: sb a5, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb s0, 14(a0) +; CHECK-RV64C-NEXT: sb a4, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb s1, 13(a0) +; CHECK-RV64C-NEXT: sb a3, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a1, 12(a0) +; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a5, 11(a0) +; CHECK-RV64C-NEXT: sb a1, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a4, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a3, 9(a0) +; CHECK-RV64C-NEXT: sb s0, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 @@ 
-2512,28 +2512,28 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu a3, 52(a1) +; CHECK-RV32C-NEXT: lbu a4, 56(a1) +; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu a3, 36(a1) -; CHECK-RV32C-NEXT: lbu a4, 40(a1) -; CHECK-RV32C-NEXT: lbu a5, 44(a1) -; CHECK-RV32C-NEXT: lbu a2, 60(a1) -; CHECK-RV32C-NEXT: lbu s0, 56(a1) -; CHECK-RV32C-NEXT: lbu s1, 52(a1) -; CHECK-RV32C-NEXT: lbu a1, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 36(a1) +; CHECK-RV32C-NEXT: lbu s1, 40(a1) +; CHECK-RV32C-NEXT: lbu a1, 44(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a2, 15(a0) +; CHECK-RV32C-NEXT: sb a5, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb s0, 14(a0) +; CHECK-RV32C-NEXT: sb a4, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb s1, 13(a0) +; CHECK-RV32C-NEXT: sb a3, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a1, 12(a0) +; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a5, 11(a0) +; CHECK-RV32C-NEXT: sb a1, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a4, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a3, 9(a0) +; CHECK-RV32C-NEXT: sb s0, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 @@ -2577,112 +2577,112 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 0(a1) -; CHECK-RV64-NEXT: lh a3, 8(a1) -; CHECK-RV64-NEXT: lh a4, 16(a1) -; CHECK-RV64-NEXT: lh a5, 24(a1) -; CHECK-RV64-NEXT: lh a6, 56(a1) -; CHECK-RV64-NEXT: lh a7, 48(a1) -; CHECK-RV64-NEXT: lh t0, 40(a1) -; CHECK-RV64-NEXT: lh a1, 32(a1) +; CHECK-RV64-NEXT: lh a2, 32(a1) +; CHECK-RV64-NEXT: lh a3, 40(a1) +; CHECK-RV64-NEXT: lh a4, 48(a1) +; CHECK-RV64-NEXT: lh a5, 56(a1) +; CHECK-RV64-NEXT: lh a6, 0(a1) +; CHECK-RV64-NEXT: lh a7, 8(a1) +; CHECK-RV64-NEXT: lh t0, 16(a1) +; CHECK-RV64-NEXT: lh a1, 24(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a6, 14(a0) +; CHECK-RV64-NEXT: sh a5, 14(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a7, 12(a0) +; CHECK-RV64-NEXT: sh a4, 12(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh t0, 10(a0) +; CHECK-RV64-NEXT: sh a3, 10(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a1, 8(a0) +; CHECK-RV64-NEXT: sh a2, 8(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a5, 6(a0) +; CHECK-RV64-NEXT: sh a1, 6(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a4, 4(a0) +; CHECK-RV64-NEXT: sh t0, 4(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a3, 2(a0) +; CHECK-RV64-NEXT: sh a7, 2(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a2, 0(a0) +; CHECK-RV64-NEXT: sh a6, 0(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 0(a1) -; CHECK-RV32-NEXT: lh a3, 4(a1) -; CHECK-RV32-NEXT: lh a4, 8(a1) -; CHECK-RV32-NEXT: lh a5, 12(a1) -; CHECK-RV32-NEXT: lh a6, 28(a1) -; CHECK-RV32-NEXT: lh a7, 24(a1) -; CHECK-RV32-NEXT: lh t0, 20(a1) -; CHECK-RV32-NEXT: lh a1, 16(a1) +; CHECK-RV32-NEXT: lh a2, 16(a1) +; CHECK-RV32-NEXT: lh a3, 20(a1) +; CHECK-RV32-NEXT: lh a4, 24(a1) +; CHECK-RV32-NEXT: lh a5, 28(a1) +; 
CHECK-RV32-NEXT: lh a6, 0(a1) +; CHECK-RV32-NEXT: lh a7, 4(a1) +; CHECK-RV32-NEXT: lh t0, 8(a1) +; CHECK-RV32-NEXT: lh a1, 12(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a6, 14(a0) +; CHECK-RV32-NEXT: sh a5, 14(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a7, 12(a0) +; CHECK-RV32-NEXT: sh a4, 12(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh t0, 10(a0) +; CHECK-RV32-NEXT: sh a3, 10(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a1, 8(a0) +; CHECK-RV32-NEXT: sh a2, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a5, 6(a0) +; CHECK-RV32-NEXT: sh a1, 6(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a4, 4(a0) +; CHECK-RV32-NEXT: sh t0, 4(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a3, 2(a0) +; CHECK-RV32-NEXT: sh a7, 2(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a2, 0(a0) +; CHECK-RV32-NEXT: sh a6, 0(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV64C: # %bb.0: +; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a3, 40(a1) +; CHECK-RV64C-NEXT: lh a4, 48(a1) +; CHECK-RV64C-NEXT: lh a5, 56(a1) ; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh a7, 8(a1) -; CHECK-RV64C-NEXT: lh t0, 16(a1) -; CHECK-RV64C-NEXT: lh a5, 24(a1) -; CHECK-RV64C-NEXT: lh a2, 56(a1) -; CHECK-RV64C-NEXT: lh a3, 48(a1) -; CHECK-RV64C-NEXT: lh a4, 40(a1) -; CHECK-RV64C-NEXT: lh a1, 32(a1) +; CHECK-RV64C-NEXT: lh t0, 8(a1) +; CHECK-RV64C-NEXT: lh a2, 16(a1) +; CHECK-RV64C-NEXT: lh a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a2, 14(a0) +; CHECK-RV64C-NEXT: sh a5, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a3, 12(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a4, 10(a0) +; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a1, 8(a0) +; CHECK-RV64C-NEXT: sh a7, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a5, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: sh a2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a7, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV32C: # %bb.0: +; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a3, 20(a1) +; CHECK-RV32C-NEXT: lh a4, 24(a1) +; CHECK-RV32C-NEXT: lh a5, 28(a1) ; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh a7, 4(a1) -; CHECK-RV32C-NEXT: lh t0, 8(a1) -; CHECK-RV32C-NEXT: lh a5, 12(a1) -; CHECK-RV32C-NEXT: lh a2, 28(a1) -; CHECK-RV32C-NEXT: lh a3, 24(a1) -; CHECK-RV32C-NEXT: lh a4, 20(a1) -; CHECK-RV32C-NEXT: lh a1, 16(a1) +; CHECK-RV32C-NEXT: lh t0, 4(a1) +; CHECK-RV32C-NEXT: lh a2, 8(a1) +; CHECK-RV32C-NEXT: lh a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a2, 14(a0) +; CHECK-RV32C-NEXT: sh a5, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a3, 12(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a4, 10(a0) +; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a1, 8(a0) +; CHECK-RV32C-NEXT: sh a7, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a5, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: sh a2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a7, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 2(a0) ; 
CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: ret @@ -3743,30 +3743,30 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 64(a1) -; CHECK-RV64-NEXT: lbu t3, 72(a1) -; CHECK-RV64-NEXT: lbu t4, 80(a1) -; CHECK-RV64-NEXT: lbu t5, 88(a1) -; CHECK-RV64-NEXT: lbu t6, 120(a1) -; CHECK-RV64-NEXT: lbu s0, 112(a1) -; CHECK-RV64-NEXT: lbu s1, 104(a1) -; CHECK-RV64-NEXT: lbu a1, 96(a1) +; CHECK-RV64-NEXT: lbu t2, 96(a1) +; CHECK-RV64-NEXT: lbu t3, 104(a1) +; CHECK-RV64-NEXT: lbu t4, 112(a1) +; CHECK-RV64-NEXT: lbu t5, 120(a1) +; CHECK-RV64-NEXT: lbu t6, 64(a1) +; CHECK-RV64-NEXT: lbu s0, 72(a1) +; CHECK-RV64-NEXT: lbu s1, 80(a1) +; CHECK-RV64-NEXT: lbu a1, 88(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t6, 15(a0) +; CHECK-RV64-NEXT: sb t5, 15(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb s0, 14(a0) +; CHECK-RV64-NEXT: sb t4, 14(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb s1, 13(a0) +; CHECK-RV64-NEXT: sb t3, 13(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a1, 12(a0) +; CHECK-RV64-NEXT: sb t2, 12(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t5, 11(a0) +; CHECK-RV64-NEXT: sb a1, 11(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t4, 10(a0) +; CHECK-RV64-NEXT: sb s1, 10(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t3, 9(a0) +; CHECK-RV64-NEXT: sb s0, 9(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t2, 8(a0) +; CHECK-RV64-NEXT: sb t6, 8(a0) ; CHECK-RV64-NEXT: ntl.pall ; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.pall @@ -3804,30 +3804,30 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 32(a1) -; CHECK-RV32-NEXT: lbu t3, 36(a1) -; CHECK-RV32-NEXT: lbu t4, 40(a1) -; CHECK-RV32-NEXT: lbu t5, 44(a1) -; CHECK-RV32-NEXT: lbu t6, 60(a1) -; CHECK-RV32-NEXT: lbu s0, 56(a1) -; CHECK-RV32-NEXT: lbu s1, 52(a1) -; CHECK-RV32-NEXT: lbu a1, 48(a1) +; CHECK-RV32-NEXT: lbu t2, 48(a1) +; CHECK-RV32-NEXT: lbu t3, 52(a1) +; CHECK-RV32-NEXT: lbu t4, 56(a1) +; CHECK-RV32-NEXT: lbu t5, 60(a1) +; CHECK-RV32-NEXT: lbu t6, 32(a1) +; CHECK-RV32-NEXT: lbu s0, 36(a1) +; CHECK-RV32-NEXT: lbu s1, 40(a1) +; CHECK-RV32-NEXT: lbu a1, 44(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t6, 15(a0) +; CHECK-RV32-NEXT: sb t5, 15(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb s0, 14(a0) +; CHECK-RV32-NEXT: sb t4, 14(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb s1, 13(a0) +; CHECK-RV32-NEXT: sb t3, 13(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a1, 12(a0) +; CHECK-RV32-NEXT: sb t2, 12(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t5, 11(a0) +; CHECK-RV32-NEXT: sb a1, 11(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t4, 10(a0) +; CHECK-RV32-NEXT: sb s1, 10(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t3, 9(a0) +; CHECK-RV32-NEXT: sb s0, 9(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t2, 8(a0) +; CHECK-RV32-NEXT: sb t6, 8(a0) ; CHECK-RV32-NEXT: ntl.pall ; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.pall @@ -3865,28 +3865,28 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; 
CHECK-RV64C-NEXT: lbu a3, 104(a1) +; CHECK-RV64C-NEXT: lbu a4, 112(a1) +; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu a3, 72(a1) -; CHECK-RV64C-NEXT: lbu a4, 80(a1) -; CHECK-RV64C-NEXT: lbu a5, 88(a1) -; CHECK-RV64C-NEXT: lbu a2, 120(a1) -; CHECK-RV64C-NEXT: lbu s0, 112(a1) -; CHECK-RV64C-NEXT: lbu s1, 104(a1) -; CHECK-RV64C-NEXT: lbu a1, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 72(a1) +; CHECK-RV64C-NEXT: lbu s1, 80(a1) +; CHECK-RV64C-NEXT: lbu a1, 88(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a2, 15(a0) +; CHECK-RV64C-NEXT: sb a5, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb s0, 14(a0) +; CHECK-RV64C-NEXT: sb a4, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb s1, 13(a0) +; CHECK-RV64C-NEXT: sb a3, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a1, 12(a0) +; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a5, 11(a0) +; CHECK-RV64C-NEXT: sb a1, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a4, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a3, 9(a0) +; CHECK-RV64C-NEXT: sb s0, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.pall @@ -3926,28 +3926,28 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu a3, 52(a1) +; CHECK-RV32C-NEXT: lbu a4, 56(a1) +; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu a3, 36(a1) -; CHECK-RV32C-NEXT: lbu a4, 40(a1) -; CHECK-RV32C-NEXT: lbu a5, 44(a1) -; CHECK-RV32C-NEXT: lbu a2, 60(a1) -; CHECK-RV32C-NEXT: lbu s0, 56(a1) -; CHECK-RV32C-NEXT: lbu s1, 52(a1) -; CHECK-RV32C-NEXT: lbu a1, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 36(a1) +; CHECK-RV32C-NEXT: lbu s1, 40(a1) +; CHECK-RV32C-NEXT: lbu a1, 44(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a2, 15(a0) +; CHECK-RV32C-NEXT: sb a5, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb s0, 14(a0) +; CHECK-RV32C-NEXT: sb a4, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb s1, 13(a0) +; CHECK-RV32C-NEXT: sb a3, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a1, 12(a0) +; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a5, 11(a0) +; CHECK-RV32C-NEXT: sb a1, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a4, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a3, 9(a0) +; CHECK-RV32C-NEXT: sb s0, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall @@ -3991,112 +3991,112 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 0(a1) -; CHECK-RV64-NEXT: lh a3, 8(a1) -; CHECK-RV64-NEXT: lh a4, 16(a1) -; CHECK-RV64-NEXT: lh a5, 24(a1) -; CHECK-RV64-NEXT: lh a6, 56(a1) -; CHECK-RV64-NEXT: lh a7, 48(a1) -; CHECK-RV64-NEXT: lh t0, 40(a1) -; CHECK-RV64-NEXT: lh a1, 32(a1) +; CHECK-RV64-NEXT: lh a2, 32(a1) +; CHECK-RV64-NEXT: lh a3, 40(a1) +; CHECK-RV64-NEXT: lh a4, 48(a1) +; CHECK-RV64-NEXT: lh a5, 56(a1) +; CHECK-RV64-NEXT: lh a6, 0(a1) +; CHECK-RV64-NEXT: lh 
a7, 8(a1) +; CHECK-RV64-NEXT: lh t0, 16(a1) +; CHECK-RV64-NEXT: lh a1, 24(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a6, 14(a0) +; CHECK-RV64-NEXT: sh a5, 14(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a7, 12(a0) +; CHECK-RV64-NEXT: sh a4, 12(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh t0, 10(a0) +; CHECK-RV64-NEXT: sh a3, 10(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a1, 8(a0) +; CHECK-RV64-NEXT: sh a2, 8(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a5, 6(a0) +; CHECK-RV64-NEXT: sh a1, 6(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a4, 4(a0) +; CHECK-RV64-NEXT: sh t0, 4(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a3, 2(a0) +; CHECK-RV64-NEXT: sh a7, 2(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a2, 0(a0) +; CHECK-RV64-NEXT: sh a6, 0(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 0(a1) -; CHECK-RV32-NEXT: lh a3, 4(a1) -; CHECK-RV32-NEXT: lh a4, 8(a1) -; CHECK-RV32-NEXT: lh a5, 12(a1) -; CHECK-RV32-NEXT: lh a6, 28(a1) -; CHECK-RV32-NEXT: lh a7, 24(a1) -; CHECK-RV32-NEXT: lh t0, 20(a1) -; CHECK-RV32-NEXT: lh a1, 16(a1) +; CHECK-RV32-NEXT: lh a2, 16(a1) +; CHECK-RV32-NEXT: lh a3, 20(a1) +; CHECK-RV32-NEXT: lh a4, 24(a1) +; CHECK-RV32-NEXT: lh a5, 28(a1) +; CHECK-RV32-NEXT: lh a6, 0(a1) +; CHECK-RV32-NEXT: lh a7, 4(a1) +; CHECK-RV32-NEXT: lh t0, 8(a1) +; CHECK-RV32-NEXT: lh a1, 12(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a6, 14(a0) +; CHECK-RV32-NEXT: sh a5, 14(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a7, 12(a0) +; CHECK-RV32-NEXT: sh a4, 12(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh t0, 10(a0) +; CHECK-RV32-NEXT: sh a3, 10(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a1, 8(a0) +; CHECK-RV32-NEXT: sh a2, 8(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a5, 6(a0) +; CHECK-RV32-NEXT: sh a1, 6(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a4, 4(a0) +; CHECK-RV32-NEXT: sh t0, 4(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a3, 2(a0) +; CHECK-RV32-NEXT: sh a7, 2(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a2, 0(a0) +; CHECK-RV32-NEXT: sh a6, 0(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV64C: # %bb.0: +; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a3, 40(a1) +; CHECK-RV64C-NEXT: lh a4, 48(a1) +; CHECK-RV64C-NEXT: lh a5, 56(a1) ; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh a7, 8(a1) -; CHECK-RV64C-NEXT: lh t0, 16(a1) -; CHECK-RV64C-NEXT: lh a5, 24(a1) -; CHECK-RV64C-NEXT: lh a2, 56(a1) -; CHECK-RV64C-NEXT: lh a3, 48(a1) -; CHECK-RV64C-NEXT: lh a4, 40(a1) -; CHECK-RV64C-NEXT: lh a1, 32(a1) +; CHECK-RV64C-NEXT: lh t0, 8(a1) +; CHECK-RV64C-NEXT: lh a2, 16(a1) +; CHECK-RV64C-NEXT: lh a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a2, 14(a0) +; CHECK-RV64C-NEXT: sh a5, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a3, 12(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a4, 10(a0) +; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a1, 8(a0) +; CHECK-RV64C-NEXT: sh a7, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a5, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: sh a2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a7, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 
2(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV32C: # %bb.0: +; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a3, 20(a1) +; CHECK-RV32C-NEXT: lh a4, 24(a1) +; CHECK-RV32C-NEXT: lh a5, 28(a1) ; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh a7, 4(a1) -; CHECK-RV32C-NEXT: lh t0, 8(a1) -; CHECK-RV32C-NEXT: lh a5, 12(a1) -; CHECK-RV32C-NEXT: lh a2, 28(a1) -; CHECK-RV32C-NEXT: lh a3, 24(a1) -; CHECK-RV32C-NEXT: lh a4, 20(a1) -; CHECK-RV32C-NEXT: lh a1, 16(a1) +; CHECK-RV32C-NEXT: lh t0, 4(a1) +; CHECK-RV32C-NEXT: lh a2, 8(a1) +; CHECK-RV32C-NEXT: lh a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a2, 14(a0) +; CHECK-RV32C-NEXT: sh a5, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a3, 12(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a4, 10(a0) +; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a1, 8(a0) +; CHECK-RV32C-NEXT: sh a7, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a5, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: sh a2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a7, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: ret @@ -5157,30 +5157,30 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 64(a1) -; CHECK-RV64-NEXT: lbu t3, 72(a1) -; CHECK-RV64-NEXT: lbu t4, 80(a1) -; CHECK-RV64-NEXT: lbu t5, 88(a1) -; CHECK-RV64-NEXT: lbu t6, 120(a1) -; CHECK-RV64-NEXT: lbu s0, 112(a1) -; CHECK-RV64-NEXT: lbu s1, 104(a1) -; CHECK-RV64-NEXT: lbu a1, 96(a1) +; CHECK-RV64-NEXT: lbu t2, 96(a1) +; CHECK-RV64-NEXT: lbu t3, 104(a1) +; CHECK-RV64-NEXT: lbu t4, 112(a1) +; CHECK-RV64-NEXT: lbu t5, 120(a1) +; CHECK-RV64-NEXT: lbu t6, 64(a1) +; CHECK-RV64-NEXT: lbu s0, 72(a1) +; CHECK-RV64-NEXT: lbu s1, 80(a1) +; CHECK-RV64-NEXT: lbu a1, 88(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t6, 15(a0) +; CHECK-RV64-NEXT: sb t5, 15(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb s0, 14(a0) +; CHECK-RV64-NEXT: sb t4, 14(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb s1, 13(a0) +; CHECK-RV64-NEXT: sb t3, 13(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a1, 12(a0) +; CHECK-RV64-NEXT: sb t2, 12(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t5, 11(a0) +; CHECK-RV64-NEXT: sb a1, 11(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t4, 10(a0) +; CHECK-RV64-NEXT: sb s1, 10(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t3, 9(a0) +; CHECK-RV64-NEXT: sb s0, 9(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t2, 8(a0) +; CHECK-RV64-NEXT: sb t6, 8(a0) ; CHECK-RV64-NEXT: ntl.s1 ; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.s1 @@ -5218,30 +5218,30 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 32(a1) -; CHECK-RV32-NEXT: lbu t3, 36(a1) -; CHECK-RV32-NEXT: lbu t4, 40(a1) -; CHECK-RV32-NEXT: lbu t5, 44(a1) -; CHECK-RV32-NEXT: lbu t6, 60(a1) -; CHECK-RV32-NEXT: lbu s0, 56(a1) -; CHECK-RV32-NEXT: lbu s1, 52(a1) -; CHECK-RV32-NEXT: lbu a1, 48(a1) +; 
CHECK-RV32-NEXT: lbu t2, 48(a1) +; CHECK-RV32-NEXT: lbu t3, 52(a1) +; CHECK-RV32-NEXT: lbu t4, 56(a1) +; CHECK-RV32-NEXT: lbu t5, 60(a1) +; CHECK-RV32-NEXT: lbu t6, 32(a1) +; CHECK-RV32-NEXT: lbu s0, 36(a1) +; CHECK-RV32-NEXT: lbu s1, 40(a1) +; CHECK-RV32-NEXT: lbu a1, 44(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t6, 15(a0) +; CHECK-RV32-NEXT: sb t5, 15(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb s0, 14(a0) +; CHECK-RV32-NEXT: sb t4, 14(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb s1, 13(a0) +; CHECK-RV32-NEXT: sb t3, 13(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a1, 12(a0) +; CHECK-RV32-NEXT: sb t2, 12(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t5, 11(a0) +; CHECK-RV32-NEXT: sb a1, 11(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t4, 10(a0) +; CHECK-RV32-NEXT: sb s1, 10(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t3, 9(a0) +; CHECK-RV32-NEXT: sb s0, 9(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t2, 8(a0) +; CHECK-RV32-NEXT: sb t6, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 ; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.s1 @@ -5279,28 +5279,28 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu a3, 104(a1) +; CHECK-RV64C-NEXT: lbu a4, 112(a1) +; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu a3, 72(a1) -; CHECK-RV64C-NEXT: lbu a4, 80(a1) -; CHECK-RV64C-NEXT: lbu a5, 88(a1) -; CHECK-RV64C-NEXT: lbu a2, 120(a1) -; CHECK-RV64C-NEXT: lbu s0, 112(a1) -; CHECK-RV64C-NEXT: lbu s1, 104(a1) -; CHECK-RV64C-NEXT: lbu a1, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 72(a1) +; CHECK-RV64C-NEXT: lbu s1, 80(a1) +; CHECK-RV64C-NEXT: lbu a1, 88(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a2, 15(a0) +; CHECK-RV64C-NEXT: sb a5, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb s0, 14(a0) +; CHECK-RV64C-NEXT: sb a4, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb s1, 13(a0) +; CHECK-RV64C-NEXT: sb a3, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a1, 12(a0) +; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a5, 11(a0) +; CHECK-RV64C-NEXT: sb a1, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a4, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a3, 9(a0) +; CHECK-RV64C-NEXT: sb s0, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 @@ -5340,28 +5340,28 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu a3, 52(a1) +; CHECK-RV32C-NEXT: lbu a4, 56(a1) +; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu a3, 36(a1) -; CHECK-RV32C-NEXT: lbu a4, 40(a1) -; CHECK-RV32C-NEXT: lbu a5, 44(a1) -; CHECK-RV32C-NEXT: lbu a2, 60(a1) -; CHECK-RV32C-NEXT: lbu s0, 56(a1) -; CHECK-RV32C-NEXT: lbu s1, 52(a1) -; CHECK-RV32C-NEXT: lbu a1, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 36(a1) +; CHECK-RV32C-NEXT: lbu s1, 40(a1) +; CHECK-RV32C-NEXT: lbu a1, 44(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a2, 15(a0) +; CHECK-RV32C-NEXT: sb a5, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb s0, 14(a0) +; CHECK-RV32C-NEXT: 
sb a4, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb s1, 13(a0) +; CHECK-RV32C-NEXT: sb a3, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a1, 12(a0) +; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a5, 11(a0) +; CHECK-RV32C-NEXT: sb a1, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a4, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a3, 9(a0) +; CHECK-RV32C-NEXT: sb s0, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 @@ -5405,112 +5405,112 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 0(a1) -; CHECK-RV64-NEXT: lh a3, 8(a1) -; CHECK-RV64-NEXT: lh a4, 16(a1) -; CHECK-RV64-NEXT: lh a5, 24(a1) -; CHECK-RV64-NEXT: lh a6, 56(a1) -; CHECK-RV64-NEXT: lh a7, 48(a1) -; CHECK-RV64-NEXT: lh t0, 40(a1) -; CHECK-RV64-NEXT: lh a1, 32(a1) +; CHECK-RV64-NEXT: lh a2, 32(a1) +; CHECK-RV64-NEXT: lh a3, 40(a1) +; CHECK-RV64-NEXT: lh a4, 48(a1) +; CHECK-RV64-NEXT: lh a5, 56(a1) +; CHECK-RV64-NEXT: lh a6, 0(a1) +; CHECK-RV64-NEXT: lh a7, 8(a1) +; CHECK-RV64-NEXT: lh t0, 16(a1) +; CHECK-RV64-NEXT: lh a1, 24(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a6, 14(a0) +; CHECK-RV64-NEXT: sh a5, 14(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a7, 12(a0) +; CHECK-RV64-NEXT: sh a4, 12(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh t0, 10(a0) +; CHECK-RV64-NEXT: sh a3, 10(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a1, 8(a0) +; CHECK-RV64-NEXT: sh a2, 8(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a5, 6(a0) +; CHECK-RV64-NEXT: sh a1, 6(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a4, 4(a0) +; CHECK-RV64-NEXT: sh t0, 4(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a3, 2(a0) +; CHECK-RV64-NEXT: sh a7, 2(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a2, 0(a0) +; CHECK-RV64-NEXT: sh a6, 0(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 0(a1) -; CHECK-RV32-NEXT: lh a3, 4(a1) -; CHECK-RV32-NEXT: lh a4, 8(a1) -; CHECK-RV32-NEXT: lh a5, 12(a1) -; CHECK-RV32-NEXT: lh a6, 28(a1) -; CHECK-RV32-NEXT: lh a7, 24(a1) -; CHECK-RV32-NEXT: lh t0, 20(a1) -; CHECK-RV32-NEXT: lh a1, 16(a1) +; CHECK-RV32-NEXT: lh a2, 16(a1) +; CHECK-RV32-NEXT: lh a3, 20(a1) +; CHECK-RV32-NEXT: lh a4, 24(a1) +; CHECK-RV32-NEXT: lh a5, 28(a1) +; CHECK-RV32-NEXT: lh a6, 0(a1) +; CHECK-RV32-NEXT: lh a7, 4(a1) +; CHECK-RV32-NEXT: lh t0, 8(a1) +; CHECK-RV32-NEXT: lh a1, 12(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a6, 14(a0) +; CHECK-RV32-NEXT: sh a5, 14(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a7, 12(a0) +; CHECK-RV32-NEXT: sh a4, 12(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh t0, 10(a0) +; CHECK-RV32-NEXT: sh a3, 10(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a1, 8(a0) +; CHECK-RV32-NEXT: sh a2, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a5, 6(a0) +; CHECK-RV32-NEXT: sh a1, 6(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a4, 4(a0) +; CHECK-RV32-NEXT: sh t0, 4(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a3, 2(a0) +; CHECK-RV32-NEXT: sh a7, 2(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a2, 0(a0) +; CHECK-RV32-NEXT: sh a6, 0(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: 
test_nontemporal_S1_store_v8i16: ; CHECK-RV64C: # %bb.0: +; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a3, 40(a1) +; CHECK-RV64C-NEXT: lh a4, 48(a1) +; CHECK-RV64C-NEXT: lh a5, 56(a1) ; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh a7, 8(a1) -; CHECK-RV64C-NEXT: lh t0, 16(a1) -; CHECK-RV64C-NEXT: lh a5, 24(a1) -; CHECK-RV64C-NEXT: lh a2, 56(a1) -; CHECK-RV64C-NEXT: lh a3, 48(a1) -; CHECK-RV64C-NEXT: lh a4, 40(a1) -; CHECK-RV64C-NEXT: lh a1, 32(a1) +; CHECK-RV64C-NEXT: lh t0, 8(a1) +; CHECK-RV64C-NEXT: lh a2, 16(a1) +; CHECK-RV64C-NEXT: lh a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a2, 14(a0) +; CHECK-RV64C-NEXT: sh a5, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a3, 12(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a4, 10(a0) +; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a1, 8(a0) +; CHECK-RV64C-NEXT: sh a7, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a5, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: sh a2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a7, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV32C: # %bb.0: +; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a3, 20(a1) +; CHECK-RV32C-NEXT: lh a4, 24(a1) +; CHECK-RV32C-NEXT: lh a5, 28(a1) ; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh a7, 4(a1) -; CHECK-RV32C-NEXT: lh t0, 8(a1) -; CHECK-RV32C-NEXT: lh a5, 12(a1) -; CHECK-RV32C-NEXT: lh a2, 28(a1) -; CHECK-RV32C-NEXT: lh a3, 24(a1) -; CHECK-RV32C-NEXT: lh a4, 20(a1) -; CHECK-RV32C-NEXT: lh a1, 16(a1) +; CHECK-RV32C-NEXT: lh t0, 4(a1) +; CHECK-RV32C-NEXT: lh a2, 8(a1) +; CHECK-RV32C-NEXT: lh a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a2, 14(a0) +; CHECK-RV32C-NEXT: sh a5, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a3, 12(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a4, 10(a0) +; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a1, 8(a0) +; CHECK-RV32C-NEXT: sh a7, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a5, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: sh a2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a7, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: ret @@ -6571,30 +6571,30 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 64(a1) -; CHECK-RV64-NEXT: lbu t3, 72(a1) -; CHECK-RV64-NEXT: lbu t4, 80(a1) -; CHECK-RV64-NEXT: lbu t5, 88(a1) -; CHECK-RV64-NEXT: lbu t6, 120(a1) -; CHECK-RV64-NEXT: lbu s0, 112(a1) -; CHECK-RV64-NEXT: lbu s1, 104(a1) -; CHECK-RV64-NEXT: lbu a1, 96(a1) +; CHECK-RV64-NEXT: lbu t2, 96(a1) +; CHECK-RV64-NEXT: lbu t3, 104(a1) +; CHECK-RV64-NEXT: lbu t4, 112(a1) +; CHECK-RV64-NEXT: lbu t5, 120(a1) +; CHECK-RV64-NEXT: lbu t6, 64(a1) +; CHECK-RV64-NEXT: lbu s0, 72(a1) +; CHECK-RV64-NEXT: lbu s1, 80(a1) +; CHECK-RV64-NEXT: lbu a1, 88(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t6, 15(a0) +; 
CHECK-RV64-NEXT: sb t5, 15(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s0, 14(a0) +; CHECK-RV64-NEXT: sb t4, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s1, 13(a0) +; CHECK-RV64-NEXT: sb t3, 13(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a1, 12(a0) +; CHECK-RV64-NEXT: sb t2, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t5, 11(a0) +; CHECK-RV64-NEXT: sb a1, 11(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t4, 10(a0) +; CHECK-RV64-NEXT: sb s1, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t3, 9(a0) +; CHECK-RV64-NEXT: sb s0, 9(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t2, 8(a0) +; CHECK-RV64-NEXT: sb t6, 8(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.all @@ -6632,30 +6632,30 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 32(a1) -; CHECK-RV32-NEXT: lbu t3, 36(a1) -; CHECK-RV32-NEXT: lbu t4, 40(a1) -; CHECK-RV32-NEXT: lbu t5, 44(a1) -; CHECK-RV32-NEXT: lbu t6, 60(a1) -; CHECK-RV32-NEXT: lbu s0, 56(a1) -; CHECK-RV32-NEXT: lbu s1, 52(a1) -; CHECK-RV32-NEXT: lbu a1, 48(a1) +; CHECK-RV32-NEXT: lbu t2, 48(a1) +; CHECK-RV32-NEXT: lbu t3, 52(a1) +; CHECK-RV32-NEXT: lbu t4, 56(a1) +; CHECK-RV32-NEXT: lbu t5, 60(a1) +; CHECK-RV32-NEXT: lbu t6, 32(a1) +; CHECK-RV32-NEXT: lbu s0, 36(a1) +; CHECK-RV32-NEXT: lbu s1, 40(a1) +; CHECK-RV32-NEXT: lbu a1, 44(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t6, 15(a0) +; CHECK-RV32-NEXT: sb t5, 15(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s0, 14(a0) +; CHECK-RV32-NEXT: sb t4, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s1, 13(a0) +; CHECK-RV32-NEXT: sb t3, 13(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a1, 12(a0) +; CHECK-RV32-NEXT: sb t2, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t5, 11(a0) +; CHECK-RV32-NEXT: sb a1, 11(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t4, 10(a0) +; CHECK-RV32-NEXT: sb s1, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t3, 9(a0) +; CHECK-RV32-NEXT: sb s0, 9(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t2, 8(a0) +; CHECK-RV32-NEXT: sb t6, 8(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.all @@ -6693,28 +6693,28 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu a3, 104(a1) +; CHECK-RV64C-NEXT: lbu a4, 112(a1) +; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu a3, 72(a1) -; CHECK-RV64C-NEXT: lbu a4, 80(a1) -; CHECK-RV64C-NEXT: lbu a5, 88(a1) -; CHECK-RV64C-NEXT: lbu a2, 120(a1) -; CHECK-RV64C-NEXT: lbu s0, 112(a1) -; CHECK-RV64C-NEXT: lbu s1, 104(a1) -; CHECK-RV64C-NEXT: lbu a1, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 72(a1) +; CHECK-RV64C-NEXT: lbu s1, 80(a1) +; CHECK-RV64C-NEXT: lbu a1, 88(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a2, 15(a0) +; CHECK-RV64C-NEXT: sb a5, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s0, 14(a0) +; CHECK-RV64C-NEXT: sb a4, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s1, 13(a0) +; CHECK-RV64C-NEXT: sb a3, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a1, 12(a0) +; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; 
CHECK-RV64C-NEXT: sb a5, 11(a0) +; CHECK-RV64C-NEXT: sb a1, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a4, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a3, 9(a0) +; CHECK-RV64C-NEXT: sb s0, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all @@ -6754,28 +6754,28 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu a3, 52(a1) +; CHECK-RV32C-NEXT: lbu a4, 56(a1) +; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu a3, 36(a1) -; CHECK-RV32C-NEXT: lbu a4, 40(a1) -; CHECK-RV32C-NEXT: lbu a5, 44(a1) -; CHECK-RV32C-NEXT: lbu a2, 60(a1) -; CHECK-RV32C-NEXT: lbu s0, 56(a1) -; CHECK-RV32C-NEXT: lbu s1, 52(a1) -; CHECK-RV32C-NEXT: lbu a1, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 36(a1) +; CHECK-RV32C-NEXT: lbu s1, 40(a1) +; CHECK-RV32C-NEXT: lbu a1, 44(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a2, 15(a0) +; CHECK-RV32C-NEXT: sb a5, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s0, 14(a0) +; CHECK-RV32C-NEXT: sb a4, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s1, 13(a0) +; CHECK-RV32C-NEXT: sb a3, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a1, 12(a0) +; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a5, 11(a0) +; CHECK-RV32C-NEXT: sb a1, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a4, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a3, 9(a0) +; CHECK-RV32C-NEXT: sb s0, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all @@ -6819,112 +6819,112 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 0(a1) -; CHECK-RV64-NEXT: lh a3, 8(a1) -; CHECK-RV64-NEXT: lh a4, 16(a1) -; CHECK-RV64-NEXT: lh a5, 24(a1) -; CHECK-RV64-NEXT: lh a6, 56(a1) -; CHECK-RV64-NEXT: lh a7, 48(a1) -; CHECK-RV64-NEXT: lh t0, 40(a1) -; CHECK-RV64-NEXT: lh a1, 32(a1) +; CHECK-RV64-NEXT: lh a2, 32(a1) +; CHECK-RV64-NEXT: lh a3, 40(a1) +; CHECK-RV64-NEXT: lh a4, 48(a1) +; CHECK-RV64-NEXT: lh a5, 56(a1) +; CHECK-RV64-NEXT: lh a6, 0(a1) +; CHECK-RV64-NEXT: lh a7, 8(a1) +; CHECK-RV64-NEXT: lh t0, 16(a1) +; CHECK-RV64-NEXT: lh a1, 24(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a6, 14(a0) +; CHECK-RV64-NEXT: sh a5, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a7, 12(a0) +; CHECK-RV64-NEXT: sh a4, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh t0, 10(a0) +; CHECK-RV64-NEXT: sh a3, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a1, 8(a0) +; CHECK-RV64-NEXT: sh a2, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a5, 6(a0) +; CHECK-RV64-NEXT: sh a1, 6(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a4, 4(a0) +; CHECK-RV64-NEXT: sh t0, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a3, 2(a0) +; CHECK-RV64-NEXT: sh a7, 2(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a2, 0(a0) +; CHECK-RV64-NEXT: sh a6, 0(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 0(a1) 
-; CHECK-RV32-NEXT: lh a3, 4(a1) -; CHECK-RV32-NEXT: lh a4, 8(a1) -; CHECK-RV32-NEXT: lh a5, 12(a1) -; CHECK-RV32-NEXT: lh a6, 28(a1) -; CHECK-RV32-NEXT: lh a7, 24(a1) -; CHECK-RV32-NEXT: lh t0, 20(a1) -; CHECK-RV32-NEXT: lh a1, 16(a1) +; CHECK-RV32-NEXT: lh a2, 16(a1) +; CHECK-RV32-NEXT: lh a3, 20(a1) +; CHECK-RV32-NEXT: lh a4, 24(a1) +; CHECK-RV32-NEXT: lh a5, 28(a1) +; CHECK-RV32-NEXT: lh a6, 0(a1) +; CHECK-RV32-NEXT: lh a7, 4(a1) +; CHECK-RV32-NEXT: lh t0, 8(a1) +; CHECK-RV32-NEXT: lh a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a6, 14(a0) +; CHECK-RV32-NEXT: sh a5, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a7, 12(a0) +; CHECK-RV32-NEXT: sh a4, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh t0, 10(a0) +; CHECK-RV32-NEXT: sh a3, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a1, 8(a0) +; CHECK-RV32-NEXT: sh a2, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a5, 6(a0) +; CHECK-RV32-NEXT: sh a1, 6(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a4, 4(a0) +; CHECK-RV32-NEXT: sh t0, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a3, 2(a0) +; CHECK-RV32-NEXT: sh a7, 2(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a2, 0(a0) +; CHECK-RV32-NEXT: sh a6, 0(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV64C: # %bb.0: +; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a3, 40(a1) +; CHECK-RV64C-NEXT: lh a4, 48(a1) +; CHECK-RV64C-NEXT: lh a5, 56(a1) ; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh a7, 8(a1) -; CHECK-RV64C-NEXT: lh t0, 16(a1) -; CHECK-RV64C-NEXT: lh a5, 24(a1) -; CHECK-RV64C-NEXT: lh a2, 56(a1) -; CHECK-RV64C-NEXT: lh a3, 48(a1) -; CHECK-RV64C-NEXT: lh a4, 40(a1) -; CHECK-RV64C-NEXT: lh a1, 32(a1) +; CHECK-RV64C-NEXT: lh t0, 8(a1) +; CHECK-RV64C-NEXT: lh a2, 16(a1) +; CHECK-RV64C-NEXT: lh a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a2, 14(a0) +; CHECK-RV64C-NEXT: sh a5, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a3, 12(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a4, 10(a0) +; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a1, 8(a0) +; CHECK-RV64C-NEXT: sh a7, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a5, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: sh a2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a7, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV32C: # %bb.0: +; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a3, 20(a1) +; CHECK-RV32C-NEXT: lh a4, 24(a1) +; CHECK-RV32C-NEXT: lh a5, 28(a1) ; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh a7, 4(a1) -; CHECK-RV32C-NEXT: lh t0, 8(a1) -; CHECK-RV32C-NEXT: lh a5, 12(a1) -; CHECK-RV32C-NEXT: lh a2, 28(a1) -; CHECK-RV32C-NEXT: lh a3, 24(a1) -; CHECK-RV32C-NEXT: lh a4, 20(a1) -; CHECK-RV32C-NEXT: lh a1, 16(a1) +; CHECK-RV32C-NEXT: lh t0, 4(a1) +; CHECK-RV32C-NEXT: lh a2, 8(a1) +; CHECK-RV32C-NEXT: lh a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a2, 14(a0) +; CHECK-RV32C-NEXT: sh a5, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a3, 12(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a4, 10(a0) +; CHECK-RV32C-NEXT: sh a3, 
10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a1, 8(a0) +; CHECK-RV32C-NEXT: sh a7, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a5, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: sh a2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a7, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 4bb65f376218f1..fe602b5b8fc2bc 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -1241,8 +1241,8 @@ define i64 @foo2(ptr %p) { define void @PR41129(ptr %p64) { ; RV32-LABEL: PR41129: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a2, 4(a0) ; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a2, 4(a0) ; RV32-NEXT: or a3, a1, a2 ; RV32-NEXT: beqz a3, .LBB37_2 ; RV32-NEXT: # %bb.1: # %false diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll index 7548faaae61f47..85c2997e268a94 100644 --- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -1117,26 +1117,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV32IZCMP-NEXT: lw t3, 20(a5) ; RV32IZCMP-NEXT: lw t4, 24(a5) ; RV32IZCMP-NEXT: lw t5, 28(a5) -; RV32IZCMP-NEXT: lw t6, 32(a5) -; RV32IZCMP-NEXT: lw s2, 36(a5) -; RV32IZCMP-NEXT: lw s3, 40(a5) -; RV32IZCMP-NEXT: lw s4, 44(a5) -; RV32IZCMP-NEXT: lw a1, 48(a5) -; RV32IZCMP-NEXT: lw s0, 52(a5) -; RV32IZCMP-NEXT: lw s1, 68(a5) -; RV32IZCMP-NEXT: lw a2, 64(a5) -; RV32IZCMP-NEXT: lw a3, 60(a5) -; RV32IZCMP-NEXT: lw a4, 56(a5) -; RV32IZCMP-NEXT: sw s1, 68(a5) -; RV32IZCMP-NEXT: sw a2, 64(a5) -; RV32IZCMP-NEXT: sw a3, 60(a5) -; RV32IZCMP-NEXT: sw a4, 56(a5) -; RV32IZCMP-NEXT: sw s0, 52(a5) -; RV32IZCMP-NEXT: sw a1, 48(a5) -; RV32IZCMP-NEXT: sw s4, 44(a5) -; RV32IZCMP-NEXT: sw s3, 40(a5) -; RV32IZCMP-NEXT: sw s2, 36(a5) -; RV32IZCMP-NEXT: sw t6, 32(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s2, 52(a5) +; RV32IZCMP-NEXT: lw a3, 56(a5) +; RV32IZCMP-NEXT: lw a4, 60(a5) +; RV32IZCMP-NEXT: lw a1, 64(a5) +; RV32IZCMP-NEXT: lw s0, 68(a5) +; RV32IZCMP-NEXT: lw s3, 32(a5) +; RV32IZCMP-NEXT: lw s4, 36(a5) +; RV32IZCMP-NEXT: lw s1, 40(a5) +; RV32IZCMP-NEXT: lw a2, 44(a5) +; RV32IZCMP-NEXT: sw s0, 68(a5) +; RV32IZCMP-NEXT: sw a1, 64(a5) +; RV32IZCMP-NEXT: sw a4, 60(a5) +; RV32IZCMP-NEXT: sw a3, 56(a5) +; RV32IZCMP-NEXT: sw s2, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw a2, 44(a5) +; RV32IZCMP-NEXT: sw s1, 40(a5) +; RV32IZCMP-NEXT: sw s4, 36(a5) +; RV32IZCMP-NEXT: sw s3, 32(a5) ; RV32IZCMP-NEXT: sw t5, 28(a5) ; RV32IZCMP-NEXT: sw t4, 24(a5) ; RV32IZCMP-NEXT: sw t3, 20(a5) @@ -1160,26 +1160,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV64IZCMP-NEXT: lw t3, 20(a5) ; RV64IZCMP-NEXT: lw t4, 24(a5) ; RV64IZCMP-NEXT: lw t5, 28(a5) -; RV64IZCMP-NEXT: lw t6, 32(a5) -; RV64IZCMP-NEXT: lw s2, 36(a5) -; RV64IZCMP-NEXT: lw s3, 40(a5) -; RV64IZCMP-NEXT: lw s4, 44(a5) -; RV64IZCMP-NEXT: lw a1, 48(a5) -; RV64IZCMP-NEXT: lw s0, 52(a5) -; RV64IZCMP-NEXT: lw s1, 68(a5) -; RV64IZCMP-NEXT: lw a2, 64(a5) -; RV64IZCMP-NEXT: lw a3, 60(a5) -; RV64IZCMP-NEXT: lw a4, 56(a5) -; RV64IZCMP-NEXT: sw s1, 68(a5) -; RV64IZCMP-NEXT: sw a2, 64(a5) -; RV64IZCMP-NEXT: sw a3, 60(a5) -; RV64IZCMP-NEXT: sw 
a4, 56(a5) -; RV64IZCMP-NEXT: sw s0, 52(a5) -; RV64IZCMP-NEXT: sw a1, 48(a5) -; RV64IZCMP-NEXT: sw s4, 44(a5) -; RV64IZCMP-NEXT: sw s3, 40(a5) -; RV64IZCMP-NEXT: sw s2, 36(a5) -; RV64IZCMP-NEXT: sw t6, 32(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s2, 52(a5) +; RV64IZCMP-NEXT: lw a3, 56(a5) +; RV64IZCMP-NEXT: lw a4, 60(a5) +; RV64IZCMP-NEXT: lw a1, 64(a5) +; RV64IZCMP-NEXT: lw s0, 68(a5) +; RV64IZCMP-NEXT: lw s3, 32(a5) +; RV64IZCMP-NEXT: lw s4, 36(a5) +; RV64IZCMP-NEXT: lw s1, 40(a5) +; RV64IZCMP-NEXT: lw a2, 44(a5) +; RV64IZCMP-NEXT: sw s0, 68(a5) +; RV64IZCMP-NEXT: sw a1, 64(a5) +; RV64IZCMP-NEXT: sw a4, 60(a5) +; RV64IZCMP-NEXT: sw a3, 56(a5) +; RV64IZCMP-NEXT: sw s2, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw a2, 44(a5) +; RV64IZCMP-NEXT: sw s1, 40(a5) +; RV64IZCMP-NEXT: sw s4, 36(a5) +; RV64IZCMP-NEXT: sw s3, 32(a5) ; RV64IZCMP-NEXT: sw t5, 28(a5) ; RV64IZCMP-NEXT: sw t4, 24(a5) ; RV64IZCMP-NEXT: sw t3, 20(a5) @@ -1203,26 +1203,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV32IZCMP-SR-NEXT: lw t3, 20(a5) ; RV32IZCMP-SR-NEXT: lw t4, 24(a5) ; RV32IZCMP-SR-NEXT: lw t5, 28(a5) -; RV32IZCMP-SR-NEXT: lw t6, 32(a5) -; RV32IZCMP-SR-NEXT: lw s2, 36(a5) -; RV32IZCMP-SR-NEXT: lw s3, 40(a5) -; RV32IZCMP-SR-NEXT: lw s4, 44(a5) -; RV32IZCMP-SR-NEXT: lw a1, 48(a5) -; RV32IZCMP-SR-NEXT: lw s0, 52(a5) -; RV32IZCMP-SR-NEXT: lw s1, 68(a5) -; RV32IZCMP-SR-NEXT: lw a2, 64(a5) -; RV32IZCMP-SR-NEXT: lw a3, 60(a5) -; RV32IZCMP-SR-NEXT: lw a4, 56(a5) -; RV32IZCMP-SR-NEXT: sw s1, 68(a5) -; RV32IZCMP-SR-NEXT: sw a2, 64(a5) -; RV32IZCMP-SR-NEXT: sw a3, 60(a5) -; RV32IZCMP-SR-NEXT: sw a4, 56(a5) -; RV32IZCMP-SR-NEXT: sw s0, 52(a5) -; RV32IZCMP-SR-NEXT: sw a1, 48(a5) -; RV32IZCMP-SR-NEXT: sw s4, 44(a5) -; RV32IZCMP-SR-NEXT: sw s3, 40(a5) -; RV32IZCMP-SR-NEXT: sw s2, 36(a5) -; RV32IZCMP-SR-NEXT: sw t6, 32(a5) +; RV32IZCMP-SR-NEXT: lw t6, 48(a5) +; RV32IZCMP-SR-NEXT: lw s2, 52(a5) +; RV32IZCMP-SR-NEXT: lw a3, 56(a5) +; RV32IZCMP-SR-NEXT: lw a4, 60(a5) +; RV32IZCMP-SR-NEXT: lw a1, 64(a5) +; RV32IZCMP-SR-NEXT: lw s0, 68(a5) +; RV32IZCMP-SR-NEXT: lw s3, 32(a5) +; RV32IZCMP-SR-NEXT: lw s4, 36(a5) +; RV32IZCMP-SR-NEXT: lw s1, 40(a5) +; RV32IZCMP-SR-NEXT: lw a2, 44(a5) +; RV32IZCMP-SR-NEXT: sw s0, 68(a5) +; RV32IZCMP-SR-NEXT: sw a1, 64(a5) +; RV32IZCMP-SR-NEXT: sw a4, 60(a5) +; RV32IZCMP-SR-NEXT: sw a3, 56(a5) +; RV32IZCMP-SR-NEXT: sw s2, 52(a5) +; RV32IZCMP-SR-NEXT: sw t6, 48(a5) +; RV32IZCMP-SR-NEXT: sw a2, 44(a5) +; RV32IZCMP-SR-NEXT: sw s1, 40(a5) +; RV32IZCMP-SR-NEXT: sw s4, 36(a5) +; RV32IZCMP-SR-NEXT: sw s3, 32(a5) ; RV32IZCMP-SR-NEXT: sw t5, 28(a5) ; RV32IZCMP-SR-NEXT: sw t4, 24(a5) ; RV32IZCMP-SR-NEXT: sw t3, 20(a5) @@ -1246,26 +1246,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV64IZCMP-SR-NEXT: lw t3, 20(a5) ; RV64IZCMP-SR-NEXT: lw t4, 24(a5) ; RV64IZCMP-SR-NEXT: lw t5, 28(a5) -; RV64IZCMP-SR-NEXT: lw t6, 32(a5) -; RV64IZCMP-SR-NEXT: lw s2, 36(a5) -; RV64IZCMP-SR-NEXT: lw s3, 40(a5) -; RV64IZCMP-SR-NEXT: lw s4, 44(a5) -; RV64IZCMP-SR-NEXT: lw a1, 48(a5) -; RV64IZCMP-SR-NEXT: lw s0, 52(a5) -; RV64IZCMP-SR-NEXT: lw s1, 68(a5) -; RV64IZCMP-SR-NEXT: lw a2, 64(a5) -; RV64IZCMP-SR-NEXT: lw a3, 60(a5) -; RV64IZCMP-SR-NEXT: lw a4, 56(a5) -; RV64IZCMP-SR-NEXT: sw s1, 68(a5) -; RV64IZCMP-SR-NEXT: sw a2, 64(a5) -; RV64IZCMP-SR-NEXT: sw a3, 60(a5) -; RV64IZCMP-SR-NEXT: sw a4, 56(a5) -; RV64IZCMP-SR-NEXT: sw s0, 52(a5) -; RV64IZCMP-SR-NEXT: sw a1, 48(a5) -; RV64IZCMP-SR-NEXT: sw s4, 44(a5) -; 
RV64IZCMP-SR-NEXT: sw s3, 40(a5) -; RV64IZCMP-SR-NEXT: sw s2, 36(a5) -; RV64IZCMP-SR-NEXT: sw t6, 32(a5) +; RV64IZCMP-SR-NEXT: lw t6, 48(a5) +; RV64IZCMP-SR-NEXT: lw s2, 52(a5) +; RV64IZCMP-SR-NEXT: lw a3, 56(a5) +; RV64IZCMP-SR-NEXT: lw a4, 60(a5) +; RV64IZCMP-SR-NEXT: lw a1, 64(a5) +; RV64IZCMP-SR-NEXT: lw s0, 68(a5) +; RV64IZCMP-SR-NEXT: lw s3, 32(a5) +; RV64IZCMP-SR-NEXT: lw s4, 36(a5) +; RV64IZCMP-SR-NEXT: lw s1, 40(a5) +; RV64IZCMP-SR-NEXT: lw a2, 44(a5) +; RV64IZCMP-SR-NEXT: sw s0, 68(a5) +; RV64IZCMP-SR-NEXT: sw a1, 64(a5) +; RV64IZCMP-SR-NEXT: sw a4, 60(a5) +; RV64IZCMP-SR-NEXT: sw a3, 56(a5) +; RV64IZCMP-SR-NEXT: sw s2, 52(a5) +; RV64IZCMP-SR-NEXT: sw t6, 48(a5) +; RV64IZCMP-SR-NEXT: sw a2, 44(a5) +; RV64IZCMP-SR-NEXT: sw s1, 40(a5) +; RV64IZCMP-SR-NEXT: sw s4, 36(a5) +; RV64IZCMP-SR-NEXT: sw s3, 32(a5) ; RV64IZCMP-SR-NEXT: sw t5, 28(a5) ; RV64IZCMP-SR-NEXT: sw t4, 24(a5) ; RV64IZCMP-SR-NEXT: sw t3, 20(a5) @@ -1294,26 +1294,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV32I-NEXT: lw a7, 20(a5) ; RV32I-NEXT: lw t0, 24(a5) ; RV32I-NEXT: lw t1, 28(a5) -; RV32I-NEXT: lw t2, 32(a5) -; RV32I-NEXT: lw t3, 36(a5) -; RV32I-NEXT: lw t4, 40(a5) -; RV32I-NEXT: lw t5, 44(a5) -; RV32I-NEXT: lw t6, 48(a5) -; RV32I-NEXT: lw s0, 52(a5) -; RV32I-NEXT: lw s1, 68(a5) -; RV32I-NEXT: lw s2, 64(a5) -; RV32I-NEXT: lw s3, 60(a5) -; RV32I-NEXT: lw s4, 56(a5) -; RV32I-NEXT: sw s1, 68(a5) -; RV32I-NEXT: sw s2, 64(a5) -; RV32I-NEXT: sw s3, 60(a5) -; RV32I-NEXT: sw s4, 56(a5) -; RV32I-NEXT: sw s0, 52(a5) -; RV32I-NEXT: sw t6, 48(a5) -; RV32I-NEXT: sw t5, 44(a5) -; RV32I-NEXT: sw t4, 40(a5) -; RV32I-NEXT: sw t3, 36(a5) -; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: lw t2, 48(a5) +; RV32I-NEXT: lw t3, 52(a5) +; RV32I-NEXT: lw t4, 56(a5) +; RV32I-NEXT: lw t5, 60(a5) +; RV32I-NEXT: lw t6, 64(a5) +; RV32I-NEXT: lw s0, 68(a5) +; RV32I-NEXT: lw s1, 32(a5) +; RV32I-NEXT: lw s2, 36(a5) +; RV32I-NEXT: lw s3, 40(a5) +; RV32I-NEXT: lw s4, 44(a5) +; RV32I-NEXT: sw s0, 68(a5) +; RV32I-NEXT: sw t6, 64(a5) +; RV32I-NEXT: sw t5, 60(a5) +; RV32I-NEXT: sw t4, 56(a5) +; RV32I-NEXT: sw t3, 52(a5) +; RV32I-NEXT: sw t2, 48(a5) +; RV32I-NEXT: sw s4, 44(a5) +; RV32I-NEXT: sw s3, 40(a5) +; RV32I-NEXT: sw s2, 36(a5) +; RV32I-NEXT: sw s1, 32(a5) ; RV32I-NEXT: sw t1, 28(a5) ; RV32I-NEXT: sw t0, 24(a5) ; RV32I-NEXT: sw a7, 20(a5) @@ -1348,26 +1348,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind { ; RV64I-NEXT: lw a7, 20(a5) ; RV64I-NEXT: lw t0, 24(a5) ; RV64I-NEXT: lw t1, 28(a5) -; RV64I-NEXT: lw t2, 32(a5) -; RV64I-NEXT: lw t3, 36(a5) -; RV64I-NEXT: lw t4, 40(a5) -; RV64I-NEXT: lw t5, 44(a5) -; RV64I-NEXT: lw t6, 48(a5) -; RV64I-NEXT: lw s0, 52(a5) -; RV64I-NEXT: lw s1, 68(a5) -; RV64I-NEXT: lw s2, 64(a5) -; RV64I-NEXT: lw s3, 60(a5) -; RV64I-NEXT: lw s4, 56(a5) -; RV64I-NEXT: sw s1, 68(a5) -; RV64I-NEXT: sw s2, 64(a5) -; RV64I-NEXT: sw s3, 60(a5) -; RV64I-NEXT: sw s4, 56(a5) -; RV64I-NEXT: sw s0, 52(a5) -; RV64I-NEXT: sw t6, 48(a5) -; RV64I-NEXT: sw t5, 44(a5) -; RV64I-NEXT: sw t4, 40(a5) -; RV64I-NEXT: sw t3, 36(a5) -; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: lw t2, 48(a5) +; RV64I-NEXT: lw t3, 52(a5) +; RV64I-NEXT: lw t4, 56(a5) +; RV64I-NEXT: lw t5, 60(a5) +; RV64I-NEXT: lw t6, 64(a5) +; RV64I-NEXT: lw s0, 68(a5) +; RV64I-NEXT: lw s1, 32(a5) +; RV64I-NEXT: lw s2, 36(a5) +; RV64I-NEXT: lw s3, 40(a5) +; RV64I-NEXT: lw s4, 44(a5) +; RV64I-NEXT: sw s0, 68(a5) +; RV64I-NEXT: sw t6, 64(a5) +; RV64I-NEXT: sw t5, 60(a5) +; RV64I-NEXT: sw t4, 56(a5) +; RV64I-NEXT: sw 
t3, 52(a5) +; RV64I-NEXT: sw t2, 48(a5) +; RV64I-NEXT: sw s4, 44(a5) +; RV64I-NEXT: sw s3, 40(a5) +; RV64I-NEXT: sw s2, 36(a5) +; RV64I-NEXT: sw s1, 32(a5) ; RV64I-NEXT: sw t1, 28(a5) ; RV64I-NEXT: sw t0, 24(a5) ; RV64I-NEXT: sw a7, 20(a5) @@ -1813,16 +1813,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-NEXT: sw t4, 44(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: sw t5, 40(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: sw t6, 36(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lui a6, %hi(var_test_irq) -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -1845,22 +1845,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-NEXT: lw t3, 84(a5) ; RV32IZCMP-NEXT: lw t2, 88(a5) ; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw t0, 96(a5) -; RV32IZCMP-NEXT: lw s0, 100(a5) -; RV32IZCMP-NEXT: lw a7, 104(a5) -; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a7, 112(a5) +; RV32IZCMP-NEXT: lw s0, 116(a5) +; RV32IZCMP-NEXT: lw a3, 120(a5) ; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a1, 120(a5) -; RV32IZCMP-NEXT: lw a2, 116(a5) -; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: lw a6, 96(a5) +; RV32IZCMP-NEXT: lw a4, 100(a5) +; RV32IZCMP-NEXT: lw a2, 104(a5) +; RV32IZCMP-NEXT: lw a1, 108(a5) ; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a1, 120(a5) -; RV32IZCMP-NEXT: sw a2, 116(a5) -; RV32IZCMP-NEXT: sw a3, 112(a5) -; RV32IZCMP-NEXT: sw a4, 108(a5) -; RV32IZCMP-NEXT: sw a7, 104(a5) -; RV32IZCMP-NEXT: sw s0, 100(a5) -; RV32IZCMP-NEXT: sw t0, 96(a5) +; RV32IZCMP-NEXT: sw a3, 120(a5) +; RV32IZCMP-NEXT: sw s0, 116(a5) +; RV32IZCMP-NEXT: sw a7, 112(a5) +; RV32IZCMP-NEXT: sw a1, 108(a5) +; RV32IZCMP-NEXT: sw a2, 104(a5) +; RV32IZCMP-NEXT: sw a4, 100(a5) +; RV32IZCMP-NEXT: sw a6, 96(a5) ; RV32IZCMP-NEXT: sw t1, 92(a5) ; RV32IZCMP-NEXT: sw t2, 88(a5) ; RV32IZCMP-NEXT: sw t3, 84(a5) @@ -1884,13 +1884,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-NEXT: lw t0, 92(sp) # 4-byte Folded Reload ; 
RV32IZCMP-NEXT: lw t1, 88(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: lw t2, 84(sp) # 4-byte Folded Reload @@ -1929,16 +1929,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-NEXT: sd t4, 72(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: sd t5, 64(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: sd t6, 56(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lui a6, %hi(var_test_irq) -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -1961,22 +1961,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-NEXT: lw t3, 84(a5) ; RV64IZCMP-NEXT: lw t2, 88(a5) ; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw t0, 96(a5) -; RV64IZCMP-NEXT: lw s0, 100(a5) -; RV64IZCMP-NEXT: lw a7, 104(a5) -; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a7, 112(a5) +; RV64IZCMP-NEXT: lw s0, 116(a5) +; RV64IZCMP-NEXT: lw a3, 120(a5) ; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a1, 120(a5) -; RV64IZCMP-NEXT: lw a2, 116(a5) -; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: lw a6, 96(a5) +; RV64IZCMP-NEXT: lw a4, 100(a5) +; RV64IZCMP-NEXT: lw a2, 104(a5) +; RV64IZCMP-NEXT: lw a1, 108(a5) ; RV64IZCMP-NEXT: sw a0, 124(a5) -; RV64IZCMP-NEXT: sw a1, 120(a5) -; RV64IZCMP-NEXT: sw a2, 116(a5) -; RV64IZCMP-NEXT: sw a3, 112(a5) -; RV64IZCMP-NEXT: sw a4, 108(a5) -; RV64IZCMP-NEXT: sw a7, 104(a5) -; RV64IZCMP-NEXT: sw s0, 100(a5) -; RV64IZCMP-NEXT: sw t0, 96(a5) +; RV64IZCMP-NEXT: sw a3, 120(a5) +; RV64IZCMP-NEXT: sw s0, 116(a5) +; RV64IZCMP-NEXT: sw a7, 112(a5) +; RV64IZCMP-NEXT: sw a1, 108(a5) +; RV64IZCMP-NEXT: sw a2, 104(a5) +; RV64IZCMP-NEXT: sw a4, 100(a5) +; RV64IZCMP-NEXT: sw a6, 96(a5) ; RV64IZCMP-NEXT: sw t1, 92(a5) ; RV64IZCMP-NEXT: sw t2, 88(a5) ; RV64IZCMP-NEXT: sw t3, 84(a5) @@ -2000,13 +2000,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-NEXT: ld t0, 168(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: ld t1, 160(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: ld t2, 152(sp) # 8-byte Folded Reload @@ -2045,16 +2045,16 @@ 
define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-SR-NEXT: sw t4, 44(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: sw t5, 40(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: sw t6, 36(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lui a6, %hi(var_test_irq) -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) ; RV32IZCMP-SR-NEXT: lw a0, 16(a5) ; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2077,22 +2077,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-SR-NEXT: lw t3, 84(a5) ; RV32IZCMP-SR-NEXT: lw t2, 88(a5) ; RV32IZCMP-SR-NEXT: lw t1, 92(a5) -; RV32IZCMP-SR-NEXT: lw t0, 96(a5) -; RV32IZCMP-SR-NEXT: lw s0, 100(a5) -; RV32IZCMP-SR-NEXT: lw a7, 104(a5) -; RV32IZCMP-SR-NEXT: lw a4, 108(a5) +; RV32IZCMP-SR-NEXT: lw a7, 112(a5) +; RV32IZCMP-SR-NEXT: lw s0, 116(a5) +; RV32IZCMP-SR-NEXT: lw a3, 120(a5) ; RV32IZCMP-SR-NEXT: lw a0, 124(a5) -; RV32IZCMP-SR-NEXT: lw a1, 120(a5) -; RV32IZCMP-SR-NEXT: lw a2, 116(a5) -; RV32IZCMP-SR-NEXT: lw a3, 112(a5) +; RV32IZCMP-SR-NEXT: lw a6, 96(a5) +; RV32IZCMP-SR-NEXT: lw a4, 100(a5) +; RV32IZCMP-SR-NEXT: lw a2, 104(a5) +; RV32IZCMP-SR-NEXT: lw a1, 108(a5) ; RV32IZCMP-SR-NEXT: sw a0, 124(a5) -; RV32IZCMP-SR-NEXT: sw a1, 120(a5) -; RV32IZCMP-SR-NEXT: sw a2, 116(a5) -; RV32IZCMP-SR-NEXT: sw a3, 112(a5) -; RV32IZCMP-SR-NEXT: sw a4, 108(a5) -; RV32IZCMP-SR-NEXT: sw a7, 104(a5) -; RV32IZCMP-SR-NEXT: sw s0, 100(a5) -; RV32IZCMP-SR-NEXT: sw t0, 96(a5) +; RV32IZCMP-SR-NEXT: sw a3, 120(a5) +; RV32IZCMP-SR-NEXT: sw s0, 116(a5) +; RV32IZCMP-SR-NEXT: sw a7, 112(a5) +; RV32IZCMP-SR-NEXT: sw a1, 108(a5) +; RV32IZCMP-SR-NEXT: sw a2, 104(a5) +; RV32IZCMP-SR-NEXT: sw a4, 100(a5) +; RV32IZCMP-SR-NEXT: sw a6, 96(a5) ; RV32IZCMP-SR-NEXT: sw t1, 92(a5) ; RV32IZCMP-SR-NEXT: sw t2, 88(a5) ; RV32IZCMP-SR-NEXT: sw t3, 84(a5) @@ -2116,13 +2116,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: sw a0, 16(a5) ; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-SR-NEXT: lw t0, 92(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: lw t1, 88(sp) # 4-byte Folded 
Reload ; RV32IZCMP-SR-NEXT: lw t2, 84(sp) # 4-byte Folded Reload @@ -2161,16 +2161,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-SR-NEXT: sd t4, 72(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: sd t5, 64(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: sd t6, 56(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lui a6, %hi(var_test_irq) -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) ; RV64IZCMP-SR-NEXT: lw a0, 16(a5) ; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2193,22 +2193,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-SR-NEXT: lw t3, 84(a5) ; RV64IZCMP-SR-NEXT: lw t2, 88(a5) ; RV64IZCMP-SR-NEXT: lw t1, 92(a5) -; RV64IZCMP-SR-NEXT: lw t0, 96(a5) -; RV64IZCMP-SR-NEXT: lw s0, 100(a5) -; RV64IZCMP-SR-NEXT: lw a7, 104(a5) -; RV64IZCMP-SR-NEXT: lw a4, 108(a5) +; RV64IZCMP-SR-NEXT: lw a7, 112(a5) +; RV64IZCMP-SR-NEXT: lw s0, 116(a5) +; RV64IZCMP-SR-NEXT: lw a3, 120(a5) ; RV64IZCMP-SR-NEXT: lw a0, 124(a5) -; RV64IZCMP-SR-NEXT: lw a1, 120(a5) -; RV64IZCMP-SR-NEXT: lw a2, 116(a5) -; RV64IZCMP-SR-NEXT: lw a3, 112(a5) +; RV64IZCMP-SR-NEXT: lw a6, 96(a5) +; RV64IZCMP-SR-NEXT: lw a4, 100(a5) +; RV64IZCMP-SR-NEXT: lw a2, 104(a5) +; RV64IZCMP-SR-NEXT: lw a1, 108(a5) ; RV64IZCMP-SR-NEXT: sw a0, 124(a5) -; RV64IZCMP-SR-NEXT: sw a1, 120(a5) -; RV64IZCMP-SR-NEXT: sw a2, 116(a5) -; RV64IZCMP-SR-NEXT: sw a3, 112(a5) -; RV64IZCMP-SR-NEXT: sw a4, 108(a5) -; RV64IZCMP-SR-NEXT: sw a7, 104(a5) -; RV64IZCMP-SR-NEXT: sw s0, 100(a5) -; RV64IZCMP-SR-NEXT: sw t0, 96(a5) +; RV64IZCMP-SR-NEXT: sw a3, 120(a5) +; RV64IZCMP-SR-NEXT: sw s0, 116(a5) +; RV64IZCMP-SR-NEXT: sw a7, 112(a5) +; RV64IZCMP-SR-NEXT: sw a1, 108(a5) +; RV64IZCMP-SR-NEXT: sw a2, 104(a5) +; RV64IZCMP-SR-NEXT: sw a4, 100(a5) +; RV64IZCMP-SR-NEXT: sw a6, 96(a5) ; RV64IZCMP-SR-NEXT: sw t1, 92(a5) ; RV64IZCMP-SR-NEXT: sw t2, 88(a5) ; RV64IZCMP-SR-NEXT: sw t3, 84(a5) @@ -2232,13 +2232,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: sw a0, 16(a5) ; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-SR-NEXT: ld 
t0, 168(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: ld t1, 160(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: ld t2, 152(sp) # 8-byte Folded Reload @@ -2289,16 +2289,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32I-NEXT: sw t4, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw t5, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw t6, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a6, %hi(var_test_irq) -; RV32I-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) ; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -2321,22 +2321,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 96(a5) -; RV32I-NEXT: lw ra, 100(a5) -; RV32I-NEXT: lw a7, 104(a5) -; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw s11, 112(a5) +; RV32I-NEXT: lw ra, 116(a5) +; RV32I-NEXT: lw a3, 120(a5) ; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a1, 120(a5) -; RV32I-NEXT: lw a2, 116(a5) -; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: lw a6, 96(a5) +; RV32I-NEXT: lw a4, 100(a5) +; RV32I-NEXT: lw a2, 104(a5) +; RV32I-NEXT: lw a1, 108(a5) ; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a1, 120(a5) -; RV32I-NEXT: sw a2, 116(a5) -; RV32I-NEXT: sw a3, 112(a5) -; RV32I-NEXT: sw a4, 108(a5) -; RV32I-NEXT: sw a7, 104(a5) -; RV32I-NEXT: sw ra, 100(a5) -; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw a3, 120(a5) +; RV32I-NEXT: sw ra, 116(a5) +; RV32I-NEXT: sw s11, 112(a5) +; RV32I-NEXT: sw a1, 108(a5) +; RV32I-NEXT: sw a2, 104(a5) +; RV32I-NEXT: sw a4, 100(a5) +; RV32I-NEXT: sw a6, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -2360,13 +2360,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) ; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload @@ -2429,16 +2429,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64I-NEXT: sd t4, 64(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd t5, 56(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd t6, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a6, %hi(var_test_irq) 
-; RV64I-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -2461,22 +2461,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 96(a5) -; RV64I-NEXT: lw ra, 100(a5) -; RV64I-NEXT: lw a7, 104(a5) -; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw s11, 112(a5) +; RV64I-NEXT: lw ra, 116(a5) +; RV64I-NEXT: lw a3, 120(a5) ; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a1, 120(a5) -; RV64I-NEXT: lw a2, 116(a5) -; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: lw a6, 96(a5) +; RV64I-NEXT: lw a4, 100(a5) +; RV64I-NEXT: lw a2, 104(a5) +; RV64I-NEXT: lw a1, 108(a5) ; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a1, 120(a5) -; RV64I-NEXT: sw a2, 116(a5) -; RV64I-NEXT: sw a3, 112(a5) -; RV64I-NEXT: sw a4, 108(a5) -; RV64I-NEXT: sw a7, 104(a5) -; RV64I-NEXT: sw ra, 100(a5) -; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw a3, 120(a5) +; RV64I-NEXT: sw ra, 116(a5) +; RV64I-NEXT: sw s11, 112(a5) +; RV64I-NEXT: sw a1, 108(a5) +; RV64I-NEXT: sw a2, 104(a5) +; RV64I-NEXT: sw a4, 100(a5) +; RV64I-NEXT: sw a6, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -2500,13 +2500,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" { ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) ; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload @@ -2546,16 +2546,16 @@ define void @callee_no_irq() nounwind{ ; RV32IZCMP-LABEL: callee_no_irq: ; RV32IZCMP: # %bb.0: ; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -96 -; RV32IZCMP-NEXT: lui a6, %hi(var_test_irq) -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded 
Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -2578,22 +2578,22 @@ define void @callee_no_irq() nounwind{ ; RV32IZCMP-NEXT: lw t3, 84(a5) ; RV32IZCMP-NEXT: lw t2, 88(a5) ; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw t0, 96(a5) -; RV32IZCMP-NEXT: lw s0, 100(a5) -; RV32IZCMP-NEXT: lw a7, 104(a5) -; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a7, 112(a5) +; RV32IZCMP-NEXT: lw s0, 116(a5) +; RV32IZCMP-NEXT: lw a3, 120(a5) ; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a1, 120(a5) -; RV32IZCMP-NEXT: lw a2, 116(a5) -; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: lw a6, 96(a5) +; RV32IZCMP-NEXT: lw a4, 100(a5) +; RV32IZCMP-NEXT: lw a2, 104(a5) +; RV32IZCMP-NEXT: lw a1, 108(a5) ; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a1, 120(a5) -; RV32IZCMP-NEXT: sw a2, 116(a5) -; RV32IZCMP-NEXT: sw a3, 112(a5) -; RV32IZCMP-NEXT: sw a4, 108(a5) -; RV32IZCMP-NEXT: sw a7, 104(a5) -; RV32IZCMP-NEXT: sw s0, 100(a5) -; RV32IZCMP-NEXT: sw t0, 96(a5) +; RV32IZCMP-NEXT: sw a3, 120(a5) +; RV32IZCMP-NEXT: sw s0, 116(a5) +; RV32IZCMP-NEXT: sw a7, 112(a5) +; RV32IZCMP-NEXT: sw a1, 108(a5) +; RV32IZCMP-NEXT: sw a2, 104(a5) +; RV32IZCMP-NEXT: sw a4, 100(a5) +; RV32IZCMP-NEXT: sw a6, 96(a5) ; RV32IZCMP-NEXT: sw t1, 92(a5) ; RV32IZCMP-NEXT: sw t2, 88(a5) ; RV32IZCMP-NEXT: sw t3, 84(a5) @@ -2617,28 +2617,28 @@ define void @callee_no_irq() nounwind{ ; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96 ; ; RV64IZCMP-LABEL: callee_no_irq: ; RV64IZCMP: # %bb.0: ; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 -; RV64IZCMP-NEXT: lui a6, %hi(var_test_irq) -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -2661,22 +2661,22 @@ define void @callee_no_irq() nounwind{ ; 
RV64IZCMP-NEXT: lw t3, 84(a5) ; RV64IZCMP-NEXT: lw t2, 88(a5) ; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw t0, 96(a5) -; RV64IZCMP-NEXT: lw s0, 100(a5) -; RV64IZCMP-NEXT: lw a7, 104(a5) -; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a7, 112(a5) +; RV64IZCMP-NEXT: lw s0, 116(a5) +; RV64IZCMP-NEXT: lw a3, 120(a5) ; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a1, 120(a5) -; RV64IZCMP-NEXT: lw a2, 116(a5) -; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: lw a6, 96(a5) +; RV64IZCMP-NEXT: lw a4, 100(a5) +; RV64IZCMP-NEXT: lw a2, 104(a5) +; RV64IZCMP-NEXT: lw a1, 108(a5) ; RV64IZCMP-NEXT: sw a0, 124(a5) -; RV64IZCMP-NEXT: sw a1, 120(a5) -; RV64IZCMP-NEXT: sw a2, 116(a5) -; RV64IZCMP-NEXT: sw a3, 112(a5) -; RV64IZCMP-NEXT: sw a4, 108(a5) -; RV64IZCMP-NEXT: sw a7, 104(a5) -; RV64IZCMP-NEXT: sw s0, 100(a5) -; RV64IZCMP-NEXT: sw t0, 96(a5) +; RV64IZCMP-NEXT: sw a3, 120(a5) +; RV64IZCMP-NEXT: sw s0, 116(a5) +; RV64IZCMP-NEXT: sw a7, 112(a5) +; RV64IZCMP-NEXT: sw a1, 108(a5) +; RV64IZCMP-NEXT: sw a2, 104(a5) +; RV64IZCMP-NEXT: sw a4, 100(a5) +; RV64IZCMP-NEXT: sw a6, 96(a5) ; RV64IZCMP-NEXT: sw t1, 92(a5) ; RV64IZCMP-NEXT: sw t2, 88(a5) ; RV64IZCMP-NEXT: sw t3, 84(a5) @@ -2700,28 +2700,28 @@ define void @callee_no_irq() nounwind{ ; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV32IZCMP-SR-LABEL: callee_no_irq: ; RV32IZCMP-SR: # %bb.0: ; RV32IZCMP-SR-NEXT: cm.push {ra, s0-s11}, -96 -; RV32IZCMP-SR-NEXT: lui a6, %hi(var_test_irq) -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) ; RV32IZCMP-SR-NEXT: lw a0, 16(a5) ; RV32IZCMP-SR-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2744,22 +2744,22 @@ define void @callee_no_irq() nounwind{ ; RV32IZCMP-SR-NEXT: lw t3, 84(a5) ; RV32IZCMP-SR-NEXT: lw t2, 88(a5) ; RV32IZCMP-SR-NEXT: lw t1, 92(a5) -; RV32IZCMP-SR-NEXT: lw t0, 96(a5) -; RV32IZCMP-SR-NEXT: lw s0, 100(a5) -; RV32IZCMP-SR-NEXT: lw a7, 104(a5) -; RV32IZCMP-SR-NEXT: lw a4, 108(a5) +; RV32IZCMP-SR-NEXT: lw a7, 112(a5) +; RV32IZCMP-SR-NEXT: lw s0, 116(a5) +; RV32IZCMP-SR-NEXT: lw a3, 120(a5) ; RV32IZCMP-SR-NEXT: 
lw a0, 124(a5) -; RV32IZCMP-SR-NEXT: lw a1, 120(a5) -; RV32IZCMP-SR-NEXT: lw a2, 116(a5) -; RV32IZCMP-SR-NEXT: lw a3, 112(a5) +; RV32IZCMP-SR-NEXT: lw a6, 96(a5) +; RV32IZCMP-SR-NEXT: lw a4, 100(a5) +; RV32IZCMP-SR-NEXT: lw a2, 104(a5) +; RV32IZCMP-SR-NEXT: lw a1, 108(a5) ; RV32IZCMP-SR-NEXT: sw a0, 124(a5) -; RV32IZCMP-SR-NEXT: sw a1, 120(a5) -; RV32IZCMP-SR-NEXT: sw a2, 116(a5) -; RV32IZCMP-SR-NEXT: sw a3, 112(a5) -; RV32IZCMP-SR-NEXT: sw a4, 108(a5) -; RV32IZCMP-SR-NEXT: sw a7, 104(a5) -; RV32IZCMP-SR-NEXT: sw s0, 100(a5) -; RV32IZCMP-SR-NEXT: sw t0, 96(a5) +; RV32IZCMP-SR-NEXT: sw a3, 120(a5) +; RV32IZCMP-SR-NEXT: sw s0, 116(a5) +; RV32IZCMP-SR-NEXT: sw a7, 112(a5) +; RV32IZCMP-SR-NEXT: sw a1, 108(a5) +; RV32IZCMP-SR-NEXT: sw a2, 104(a5) +; RV32IZCMP-SR-NEXT: sw a4, 100(a5) +; RV32IZCMP-SR-NEXT: sw a6, 96(a5) ; RV32IZCMP-SR-NEXT: sw t1, 92(a5) ; RV32IZCMP-SR-NEXT: sw t2, 88(a5) ; RV32IZCMP-SR-NEXT: sw t3, 84(a5) @@ -2783,28 +2783,28 @@ define void @callee_no_irq() nounwind{ ; RV32IZCMP-SR-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: sw a0, 16(a5) ; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV32IZCMP-SR-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV32IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 96 ; ; RV64IZCMP-SR-LABEL: callee_no_irq: ; RV64IZCMP-SR: # %bb.0: ; RV64IZCMP-SR-NEXT: cm.push {ra, s0-s11}, -160 -; RV64IZCMP-SR-NEXT: lui a6, %hi(var_test_irq) -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) ; RV64IZCMP-SR-NEXT: lw a0, 16(a5) ; RV64IZCMP-SR-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2827,22 +2827,22 @@ define void @callee_no_irq() nounwind{ ; RV64IZCMP-SR-NEXT: lw t3, 84(a5) ; RV64IZCMP-SR-NEXT: lw t2, 88(a5) ; RV64IZCMP-SR-NEXT: lw t1, 92(a5) -; RV64IZCMP-SR-NEXT: lw t0, 96(a5) -; RV64IZCMP-SR-NEXT: lw s0, 100(a5) -; RV64IZCMP-SR-NEXT: lw a7, 104(a5) -; RV64IZCMP-SR-NEXT: lw a4, 108(a5) +; RV64IZCMP-SR-NEXT: lw a7, 112(a5) +; RV64IZCMP-SR-NEXT: lw s0, 116(a5) +; RV64IZCMP-SR-NEXT: lw a3, 120(a5) ; RV64IZCMP-SR-NEXT: lw a0, 124(a5) -; RV64IZCMP-SR-NEXT: lw a1, 120(a5) -; RV64IZCMP-SR-NEXT: lw a2, 116(a5) -; RV64IZCMP-SR-NEXT: lw a3, 112(a5) +; RV64IZCMP-SR-NEXT: lw a6, 96(a5) +; RV64IZCMP-SR-NEXT: lw a4, 100(a5) +; RV64IZCMP-SR-NEXT: lw a2, 
104(a5) +; RV64IZCMP-SR-NEXT: lw a1, 108(a5) ; RV64IZCMP-SR-NEXT: sw a0, 124(a5) -; RV64IZCMP-SR-NEXT: sw a1, 120(a5) -; RV64IZCMP-SR-NEXT: sw a2, 116(a5) -; RV64IZCMP-SR-NEXT: sw a3, 112(a5) -; RV64IZCMP-SR-NEXT: sw a4, 108(a5) -; RV64IZCMP-SR-NEXT: sw a7, 104(a5) -; RV64IZCMP-SR-NEXT: sw s0, 100(a5) -; RV64IZCMP-SR-NEXT: sw t0, 96(a5) +; RV64IZCMP-SR-NEXT: sw a3, 120(a5) +; RV64IZCMP-SR-NEXT: sw s0, 116(a5) +; RV64IZCMP-SR-NEXT: sw a7, 112(a5) +; RV64IZCMP-SR-NEXT: sw a1, 108(a5) +; RV64IZCMP-SR-NEXT: sw a2, 104(a5) +; RV64IZCMP-SR-NEXT: sw a4, 100(a5) +; RV64IZCMP-SR-NEXT: sw a6, 96(a5) ; RV64IZCMP-SR-NEXT: sw t1, 92(a5) ; RV64IZCMP-SR-NEXT: sw t2, 88(a5) ; RV64IZCMP-SR-NEXT: sw t3, 84(a5) @@ -2866,13 +2866,13 @@ define void @callee_no_irq() nounwind{ ; RV64IZCMP-SR-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: sw a0, 16(a5) ; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) ; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) ; RV64IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV32I-LABEL: callee_no_irq: @@ -2891,16 +2891,16 @@ define void @callee_no_irq() nounwind{ ; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a6, %hi(var_test_irq) -; RV32I-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a6, %lo(var_test_irq) +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -2923,22 +2923,22 @@ define void @callee_no_irq() nounwind{ ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 96(a5) -; RV32I-NEXT: lw ra, 100(a5) -; RV32I-NEXT: lw a7, 104(a5) -; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw s11, 112(a5) +; RV32I-NEXT: lw ra, 116(a5) +; RV32I-NEXT: lw a3, 120(a5) ; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a1, 120(a5) -; RV32I-NEXT: lw a2, 116(a5) -; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: lw a6, 96(a5) +; RV32I-NEXT: lw a4, 100(a5) +; RV32I-NEXT: lw a2, 104(a5) +; RV32I-NEXT: lw a1, 108(a5) ; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a1, 120(a5) -; RV32I-NEXT: sw a2, 116(a5) -; RV32I-NEXT: sw a3, 112(a5) -; RV32I-NEXT: sw a4, 108(a5) -; RV32I-NEXT: sw a7, 104(a5) -; RV32I-NEXT: sw ra, 100(a5) -; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw a3, 120(a5) +; RV32I-NEXT: sw ra, 116(a5) +; 
RV32I-NEXT: sw s11, 112(a5) +; RV32I-NEXT: sw a1, 108(a5) +; RV32I-NEXT: sw a2, 104(a5) +; RV32I-NEXT: sw a4, 100(a5) +; RV32I-NEXT: sw a6, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -2962,13 +2962,13 @@ define void @callee_no_irq() nounwind{ ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -3001,16 +3001,16 @@ define void @callee_no_irq() nounwind{ ; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a6, %hi(var_test_irq) -; RV64I-NEXT: lw a0, %lo(var_test_irq)(a6) +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) ; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a6) +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a6, %lo(var_test_irq) +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -3033,22 +3033,22 @@ define void @callee_no_irq() nounwind{ ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 96(a5) -; RV64I-NEXT: lw ra, 100(a5) -; RV64I-NEXT: lw a7, 104(a5) -; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw s11, 112(a5) +; RV64I-NEXT: lw ra, 116(a5) +; RV64I-NEXT: lw a3, 120(a5) ; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a1, 120(a5) -; RV64I-NEXT: lw a2, 116(a5) -; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: lw a6, 96(a5) +; RV64I-NEXT: lw a4, 100(a5) +; RV64I-NEXT: lw a2, 104(a5) +; RV64I-NEXT: lw a1, 108(a5) ; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a1, 120(a5) -; RV64I-NEXT: sw a2, 116(a5) -; RV64I-NEXT: sw a3, 112(a5) -; RV64I-NEXT: sw a4, 108(a5) -; RV64I-NEXT: sw a7, 104(a5) -; RV64I-NEXT: sw ra, 100(a5) -; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw a3, 120(a5) +; RV64I-NEXT: sw ra, 116(a5) +; RV64I-NEXT: sw s11, 112(a5) +; RV64I-NEXT: sw a1, 108(a5) +; RV64I-NEXT: sw a2, 104(a5) +; RV64I-NEXT: sw a4, 100(a5) +; RV64I-NEXT: sw a6, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -3072,13 +3072,13 @@ define void @callee_no_irq() nounwind{ ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a6) +; RV64I-NEXT: sw a0, 
%lo(var_test_irq+12)(a7) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq)(a6) +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) ; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/reduction-formation.ll b/llvm/test/CodeGen/RISCV/reduction-formation.ll index 6b4dc0cd3699e3..ced3a38ab5ea00 100644 --- a/llvm/test/CodeGen/RISCV/reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/reduction-formation.ll @@ -8,24 +8,24 @@ define i32 @reduce_sum_4xi32(<4 x i32> %v) { ; RV32-LABEL: reduce_sum_4xi32: ; RV32: # %bb.0: -; RV32-NEXT: lw a1, 12(a0) +; RV32-NEXT: lw a1, 0(a0) ; RV32-NEXT: lw a2, 4(a0) -; RV32-NEXT: lw a3, 0(a0) -; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: add a2, a3, a2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: lw a3, 8(a0) +; RV32-NEXT: lw a0, 12(a0) +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: reduce_sum_4xi32: ; RV64: # %bb.0: -; RV64-NEXT: lw a1, 24(a0) +; RV64-NEXT: lw a1, 0(a0) ; RV64-NEXT: lw a2, 8(a0) -; RV64-NEXT: lw a3, 0(a0) -; RV64-NEXT: lw a0, 16(a0) -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: addw a0, a2, a0 +; RV64-NEXT: lw a3, 16(a0) +; RV64-NEXT: lw a0, 24(a0) +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: add a0, a3, a0 +; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 %e1 = extractelement <4 x i32> %v, i32 1 @@ -40,24 +40,24 @@ define i32 @reduce_sum_4xi32(<4 x i32> %v) { define i32 @reduce_xor_4xi32(<4 x i32> %v) { ; RV32-LABEL: reduce_xor_4xi32: ; RV32: # %bb.0: -; RV32-NEXT: lw a1, 12(a0) +; RV32-NEXT: lw a1, 0(a0) ; RV32-NEXT: lw a2, 4(a0) -; RV32-NEXT: lw a3, 0(a0) -; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: xor a2, a3, a2 -; RV32-NEXT: xor a0, a0, a1 -; RV32-NEXT: xor a0, a2, a0 +; RV32-NEXT: lw a3, 8(a0) +; RV32-NEXT: lw a0, 12(a0) +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: xor a0, a3, a0 +; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: reduce_xor_4xi32: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 24(a0) +; RV64-NEXT: ld a1, 0(a0) ; RV64-NEXT: ld a2, 8(a0) -; RV64-NEXT: ld a3, 0(a0) -; RV64-NEXT: ld a0, 16(a0) -; RV64-NEXT: xor a2, a3, a2 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: xor a0, a2, a0 +; RV64-NEXT: ld a3, 16(a0) +; RV64-NEXT: ld a0, 24(a0) +; RV64-NEXT: xor a1, a1, a2 +; RV64-NEXT: xor a0, a3, a0 +; RV64-NEXT: xor a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 %e1 = extractelement <4 x i32> %v, i32 1 @@ -72,24 +72,24 @@ define i32 @reduce_xor_4xi32(<4 x i32> %v) { define i32 @reduce_or_4xi32(<4 x i32> %v) { ; RV32-LABEL: reduce_or_4xi32: ; RV32: # %bb.0: -; RV32-NEXT: lw a1, 12(a0) +; RV32-NEXT: lw a1, 0(a0) ; RV32-NEXT: lw a2, 4(a0) -; RV32-NEXT: lw a3, 0(a0) -; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: lw a3, 8(a0) +; RV32-NEXT: lw a0, 12(a0) +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: reduce_or_4xi32: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 24(a0) +; 
RV64-NEXT: ld a1, 0(a0) ; RV64-NEXT: ld a2, 8(a0) -; RV64-NEXT: ld a3, 0(a0) -; RV64-NEXT: ld a0, 16(a0) -; RV64-NEXT: or a2, a3, a2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: ld a3, 16(a0) +; RV64-NEXT: ld a0, 24(a0) +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: or a0, a3, a0 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 %e1 = extractelement <4 x i32> %v, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index fa320f53cec6ce..e24b1b41645cdf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -683,9 +683,9 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 0(a1) ; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srli a5, a3, 1 ; RV32I-NEXT: lui a6, 349525 ; RV32I-NEXT: addi a6, a6, 1365 @@ -707,37 +707,37 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-NEXT: slli t0, a3, 16 ; RV32I-NEXT: add a3, a3, t0 ; RV32I-NEXT: srli a3, a3, 24 -; RV32I-NEXT: srli t0, a1, 1 +; RV32I-NEXT: srli t0, a4, 1 ; RV32I-NEXT: and t0, t0, a6 -; RV32I-NEXT: sub a1, a1, t0 -; RV32I-NEXT: and t0, a1, a5 +; RV32I-NEXT: sub a4, a4, t0 +; RV32I-NEXT: and t0, a4, a5 +; RV32I-NEXT: srli a4, a4, 2 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: add a4, t0, a4 +; RV32I-NEXT: srli t0, a4, 4 +; RV32I-NEXT: add a4, a4, t0 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: slli t0, a4, 8 +; RV32I-NEXT: add a4, a4, t0 +; RV32I-NEXT: slli t0, a4, 16 +; RV32I-NEXT: add a4, a4, t0 +; RV32I-NEXT: srli a4, a4, 24 +; RV32I-NEXT: add a3, a4, a3 +; RV32I-NEXT: srli a4, a1, 1 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: and a4, a1, a5 ; RV32I-NEXT: srli a1, a1, 2 ; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: add a1, t0, a1 -; RV32I-NEXT: srli t0, a1, 4 -; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: and a1, a1, a7 -; RV32I-NEXT: slli t0, a1, 8 -; RV32I-NEXT: add a1, a1, t0 -; RV32I-NEXT: slli t0, a1, 16 -; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: slli a4, a1, 16 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: srli a1, a1, 24 -; RV32I-NEXT: add a1, a1, a3 -; RV32I-NEXT: srli a3, a4, 1 -; RV32I-NEXT: and a3, a3, a6 -; RV32I-NEXT: sub a4, a4, a3 -; RV32I-NEXT: and a3, a4, a5 -; RV32I-NEXT: srli a4, a4, 2 -; RV32I-NEXT: and a4, a4, a5 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: srli a4, a3, 4 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: and a3, a3, a7 -; RV32I-NEXT: slli a4, a3, 8 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: slli a4, a3, 16 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: srli a3, a3, 24 ; RV32I-NEXT: srli a4, a2, 1 ; RV32I-NEXT: and a4, a4, a6 ; RV32I-NEXT: sub a2, a2, a4 @@ -753,11 +753,11 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-NEXT: slli a4, a2, 16 ; RV32I-NEXT: add a2, a2, a4 ; RV32I-NEXT: srli a2, a2, 24 -; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw zero, 12(a0) ; RV32I-NEXT: sw zero, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i64: @@ -785,21 +785,21 @@ define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64_ult_two: ; 
RV32I: # %bb.0: ; RV32I-NEXT: lw a1, 0(a0) -; RV32I-NEXT: lw a2, 12(a0) +; RV32I-NEXT: lw a2, 4(a0) ; RV32I-NEXT: lw a3, 8(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: addi a4, a1, -1 -; RV32I-NEXT: and a4, a1, a4 +; RV32I-NEXT: lw a4, 12(a0) +; RV32I-NEXT: addi a0, a1, -1 +; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: addi a1, a3, -1 ; RV32I-NEXT: and a1, a3, a1 -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: seqz a2, a3 +; RV32I-NEXT: sub a2, a4, a2 +; RV32I-NEXT: and a2, a4, a2 ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: seqz a1, a1 ; RV32I-NEXT: ret @@ -828,21 +828,21 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64_ugt_one: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a1, 0(a0) -; RV32I-NEXT: lw a2, 12(a0) +; RV32I-NEXT: lw a2, 4(a0) ; RV32I-NEXT: lw a3, 8(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: addi a4, a1, -1 -; RV32I-NEXT: and a4, a1, a4 +; RV32I-NEXT: lw a4, 12(a0) +; RV32I-NEXT: addi a0, a1, -1 +; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a1, a3, -1 ; RV32I-NEXT: and a1, a3, a1 -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: seqz a2, a3 +; RV32I-NEXT: sub a2, a4, a2 +; RV32I-NEXT: and a2, a4, a2 ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: ret @@ -873,15 +873,15 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64_eq_one: ; RV32I: # %bb.0: ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: lw a2, 12(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: beqz a0, .LBB22_3 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: beqz a3, .LBB22_3 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: sub a3, a0, a3 -; RV32I-NEXT: xor a0, a0, a3 -; RV32I-NEXT: sltu a0, a3, a0 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: sub a0, a3, a0 +; RV32I-NEXT: xor a3, a3, a0 +; RV32I-NEXT: sltu a0, a0, a3 ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bnez a2, .LBB22_4 ; RV32I-NEXT: .LBB22_2: @@ -890,9 +890,9 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind { ; RV32I-NEXT: sltu a1, a2, a1 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB22_3: -; RV32I-NEXT: addi a0, a3, -1 -; RV32I-NEXT: xor a3, a3, a0 -; RV32I-NEXT: sltu a0, a0, a3 +; RV32I-NEXT: addi a3, a0, -1 +; RV32I-NEXT: xor a0, a0, a3 +; RV32I-NEXT: sltu a0, a3, a0 ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: beqz a2, .LBB22_2 ; RV32I-NEXT: .LBB22_4: @@ -927,20 +927,20 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind { define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64_ne_one: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a2, 0(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a1, 12(a0) -; RV32I-NEXT: lw a2, 4(a0) -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: beqz a2, .LBB23_2 +; RV32I-NEXT: beqz a3, .LBB23_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: xor a2, a2, a3 -; RV32I-NEXT: sltu a2, a3, a2 -; RV32I-NEXT: j .LBB23_3 -; RV32I-NEXT: 
.LBB23_2: -; RV32I-NEXT: addi a2, a3, -1 +; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: xor a3, a3, a2 ; RV32I-NEXT: sltu a2, a2, a3 +; RV32I-NEXT: j .LBB23_3 +; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: xor a2, a2, a3 +; RV32I-NEXT: sltu a2, a3, a2 ; RV32I-NEXT: .LBB23_3: ; RV32I-NEXT: lw a3, 8(a0) ; RV32I-NEXT: xori a0, a2, 1 diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll index f38aa71fb158d0..6c4466796aeedd 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll @@ -177,12 +177,12 @@ define i8 @test13(ptr %0, i64 %1) { ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: subw a2, a2, a1 ; RV64I-NEXT: add a2, a0, a2 -; RV64I-NEXT: lbu a2, 0(a2) ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: subw a3, a3, a1 ; RV64I-NEXT: add a0, a0, a3 +; RV64I-NEXT: lbu a1, 0(a2) ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %3 = mul i64 %1, -4294967296 %4 = add i64 %3, 4294967296 ; 1 << 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll index d34c10798f4821..92b88054a1d3bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll +++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll @@ -8,14 +8,14 @@ declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32) define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) { ; RV32-LABEL: vpreduce_add_v4i32: ; RV32: # %bb.0: -; RV32-NEXT: lw a4, 4(a1) -; RV32-NEXT: lw a5, 12(a1) +; RV32-NEXT: lw a4, 0(a1) +; RV32-NEXT: lw a5, 4(a1) ; RV32-NEXT: lw a6, 8(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a1, 12(a1) ; RV32-NEXT: lw a7, 0(a2) -; RV32-NEXT: lw t0, 8(a2) -; RV32-NEXT: lw t1, 12(a2) -; RV32-NEXT: lw a2, 4(a2) +; RV32-NEXT: lw t0, 4(a2) +; RV32-NEXT: lw t1, 8(a2) +; RV32-NEXT: lw a2, 12(a2) ; RV32-NEXT: snez t2, a3 ; RV32-NEXT: sltiu t3, a3, 3 ; RV32-NEXT: xori t3, t3, 1 @@ -23,34 +23,34 @@ define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) { ; RV32-NEXT: xori t4, t4, 1 ; RV32-NEXT: sltiu a3, a3, 2 ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: and a3, t4, t1 -; RV32-NEXT: and t0, t3, t0 +; RV32-NEXT: and a3, a3, t0 +; RV32-NEXT: and a2, t4, a2 +; RV32-NEXT: and t0, t3, t1 ; RV32-NEXT: and a7, t2, a7 ; RV32-NEXT: neg a7, a7 -; RV32-NEXT: and a1, a7, a1 +; RV32-NEXT: and a4, a7, a4 ; RV32-NEXT: neg a7, t0 ; RV32-NEXT: and a6, a7, a6 -; RV32-NEXT: neg a3, a3 -; RV32-NEXT: and a3, a3, a5 ; RV32-NEXT: neg a2, a2 -; RV32-NEXT: and a2, a2, a4 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: add a1, a1, a6 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: neg a2, a3 +; RV32-NEXT: and a2, a2, a5 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a4, a4, a6 +; RV32-NEXT: add a1, a4, a1 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: lw a4, 8(a1) -; RV64-NEXT: lw a5, 24(a1) +; RV64-NEXT: lw a4, 0(a1) +; RV64-NEXT: lw a5, 8(a1) ; RV64-NEXT: lw a6, 16(a1) -; RV64-NEXT: lw a1, 0(a1) +; RV64-NEXT: lw a1, 24(a1) ; RV64-NEXT: ld a7, 0(a2) -; RV64-NEXT: ld t0, 16(a2) -; RV64-NEXT: ld t1, 24(a2) -; RV64-NEXT: ld a2, 8(a2) +; RV64-NEXT: ld t0, 8(a2) +; RV64-NEXT: ld t1, 16(a2) +; RV64-NEXT: ld a2, 24(a2) ; RV64-NEXT: sext.w a3, a3 ; RV64-NEXT: snez t2, a3 ; RV64-NEXT: sltiu t3, a3, 3 @@ -59,21 +59,21 @@ define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) { ; 
RV64-NEXT: xori t4, t4, 1 ; RV64-NEXT: sltiu a3, a3, 2 ; RV64-NEXT: xori a3, a3, 1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: and a3, t4, t1 -; RV64-NEXT: and t0, t3, t0 +; RV64-NEXT: and a3, a3, t0 +; RV64-NEXT: and a2, t4, a2 +; RV64-NEXT: and t0, t3, t1 ; RV64-NEXT: and a7, t2, a7 ; RV64-NEXT: negw a7, a7 -; RV64-NEXT: and a1, a7, a1 +; RV64-NEXT: and a4, a7, a4 ; RV64-NEXT: negw a7, t0 ; RV64-NEXT: and a6, a7, a6 -; RV64-NEXT: negw a3, a3 -; RV64-NEXT: and a3, a3, a5 ; RV64-NEXT: negw a2, a2 -; RV64-NEXT: and a2, a2, a4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a1, a1, a6 -; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: negw a2, a3 +; RV64-NEXT: and a2, a2, a5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: add a1, a4, a1 ; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ret %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 309ca1f964287a..f1cfb6748fd619 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -121,13 +121,13 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: lbu a3, 985(sp) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v24, a2 +; CHECK-NEXT: lbu a1, 1012(sp) ; CHECK-NEXT: vmv.s.x v24, a3 -; CHECK-NEXT: li a1, 478 -; CHECK-NEXT: li a2, 477 -; CHECK-NEXT: lbu a3, 1012(sp) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: vmv.s.x v24, a3 +; CHECK-NEXT: li a2, 478 +; CHECK-NEXT: li a3, 477 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v24, a3 +; CHECK-NEXT: vmv.s.x v24, a1 ; CHECK-NEXT: li a1, 501 ; CHECK-NEXT: li a2, 500 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index 8ed19ddb1af5cf..81e20a29881630 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -26,38 +26,38 @@ define void @add_v4i32(ptr %x, ptr %y) { define void @add_v2i64(ptr %x, ptr %y) { ; RV32-LABEL: add_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: lw a2, 8(a0) -; RV32-NEXT: lw a3, 12(a0) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a3, 4(a1) ; RV32-NEXT: lw a4, 0(a0) ; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: lw a6, 4(a1) -; RV32-NEXT: lw a7, 0(a1) +; RV32-NEXT: lw a6, 8(a0) +; RV32-NEXT: lw a7, 12(a0) ; RV32-NEXT: lw t0, 8(a1) ; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a7, a4, a7 -; RV32-NEXT: sltu a4, a7, a4 -; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add t0, a2, t0 -; RV32-NEXT: sltu a2, t0, a2 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a3, a5, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: sltu a4, a2, a4 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a1, a7, a1 +; RV32-NEXT: add t0, a6, t0 +; RV32-NEXT: sltu a4, t0, a6 +; RV32-NEXT: add a1, a1, a4 ; RV32-NEXT: sw t0, 8(a0) -; RV32-NEXT: sw a7, 0(a0) +; RV32-NEXT: sw a2, 0(a0) ; RV32-NEXT: sw a1, 12(a0) -; RV32-NEXT: sw a4, 4(a0) +; RV32-NEXT: sw a3, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: ld a2, 8(a0) -; RV64-NEXT: ld a3, 0(a0) +; RV64-NEXT: ld a2, 0(a0) +; RV64-NEXT: ld a3, 8(a0) ; RV64-NEXT: ld a4, 0(a1) ; RV64-NEXT: ld 
a1, 8(a1) -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: add a1, a3, a1 ; RV64-NEXT: sd a1, 8(a0) -; RV64-NEXT: sd a3, 0(a0) +; RV64-NEXT: sd a2, 0(a0) ; RV64-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y @@ -134,14 +134,14 @@ define void @fadd_v4f32(ptr %x, ptr %y) { define void @fadd_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: fadd_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: fld fa5, 8(a0) -; CHECK-NEXT: fld fa4, 0(a0) +; CHECK-NEXT: fld fa5, 0(a0) +; CHECK-NEXT: fld fa4, 8(a0) ; CHECK-NEXT: fld fa3, 0(a1) ; CHECK-NEXT: fld fa2, 8(a1) -; CHECK-NEXT: fadd.d fa4, fa4, fa3 -; CHECK-NEXT: fadd.d fa5, fa5, fa2 -; CHECK-NEXT: fsd fa5, 8(a0) -; CHECK-NEXT: fsd fa4, 0(a0) +; CHECK-NEXT: fadd.d fa5, fa5, fa3 +; CHECK-NEXT: fadd.d fa4, fa4, fa2 +; CHECK-NEXT: fsd fa4, 8(a0) +; CHECK-NEXT: fsd fa5, 0(a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index cbea842e28f0f2..43cee6610e7872 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1398,37 +1398,37 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: lbu a2, 4(a0) +; RV32VB-NEXT: lbu a4, 5(a0) ; RV32VB-NEXT: or a1, a1, a3 -; RV32VB-NEXT: lbu a2, 5(a0) -; RV32VB-NEXT: lbu a3, 4(a0) -; RV32VB-NEXT: lbu a4, 6(a0) +; RV32VB-NEXT: lbu a3, 6(a0) ; RV32VB-NEXT: lbu a5, 7(a0) -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: slli a4, a4, 16 -; RV32VB-NEXT: slli a5, a5, 24 -; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: slli a4, a4, 8 ; RV32VB-NEXT: or a2, a2, a4 -; RV32VB-NEXT: lbu a3, 9(a0) +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a3, a5, a3 ; RV32VB-NEXT: lbu a4, 8(a0) -; RV32VB-NEXT: lbu a5, 10(a0) +; RV32VB-NEXT: lbu a5, 9(a0) +; RV32VB-NEXT: or a2, a2, a3 +; RV32VB-NEXT: lbu a3, 10(a0) ; RV32VB-NEXT: lbu a6, 11(a0) -; RV32VB-NEXT: slli a3, a3, 8 -; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a6, a6, 24 -; RV32VB-NEXT: or a4, a6, a5 -; RV32VB-NEXT: or a3, a3, a4 -; RV32VB-NEXT: lbu a4, 13(a0) +; RV32VB-NEXT: or a3, a6, a3 ; RV32VB-NEXT: lbu a5, 12(a0) -; RV32VB-NEXT: lbu a6, 14(a0) +; RV32VB-NEXT: lbu a6, 13(a0) +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: lbu a4, 14(a0) ; RV32VB-NEXT: lbu a0, 15(a0) -; RV32VB-NEXT: slli a4, a4, 8 -; RV32VB-NEXT: or a4, a5, a4 -; RV32VB-NEXT: slli a6, a6, 16 +; RV32VB-NEXT: slli a6, a6, 8 +; RV32VB-NEXT: or a5, a5, a6 +; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a6 -; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: or a0, a5, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 @@ -1443,27 +1443,27 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32VB-PACK-NEXT: lbu a3, 2(a0) ; RV32VB-PACK-NEXT: lbu a4, 3(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: packh a2, a3, a4 -; RV32VB-PACK-NEXT: pack a1, a1, a2 ; RV32VB-PACK-NEXT: lbu a2, 4(a0) -; RV32VB-PACK-NEXT: lbu a3, 5(a0) -; RV32VB-PACK-NEXT: lbu a4, 6(a0) -; RV32VB-PACK-NEXT: 
lbu a5, 7(a0) -; RV32VB-PACK-NEXT: lbu a6, 8(a0) -; RV32VB-PACK-NEXT: lbu a7, 9(a0) -; RV32VB-PACK-NEXT: packh a2, a2, a3 -; RV32VB-PACK-NEXT: packh a3, a4, a5 -; RV32VB-PACK-NEXT: pack a2, a2, a3 +; RV32VB-PACK-NEXT: lbu a5, 5(a0) +; RV32VB-PACK-NEXT: lbu a6, 6(a0) +; RV32VB-PACK-NEXT: lbu a7, 7(a0) +; RV32VB-PACK-NEXT: packh a3, a3, a4 +; RV32VB-PACK-NEXT: pack a1, a1, a3 +; RV32VB-PACK-NEXT: packh a2, a2, a5 ; RV32VB-PACK-NEXT: packh a3, a6, a7 -; RV32VB-PACK-NEXT: lbu a4, 10(a0) -; RV32VB-PACK-NEXT: lbu a5, 11(a0) -; RV32VB-PACK-NEXT: lbu a6, 12(a0) +; RV32VB-PACK-NEXT: lbu a4, 8(a0) +; RV32VB-PACK-NEXT: lbu a5, 9(a0) +; RV32VB-PACK-NEXT: pack a2, a2, a3 +; RV32VB-PACK-NEXT: lbu a3, 10(a0) +; RV32VB-PACK-NEXT: lbu a6, 11(a0) +; RV32VB-PACK-NEXT: packh a4, a4, a5 +; RV32VB-PACK-NEXT: lbu a5, 12(a0) ; RV32VB-PACK-NEXT: lbu a7, 13(a0) ; RV32VB-PACK-NEXT: lbu t0, 14(a0) ; RV32VB-PACK-NEXT: lbu a0, 15(a0) -; RV32VB-PACK-NEXT: packh a4, a4, a5 -; RV32VB-PACK-NEXT: pack a3, a3, a4 -; RV32VB-PACK-NEXT: packh a4, a6, a7 +; RV32VB-PACK-NEXT: packh a3, a3, a6 +; RV32VB-PACK-NEXT: pack a3, a4, a3 +; RV32VB-PACK-NEXT: packh a4, a5, a7 ; RV32VB-PACK-NEXT: packh a0, t0, a0 ; RV32VB-PACK-NEXT: pack a0, a4, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -1532,34 +1532,34 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 ; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: lbu a2, 4(a0) ; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a2, 4(a0) ; RVA22U64-NEXT: lbu a3, 5(a0) ; RVA22U64-NEXT: lbu a4, 6(a0) -; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: lbu a5, 7(a0) +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a3, a3, 40 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: slli a4, a4, 48 ; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: or a2, a2, a4 -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: lbu a2, 9(a0) ; RVA22U64-NEXT: lbu a3, 8(a0) -; RVA22U64-NEXT: lbu a4, 10(a0) +; RVA22U64-NEXT: lbu a4, 9(a0) +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 10(a0) ; RVA22U64-NEXT: lbu a5, 11(a0) -; RVA22U64-NEXT: slli a2, a2, 8 -; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: slli a4, a4, 16 +; RVA22U64-NEXT: slli a4, a4, 8 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a5, a5, 24 -; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: or a2, a2, a5 +; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: lbu a3, 12(a0) -; RVA22U64-NEXT: or a2, a2, a4 ; RVA22U64-NEXT: lbu a4, 13(a0) ; RVA22U64-NEXT: lbu a5, 14(a0) -; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: lbu a0, 15(a0) +; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: slli a4, a4, 40 ; RVA22U64-NEXT: or a3, a3, a4 ; RVA22U64-NEXT: slli a5, a5, 48 @@ -1576,34 +1576,34 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) -; RVA22U64-PACK-NEXT: lbu a3, 2(a0) +; RVA22U64-PACK-NEXT: lbu a6, 2(a0) ; RVA22U64-PACK-NEXT: lbu a4, 3(a0) -; RVA22U64-PACK-NEXT: packh a1, a1, a2 -; RVA22U64-PACK-NEXT: packh a2, a3, a4 -; RVA22U64-PACK-NEXT: lbu a3, 4(a0) -; RVA22U64-PACK-NEXT: lbu a4, 5(a0) -; RVA22U64-PACK-NEXT: packw a6, a1, a2 -; RVA22U64-PACK-NEXT: lbu a2, 6(a0) -; RVA22U64-PACK-NEXT: lbu a5, 7(a0) -; RVA22U64-PACK-NEXT: packh a3, a3, a4 -; RVA22U64-PACK-NEXT: lbu a4, 8(a0) -; RVA22U64-PACK-NEXT: lbu a1, 9(a0) +; RVA22U64-PACK-NEXT: packh a7, a1, a2 +; RVA22U64-PACK-NEXT: lbu a2, 
4(a0) +; RVA22U64-PACK-NEXT: lbu a5, 5(a0) +; RVA22U64-PACK-NEXT: lbu a3, 6(a0) +; RVA22U64-PACK-NEXT: lbu a1, 7(a0) +; RVA22U64-PACK-NEXT: packh a4, a6, a4 +; RVA22U64-PACK-NEXT: packw a4, a7, a4 ; RVA22U64-PACK-NEXT: packh a2, a2, a5 -; RVA22U64-PACK-NEXT: packw a2, a3, a2 -; RVA22U64-PACK-NEXT: pack a6, a6, a2 -; RVA22U64-PACK-NEXT: packh a7, a4, a1 -; RVA22U64-PACK-NEXT: lbu a3, 10(a0) -; RVA22U64-PACK-NEXT: lbu a4, 11(a0) -; RVA22U64-PACK-NEXT: lbu a5, 12(a0) -; RVA22U64-PACK-NEXT: lbu a2, 13(a0) -; RVA22U64-PACK-NEXT: lbu a1, 14(a0) +; RVA22U64-PACK-NEXT: packh a1, a3, a1 +; RVA22U64-PACK-NEXT: packw a1, a2, a1 +; RVA22U64-PACK-NEXT: lbu a2, 8(a0) +; RVA22U64-PACK-NEXT: lbu a3, 9(a0) +; RVA22U64-PACK-NEXT: pack a6, a4, a1 +; RVA22U64-PACK-NEXT: lbu a7, 10(a0) +; RVA22U64-PACK-NEXT: lbu a5, 11(a0) +; RVA22U64-PACK-NEXT: packh a2, a2, a3 +; RVA22U64-PACK-NEXT: lbu a3, 12(a0) +; RVA22U64-PACK-NEXT: lbu a1, 13(a0) +; RVA22U64-PACK-NEXT: lbu a4, 14(a0) ; RVA22U64-PACK-NEXT: lbu a0, 15(a0) -; RVA22U64-PACK-NEXT: packh a3, a3, a4 -; RVA22U64-PACK-NEXT: packw a3, a7, a3 -; RVA22U64-PACK-NEXT: packh a2, a5, a2 -; RVA22U64-PACK-NEXT: packh a0, a1, a0 -; RVA22U64-PACK-NEXT: packw a0, a2, a0 -; RVA22U64-PACK-NEXT: pack a0, a3, a0 +; RVA22U64-PACK-NEXT: packh a5, a7, a5 +; RVA22U64-PACK-NEXT: packw a2, a2, a5 +; RVA22U64-PACK-NEXT: packh a1, a3, a1 +; RVA22U64-PACK-NEXT: packh a0, a4, a0 +; RVA22U64-PACK-NEXT: packw a0, a1, a0 +; RVA22U64-PACK-NEXT: pack a0, a2, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a6 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 @@ -1720,39 +1720,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 22(a0) ; RV32-ONLY-NEXT: lbu a4, 31(a0) -; RV32-ONLY-NEXT: lbu a5, 44(a0) -; RV32-ONLY-NEXT: lbu a6, 55(a0) -; RV32-ONLY-NEXT: lbu a7, 623(a0) +; RV32-ONLY-NEXT: lbu a5, 623(a0) +; RV32-ONLY-NEXT: lbu a6, 44(a0) +; RV32-ONLY-NEXT: lbu a7, 55(a0) ; RV32-ONLY-NEXT: lbu t0, 75(a0) ; RV32-ONLY-NEXT: lbu t1, 82(a0) -; RV32-ONLY-NEXT: lbu t2, 93(a0) -; RV32-ONLY-NEXT: lbu t3, 105(a0) -; RV32-ONLY-NEXT: lbu t4, 161(a0) -; RV32-ONLY-NEXT: lbu t5, 124(a0) -; RV32-ONLY-NEXT: lbu t6, 163(a0) -; RV32-ONLY-NEXT: lbu s0, 144(a0) -; RV32-ONLY-NEXT: lbu a0, 154(a0) +; RV32-ONLY-NEXT: lbu t2, 154(a0) +; RV32-ONLY-NEXT: lbu t3, 161(a0) +; RV32-ONLY-NEXT: lbu t4, 163(a0) +; RV32-ONLY-NEXT: lbu t5, 93(a0) +; RV32-ONLY-NEXT: lbu t6, 105(a0) +; RV32-ONLY-NEXT: lbu s0, 124(a0) +; RV32-ONLY-NEXT: lbu a0, 144(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0 ; RV32-ONLY-NEXT: vmv.v.x v8, t1 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0 -; RV32-ONLY-NEXT: li a1, 255 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: li a0, 255 ; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; 
RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vmv.s.x v0, a0 ; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: addi sp, sp, 16 @@ -1770,36 +1770,36 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a3, a4, a3 ; RV32VB-NEXT: or a1, a1, a3 -; RV32VB-NEXT: lbu a2, 55(a0) -; RV32VB-NEXT: lbu a3, 44(a0) +; RV32VB-NEXT: lbu a2, 44(a0) +; RV32VB-NEXT: lbu a3, 55(a0) ; RV32VB-NEXT: lbu a4, 623(a0) ; RV32VB-NEXT: lbu a5, 75(a0) -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: lbu a6, 82(a0) +; RV32VB-NEXT: slli a3, a3, 8 +; RV32VB-NEXT: or a2, a2, a3 ; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli a5, a5, 24 ; RV32VB-NEXT: or a4, a5, a4 ; RV32VB-NEXT: or a2, a2, a4 ; RV32VB-NEXT: lbu a3, 93(a0) -; RV32VB-NEXT: lbu a4, 82(a0) -; RV32VB-NEXT: lbu a5, 105(a0) -; RV32VB-NEXT: lbu a6, 161(a0) +; RV32VB-NEXT: lbu a4, 105(a0) +; RV32VB-NEXT: lbu a5, 124(a0) +; RV32VB-NEXT: lbu a7, 144(a0) ; RV32VB-NEXT: slli a3, a3, 8 -; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: slli a5, a5, 16 -; RV32VB-NEXT: slli a6, a6, 24 -; RV32VB-NEXT: or a4, a6, a5 +; RV32VB-NEXT: lbu t0, 154(a0) +; RV32VB-NEXT: lbu t1, 161(a0) +; RV32VB-NEXT: or a3, a6, a3 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: lbu a0, 163(a0) +; RV32VB-NEXT: slli t1, t1, 24 +; RV32VB-NEXT: or a4, t1, a4 ; RV32VB-NEXT: or a3, a3, a4 -; RV32VB-NEXT: lbu a4, 163(a0) -; RV32VB-NEXT: lbu a5, 124(a0) -; RV32VB-NEXT: lbu a6, 144(a0) -; RV32VB-NEXT: lbu a0, 154(a0) -; RV32VB-NEXT: slli a4, a4, 8 -; RV32VB-NEXT: or a4, a5, a4 -; RV32VB-NEXT: slli a6, a6, 16 -; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a6 -; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: slli a0, a0, 8 +; RV32VB-NEXT: or a0, a5, a0 +; RV32VB-NEXT: slli a7, a7, 16 +; RV32VB-NEXT: slli t0, t0, 24 +; RV32VB-NEXT: or a4, t0, a7 +; RV32VB-NEXT: or a0, a0, a4 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 @@ -1815,32 +1815,32 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32VB-PACK-NEXT: lbu a4, 31(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a3, a4 +; RV32VB-PACK-NEXT: lbu a3, 623(a0) +; RV32VB-PACK-NEXT: lbu a4, 44(a0) +; RV32VB-PACK-NEXT: lbu a5, 55(a0) +; RV32VB-PACK-NEXT: lbu a6, 75(a0) ; RV32VB-PACK-NEXT: pack a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 44(a0) -; RV32VB-PACK-NEXT: lbu a3, 55(a0) -; RV32VB-PACK-NEXT: lbu a4, 623(a0) -; RV32VB-PACK-NEXT: lbu a5, 75(a0) -; RV32VB-PACK-NEXT: lbu a6, 82(a0) -; RV32VB-PACK-NEXT: lbu a7, 93(a0) -; RV32VB-PACK-NEXT: packh a2, a2, a3 -; RV32VB-PACK-NEXT: packh a3, a4, a5 -; RV32VB-PACK-NEXT: pack a2, a2, a3 -; RV32VB-PACK-NEXT: packh a3, a6, a7 -; RV32VB-PACK-NEXT: lbu a4, 105(a0) -; RV32VB-PACK-NEXT: lbu a5, 161(a0) -; RV32VB-PACK-NEXT: lbu a6, 124(a0) -; RV32VB-PACK-NEXT: lbu a7, 163(a0) -; RV32VB-PACK-NEXT: lbu t0, 144(a0) -; RV32VB-PACK-NEXT: lbu a0, 154(a0) +; RV32VB-PACK-NEXT: lbu a2, 82(a0) ; RV32VB-PACK-NEXT: packh a4, a4, a5 -; RV32VB-PACK-NEXT: pack a3, a3, a4 -; RV32VB-PACK-NEXT: packh a4, a6, a7 -; RV32VB-PACK-NEXT: packh a0, t0, a0 -; RV32VB-PACK-NEXT: pack a0, a4, a0 +; RV32VB-PACK-NEXT: packh a3, a3, a6 +; RV32VB-PACK-NEXT: pack a3, a4, a3 +; RV32VB-PACK-NEXT: lbu a4, 154(a0) +; RV32VB-PACK-NEXT: lbu a5, 161(a0) +; 
RV32VB-PACK-NEXT: lbu a6, 163(a0) +; RV32VB-PACK-NEXT: lbu a7, 93(a0) +; RV32VB-PACK-NEXT: lbu t0, 105(a0) +; RV32VB-PACK-NEXT: lbu t1, 124(a0) +; RV32VB-PACK-NEXT: lbu a0, 144(a0) +; RV32VB-PACK-NEXT: packh a2, a2, a7 +; RV32VB-PACK-NEXT: packh a5, t0, a5 +; RV32VB-PACK-NEXT: pack a2, a2, a5 +; RV32VB-PACK-NEXT: packh a5, t1, a6 +; RV32VB-PACK-NEXT: packh a0, a0, a4 +; RV32VB-PACK-NEXT: pack a0, a5, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; @@ -1854,39 +1854,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 22(a0) ; RV64V-ONLY-NEXT: lbu a4, 31(a0) -; RV64V-ONLY-NEXT: lbu a5, 44(a0) -; RV64V-ONLY-NEXT: lbu a6, 55(a0) -; RV64V-ONLY-NEXT: lbu a7, 623(a0) +; RV64V-ONLY-NEXT: lbu a5, 623(a0) +; RV64V-ONLY-NEXT: lbu a6, 44(a0) +; RV64V-ONLY-NEXT: lbu a7, 55(a0) ; RV64V-ONLY-NEXT: lbu t0, 75(a0) ; RV64V-ONLY-NEXT: lbu t1, 82(a0) -; RV64V-ONLY-NEXT: lbu t2, 93(a0) -; RV64V-ONLY-NEXT: lbu t3, 105(a0) -; RV64V-ONLY-NEXT: lbu t4, 161(a0) -; RV64V-ONLY-NEXT: lbu t5, 124(a0) -; RV64V-ONLY-NEXT: lbu t6, 163(a0) -; RV64V-ONLY-NEXT: lbu s0, 144(a0) -; RV64V-ONLY-NEXT: lbu a0, 154(a0) +; RV64V-ONLY-NEXT: lbu t2, 154(a0) +; RV64V-ONLY-NEXT: lbu t3, 161(a0) +; RV64V-ONLY-NEXT: lbu t4, 163(a0) +; RV64V-ONLY-NEXT: lbu t5, 93(a0) +; RV64V-ONLY-NEXT: lbu t6, 105(a0) +; RV64V-ONLY-NEXT: lbu s0, 124(a0) +; RV64V-ONLY-NEXT: lbu a0, 144(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0 ; RV64V-ONLY-NEXT: vmv.v.x v8, t1 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0 -; RV64V-ONLY-NEXT: li a1, 255 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV64V-ONLY-NEXT: li a0, 255 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vmv.s.x v0, a0 ; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: addi sp, sp, 16 @@ -1903,43 +1903,43 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 ; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: lbu a2, 44(a0) ; RVA22U64-NEXT: or a1, a1, a3 -; RVA22U64-NEXT: lbu a3, 55(a0) -; RVA22U64-NEXT: lbu a4, 623(a0) -; RVA22U64-NEXT: slli a2, a2, 32 +; RVA22U64-NEXT: lbu a2, 623(a0) +; RVA22U64-NEXT: lbu a3, 44(a0) +; RVA22U64-NEXT: lbu a4, 55(a0) ; RVA22U64-NEXT: lbu a5, 75(a0) -; RVA22U64-NEXT: slli a3, a3, 40 -; 
RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: lbu a6, 82(a0) +; RVA22U64-NEXT: slli a3, a3, 32 +; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a2, a2, 48 ; RVA22U64-NEXT: slli a5, a5, 56 -; RVA22U64-NEXT: or a4, a4, a5 -; RVA22U64-NEXT: or a2, a2, a4 -; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: or a2, a2, a5 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: or a7, a1, a2 ; RVA22U64-NEXT: lbu a2, 93(a0) -; RVA22U64-NEXT: lbu a3, 82(a0) -; RVA22U64-NEXT: lbu a4, 105(a0) -; RVA22U64-NEXT: lbu a5, 161(a0) +; RVA22U64-NEXT: lbu t0, 105(a0) +; RVA22U64-NEXT: lbu a4, 124(a0) +; RVA22U64-NEXT: lbu a5, 144(a0) ; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: lbu a1, 154(a0) +; RVA22U64-NEXT: lbu a3, 161(a0) +; RVA22U64-NEXT: or a2, a6, a2 +; RVA22U64-NEXT: slli t0, t0, 16 +; RVA22U64-NEXT: lbu a0, 163(a0) +; RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: or a3, a3, t0 ; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: slli a4, a4, 16 -; RVA22U64-NEXT: slli a5, a5, 24 -; RVA22U64-NEXT: or a4, a4, a5 -; RVA22U64-NEXT: lbu a3, 124(a0) -; RVA22U64-NEXT: or a2, a2, a4 -; RVA22U64-NEXT: lbu a4, 163(a0) -; RVA22U64-NEXT: lbu a5, 144(a0) -; RVA22U64-NEXT: slli a3, a3, 32 -; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: slli a4, a4, 40 -; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a4, a4, 32 +; RVA22U64-NEXT: slli a0, a0, 40 +; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: slli a5, a5, 48 -; RVA22U64-NEXT: slli a0, a0, 56 -; RVA22U64-NEXT: or a0, a0, a5 -; RVA22U64-NEXT: or a0, a0, a3 +; RVA22U64-NEXT: slli a1, a1, 56 +; RVA22U64-NEXT: or a1, a1, a5 +; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-NEXT: vmv.v.x v8, a1 +; RVA22U64-NEXT: vmv.v.x v8, a7 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; @@ -1949,34 +1949,34 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) ; RVA22U64-PACK-NEXT: lbu a3, 22(a0) ; RVA22U64-PACK-NEXT: lbu a4, 31(a0) -; RVA22U64-PACK-NEXT: packh a1, a1, a2 +; RVA22U64-PACK-NEXT: packh a6, a1, a2 ; RVA22U64-PACK-NEXT: packh a2, a3, a4 -; RVA22U64-PACK-NEXT: lbu a3, 44(a0) -; RVA22U64-PACK-NEXT: lbu a4, 55(a0) -; RVA22U64-PACK-NEXT: packw a6, a1, a2 -; RVA22U64-PACK-NEXT: lbu a2, 623(a0) -; RVA22U64-PACK-NEXT: lbu a5, 75(a0) -; RVA22U64-PACK-NEXT: packh a3, a3, a4 -; RVA22U64-PACK-NEXT: lbu a4, 82(a0) -; RVA22U64-PACK-NEXT: lbu a1, 93(a0) -; RVA22U64-PACK-NEXT: packh a2, a2, a5 -; RVA22U64-PACK-NEXT: packw a2, a3, a2 -; RVA22U64-PACK-NEXT: pack a6, a6, a2 -; RVA22U64-PACK-NEXT: packh a7, a4, a1 -; RVA22U64-PACK-NEXT: lbu a3, 105(a0) -; RVA22U64-PACK-NEXT: lbu a4, 161(a0) -; RVA22U64-PACK-NEXT: lbu a5, 124(a0) -; RVA22U64-PACK-NEXT: lbu a2, 163(a0) -; RVA22U64-PACK-NEXT: lbu a1, 144(a0) -; RVA22U64-PACK-NEXT: lbu a0, 154(a0) -; RVA22U64-PACK-NEXT: packh a3, a3, a4 -; RVA22U64-PACK-NEXT: packw a3, a7, a3 -; RVA22U64-PACK-NEXT: packh a2, a5, a2 -; RVA22U64-PACK-NEXT: packh a0, a1, a0 +; RVA22U64-PACK-NEXT: lbu a3, 623(a0) +; RVA22U64-PACK-NEXT: lbu a4, 44(a0) +; RVA22U64-PACK-NEXT: lbu a5, 55(a0) +; RVA22U64-PACK-NEXT: lbu a1, 75(a0) +; RVA22U64-PACK-NEXT: packw a2, a6, a2 +; RVA22U64-PACK-NEXT: lbu a6, 82(a0) +; RVA22U64-PACK-NEXT: packh a4, a4, a5 +; RVA22U64-PACK-NEXT: packh a1, a3, a1 +; RVA22U64-PACK-NEXT: packw a1, a4, a1 +; RVA22U64-PACK-NEXT: pack a7, a2, a1 +; RVA22U64-PACK-NEXT: lbu t0, 154(a0) +; RVA22U64-PACK-NEXT: lbu a3, 161(a0) +; 
RVA22U64-PACK-NEXT: lbu a4, 163(a0) +; RVA22U64-PACK-NEXT: lbu a5, 93(a0) +; RVA22U64-PACK-NEXT: lbu a1, 105(a0) +; RVA22U64-PACK-NEXT: lbu a2, 124(a0) +; RVA22U64-PACK-NEXT: lbu a0, 144(a0) +; RVA22U64-PACK-NEXT: packh a5, a6, a5 +; RVA22U64-PACK-NEXT: packh a1, a1, a3 +; RVA22U64-PACK-NEXT: packw a1, a5, a1 +; RVA22U64-PACK-NEXT: packh a2, a2, a4 +; RVA22U64-PACK-NEXT: packh a0, a0, t0 ; RVA22U64-PACK-NEXT: packw a0, a2, a0 -; RVA22U64-PACK-NEXT: pack a0, a3, a0 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-PACK-NEXT: vmv.v.x v8, a6 +; RVA22U64-PACK-NEXT: vmv.v.x v8, a7 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; @@ -1990,39 +1990,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 22(a0) ; RV64ZVE32-NEXT: lbu a4, 31(a0) -; RV64ZVE32-NEXT: lbu a5, 44(a0) -; RV64ZVE32-NEXT: lbu a6, 55(a0) -; RV64ZVE32-NEXT: lbu a7, 623(a0) +; RV64ZVE32-NEXT: lbu a5, 623(a0) +; RV64ZVE32-NEXT: lbu a6, 44(a0) +; RV64ZVE32-NEXT: lbu a7, 55(a0) ; RV64ZVE32-NEXT: lbu t0, 75(a0) ; RV64ZVE32-NEXT: lbu t1, 82(a0) -; RV64ZVE32-NEXT: lbu t2, 93(a0) -; RV64ZVE32-NEXT: lbu t3, 105(a0) -; RV64ZVE32-NEXT: lbu t4, 161(a0) -; RV64ZVE32-NEXT: lbu t5, 124(a0) -; RV64ZVE32-NEXT: lbu t6, 163(a0) -; RV64ZVE32-NEXT: lbu s0, 144(a0) -; RV64ZVE32-NEXT: lbu a0, 154(a0) +; RV64ZVE32-NEXT: lbu t2, 154(a0) +; RV64ZVE32-NEXT: lbu t3, 161(a0) +; RV64ZVE32-NEXT: lbu t4, 163(a0) +; RV64ZVE32-NEXT: lbu t5, 93(a0) +; RV64ZVE32-NEXT: lbu t6, 105(a0) +; RV64ZVE32-NEXT: lbu s0, 124(a0) +; RV64ZVE32-NEXT: lbu a0, 144(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0 ; RV64ZVE32-NEXT: vmv.v.x v8, t1 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0 -; RV64ZVE32-NEXT: li a1, 255 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32-NEXT: li a0, 255 ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vmv.s.x v0, a0 ; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: addi sp, sp, 16 @@ -2085,20 +2085,20 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV32-ONLY-NEXT: lbu a1, 82(a0) ; RV32-ONLY-NEXT: lbu a2, 93(a0) ; RV32-ONLY-NEXT: lbu a3, 105(a0) -; RV32-ONLY-NEXT: lbu a4, 161(a0) -; RV32-ONLY-NEXT: lbu a5, 124(a0) -; RV32-ONLY-NEXT: lbu a6, 163(a0) -; RV32-ONLY-NEXT: lbu a7, 144(a0) -; RV32-ONLY-NEXT: lbu a0, 154(a0) +; RV32-ONLY-NEXT: lbu a4, 124(a0) +; RV32-ONLY-NEXT: lbu a5, 144(a0) +; RV32-ONLY-NEXT: lbu a6, 154(a0) +; RV32-ONLY-NEXT: lbu a7, 161(a0) +; RV32-ONLY-NEXT: lbu a0, 163(a0) ; RV32-ONLY-NEXT: vsetivli zero, 
16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_undef_low_half: @@ -2106,23 +2106,23 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV32VB-NEXT: lbu a1, 93(a0) ; RV32VB-NEXT: lbu a2, 82(a0) ; RV32VB-NEXT: lbu a3, 105(a0) -; RV32VB-NEXT: lbu a4, 161(a0) +; RV32VB-NEXT: lbu a4, 124(a0) ; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: lbu a5, 144(a0) +; RV32VB-NEXT: lbu a6, 154(a0) +; RV32VB-NEXT: lbu a7, 161(a0) ; RV32VB-NEXT: or a1, a2, a1 ; RV32VB-NEXT: slli a3, a3, 16 -; RV32VB-NEXT: slli a4, a4, 24 -; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: or a1, a1, a3 -; RV32VB-NEXT: lbu a2, 163(a0) -; RV32VB-NEXT: lbu a3, 124(a0) -; RV32VB-NEXT: lbu a4, 144(a0) -; RV32VB-NEXT: lbu a0, 154(a0) -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: slli a4, a4, 16 -; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a4 -; RV32VB-NEXT: or a0, a2, a0 +; RV32VB-NEXT: lbu a0, 163(a0) +; RV32VB-NEXT: slli a7, a7, 24 +; RV32VB-NEXT: or a2, a7, a3 +; RV32VB-NEXT: or a1, a1, a2 +; RV32VB-NEXT: slli a0, a0, 8 +; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a6, a6, 24 +; RV32VB-NEXT: or a2, a6, a5 +; RV32VB-NEXT: or a0, a0, a2 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.i v8, 0 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero @@ -2132,26 +2132,26 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half: ; RV32VB-PACK: # %bb.0: -; RV32VB-PACK-NEXT: lbu a1, 82(a0) -; RV32VB-PACK-NEXT: lbu a2, 93(a0) -; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 105(a0) +; RV32VB-PACK-NEXT: lbu a1, 144(a0) +; RV32VB-PACK-NEXT: lbu a2, 154(a0) ; RV32VB-PACK-NEXT: lbu a3, 161(a0) -; RV32VB-PACK-NEXT: lbu a4, 124(a0) -; RV32VB-PACK-NEXT: lbu a5, 163(a0) -; RV32VB-PACK-NEXT: lbu a6, 144(a0) -; RV32VB-PACK-NEXT: lbu a0, 154(a0) -; RV32VB-PACK-NEXT: packh a2, a2, a3 -; RV32VB-PACK-NEXT: pack a1, a1, a2 -; RV32VB-PACK-NEXT: packh a2, a4, a5 -; RV32VB-PACK-NEXT: packh a0, a6, a0 -; RV32VB-PACK-NEXT: pack a0, a2, a0 -; RV32VB-PACK-NEXT: packh a2, a0, a0 -; RV32VB-PACK-NEXT: pack a2, a2, a2 +; RV32VB-PACK-NEXT: lbu a4, 82(a0) +; RV32VB-PACK-NEXT: lbu a5, 93(a0) +; RV32VB-PACK-NEXT: lbu a6, 105(a0) +; RV32VB-PACK-NEXT: lbu a7, 124(a0) +; RV32VB-PACK-NEXT: lbu a0, 163(a0) +; RV32VB-PACK-NEXT: packh a4, a4, a5 +; RV32VB-PACK-NEXT: packh a3, a6, a3 +; RV32VB-PACK-NEXT: pack a3, a4, a3 +; RV32VB-PACK-NEXT: packh a0, a7, a0 +; RV32VB-PACK-NEXT: packh a1, a1, a2 +; RV32VB-PACK-NEXT: pack a0, a0, a1 +; RV32VB-PACK-NEXT: packh a1, a0, a0 +; RV32VB-PACK-NEXT: pack a1, a1, a1 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32VB-PACK-NEXT: vmv.v.x v8, a2 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; @@ -2160,44 +2160,44 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV64V-ONLY-NEXT: lbu a1, 82(a0) ; 
RV64V-ONLY-NEXT: lbu a2, 93(a0) ; RV64V-ONLY-NEXT: lbu a3, 105(a0) -; RV64V-ONLY-NEXT: lbu a4, 161(a0) -; RV64V-ONLY-NEXT: lbu a5, 124(a0) -; RV64V-ONLY-NEXT: lbu a6, 163(a0) -; RV64V-ONLY-NEXT: lbu a7, 144(a0) -; RV64V-ONLY-NEXT: lbu a0, 154(a0) +; RV64V-ONLY-NEXT: lbu a4, 124(a0) +; RV64V-ONLY-NEXT: lbu a5, 144(a0) +; RV64V-ONLY-NEXT: lbu a6, 154(a0) +; RV64V-ONLY-NEXT: lbu a7, 161(a0) +; RV64V-ONLY-NEXT: lbu a0, 163(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 93(a0) -; RVA22U64-NEXT: lbu a2, 82(a0) -; RVA22U64-NEXT: lbu a3, 105(a0) -; RVA22U64-NEXT: lbu a4, 161(a0) +; RVA22U64-NEXT: lbu a6, 82(a0) +; RVA22U64-NEXT: lbu a7, 105(a0) +; RVA22U64-NEXT: lbu a4, 124(a0) ; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: slli a3, a3, 16 -; RVA22U64-NEXT: slli a4, a4, 24 -; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: lbu a2, 124(a0) +; RVA22U64-NEXT: lbu a5, 144(a0) +; RVA22U64-NEXT: lbu a2, 154(a0) +; RVA22U64-NEXT: lbu a3, 161(a0) +; RVA22U64-NEXT: or a1, a6, a1 +; RVA22U64-NEXT: slli a7, a7, 16 +; RVA22U64-NEXT: lbu a0, 163(a0) +; RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: or a3, a3, a7 ; RVA22U64-NEXT: or a1, a1, a3 -; RVA22U64-NEXT: lbu a3, 163(a0) -; RVA22U64-NEXT: lbu a4, 144(a0) -; RVA22U64-NEXT: slli a2, a2, 32 -; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: slli a3, a3, 40 -; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: slli a4, a4, 48 -; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: slli a4, a4, 32 +; RVA22U64-NEXT: slli a0, a0, 40 ; RVA22U64-NEXT: or a0, a0, a4 +; RVA22U64-NEXT: slli a5, a5, 48 +; RVA22U64-NEXT: slli a2, a2, 56 +; RVA22U64-NEXT: or a2, a2, a5 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -2207,21 +2207,21 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a1, 82(a0) -; RVA22U64-PACK-NEXT: lbu a2, 93(a0) -; RVA22U64-PACK-NEXT: packh a6, a1, a2 -; RVA22U64-PACK-NEXT: lbu a2, 105(a0) +; RVA22U64-PACK-NEXT: lbu a6, 144(a0) +; RVA22U64-PACK-NEXT: lbu a7, 154(a0) ; RVA22U64-PACK-NEXT: lbu a3, 161(a0) -; RVA22U64-PACK-NEXT: lbu a4, 124(a0) -; RVA22U64-PACK-NEXT: lbu a5, 163(a0) -; RVA22U64-PACK-NEXT: lbu a1, 144(a0) -; RVA22U64-PACK-NEXT: lbu a0, 154(a0) -; RVA22U64-PACK-NEXT: packh a2, a2, a3 -; RVA22U64-PACK-NEXT: packw a2, a6, a2 -; RVA22U64-PACK-NEXT: packh a3, a4, a5 -; RVA22U64-PACK-NEXT: packh a0, a1, a0 -; RVA22U64-PACK-NEXT: packw a0, a3, a0 -; RVA22U64-PACK-NEXT: pack a0, a2, a0 +; RVA22U64-PACK-NEXT: lbu a4, 82(a0) +; RVA22U64-PACK-NEXT: lbu a5, 93(a0) +; RVA22U64-PACK-NEXT: lbu a1, 105(a0) +; RVA22U64-PACK-NEXT: lbu a2, 124(a0) +; RVA22U64-PACK-NEXT: lbu a0, 163(a0) +; RVA22U64-PACK-NEXT: packh a4, a4, a5 +; RVA22U64-PACK-NEXT: packh a1, a1, a3 +; RVA22U64-PACK-NEXT: packw a1, a4, a1 +; RVA22U64-PACK-NEXT: packh a0, a2, a0 +; RVA22U64-PACK-NEXT: packh a2, a6, a7 
+; RVA22U64-PACK-NEXT: packw a0, a0, a2 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: packh a1, a0, a0 ; RVA22U64-PACK-NEXT: packw a1, a1, a1 ; RVA22U64-PACK-NEXT: pack a1, a1, a1 @@ -2235,20 +2235,20 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV64ZVE32-NEXT: lbu a1, 82(a0) ; RV64ZVE32-NEXT: lbu a2, 93(a0) ; RV64ZVE32-NEXT: lbu a3, 105(a0) -; RV64ZVE32-NEXT: lbu a4, 161(a0) -; RV64ZVE32-NEXT: lbu a5, 124(a0) -; RV64ZVE32-NEXT: lbu a6, 163(a0) -; RV64ZVE32-NEXT: lbu a7, 144(a0) -; RV64ZVE32-NEXT: lbu a0, 154(a0) +; RV64ZVE32-NEXT: lbu a4, 124(a0) +; RV64ZVE32-NEXT: lbu a5, 144(a0) +; RV64ZVE32-NEXT: lbu a6, 154(a0) +; RV64ZVE32-NEXT: lbu a7, 161(a0) +; RV64ZVE32-NEXT: lbu a0, 163(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32-NEXT: ret %p9 = getelementptr i8, ptr %p, i32 82 %p10 = getelementptr i8, ptr %p, i32 93 @@ -2286,18 +2286,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 22(a0) ; RV32-ONLY-NEXT: lbu a4, 31(a0) -; RV32-ONLY-NEXT: lbu a5, 44(a0) -; RV32-ONLY-NEXT: lbu a6, 55(a0) -; RV32-ONLY-NEXT: lbu a7, 623(a0) +; RV32-ONLY-NEXT: lbu a5, 623(a0) +; RV32-ONLY-NEXT: lbu a6, 44(a0) +; RV32-ONLY-NEXT: lbu a7, 55(a0) ; RV32-ONLY-NEXT: lbu a0, 75(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8 ; RV32-ONLY-NEXT: ret @@ -2313,16 +2313,16 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: lbu a2, 44(a0) +; RV32VB-NEXT: lbu a4, 55(a0) ; RV32VB-NEXT: or a1, a1, a3 -; RV32VB-NEXT: lbu a2, 55(a0) -; RV32VB-NEXT: lbu a3, 44(a0) -; RV32VB-NEXT: lbu a4, 623(a0) +; RV32VB-NEXT: lbu a3, 623(a0) ; RV32VB-NEXT: lbu a0, 75(a0) -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: or a0, a0, a3 ; RV32VB-NEXT: or a0, a2, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 @@ -2335,18 +2335,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 0(a0) ; RV32VB-PACK-NEXT: lbu a2, 1(a0) +; RV32VB-PACK-NEXT: lbu a3, 22(a0) +; RV32VB-PACK-NEXT: lbu a4, 31(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 22(a0) -; RV32VB-PACK-NEXT: lbu a3, 31(a0) -; RV32VB-PACK-NEXT: lbu a4, 44(a0) -; RV32VB-PACK-NEXT: lbu a5, 55(a0) -; RV32VB-PACK-NEXT: lbu a6, 623(a0) +; RV32VB-PACK-NEXT: lbu a2, 623(a0) +; 
RV32VB-PACK-NEXT: lbu a5, 44(a0) +; RV32VB-PACK-NEXT: lbu a6, 55(a0) ; RV32VB-PACK-NEXT: lbu a0, 75(a0) -; RV32VB-PACK-NEXT: packh a2, a2, a3 -; RV32VB-PACK-NEXT: pack a1, a1, a2 -; RV32VB-PACK-NEXT: packh a2, a4, a5 -; RV32VB-PACK-NEXT: packh a0, a6, a0 -; RV32VB-PACK-NEXT: pack a0, a2, a0 +; RV32VB-PACK-NEXT: packh a3, a3, a4 +; RV32VB-PACK-NEXT: pack a1, a1, a3 +; RV32VB-PACK-NEXT: packh a3, a5, a6 +; RV32VB-PACK-NEXT: packh a0, a2, a0 +; RV32VB-PACK-NEXT: pack a0, a3, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 @@ -2362,18 +2362,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 22(a0) ; RV64V-ONLY-NEXT: lbu a4, 31(a0) -; RV64V-ONLY-NEXT: lbu a5, 44(a0) -; RV64V-ONLY-NEXT: lbu a6, 55(a0) -; RV64V-ONLY-NEXT: lbu a7, 623(a0) +; RV64V-ONLY-NEXT: lbu a5, 623(a0) +; RV64V-ONLY-NEXT: lbu a6, 44(a0) +; RV64V-ONLY-NEXT: lbu a7, 55(a0) ; RV64V-ONLY-NEXT: lbu a0, 75(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8 ; RV64V-ONLY-NEXT: ret @@ -2389,12 +2389,12 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 ; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: lbu a2, 44(a0) ; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a2, 44(a0) ; RVA22U64-NEXT: lbu a3, 55(a0) ; RVA22U64-NEXT: lbu a4, 623(a0) -; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: lbu a0, 75(a0) +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a3, a3, 40 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: slli a4, a4, 48 @@ -2411,19 +2411,19 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) -; RVA22U64-PACK-NEXT: packh a6, a1, a2 -; RVA22U64-PACK-NEXT: lbu a2, 22(a0) -; RVA22U64-PACK-NEXT: lbu a3, 31(a0) -; RVA22U64-PACK-NEXT: lbu a4, 44(a0) -; RVA22U64-PACK-NEXT: lbu a5, 55(a0) -; RVA22U64-PACK-NEXT: lbu a1, 623(a0) +; RVA22U64-PACK-NEXT: lbu a6, 22(a0) +; RVA22U64-PACK-NEXT: lbu a4, 31(a0) +; RVA22U64-PACK-NEXT: packh a1, a1, a2 +; RVA22U64-PACK-NEXT: lbu a2, 623(a0) +; RVA22U64-PACK-NEXT: lbu a5, 44(a0) +; RVA22U64-PACK-NEXT: lbu a3, 55(a0) ; RVA22U64-PACK-NEXT: lbu a0, 75(a0) -; RVA22U64-PACK-NEXT: packh a2, a2, a3 -; RVA22U64-PACK-NEXT: packw a2, a6, a2 -; RVA22U64-PACK-NEXT: packh a3, a4, a5 -; RVA22U64-PACK-NEXT: packh a0, a1, a0 +; RVA22U64-PACK-NEXT: packh a4, a6, a4 +; RVA22U64-PACK-NEXT: packw a1, a1, a4 +; RVA22U64-PACK-NEXT: packh a3, a5, a3 +; RVA22U64-PACK-NEXT: packh a0, a2, a0 ; RVA22U64-PACK-NEXT: packw a0, a3, a0 -; RVA22U64-PACK-NEXT: pack a0, a2, a0 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a0 ; RVA22U64-PACK-NEXT: packh a0, a0, a0 @@ -2438,18 +2438,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 22(a0) ; RV64ZVE32-NEXT: lbu a4, 31(a0) -; RV64ZVE32-NEXT: lbu 
a5, 44(a0) -; RV64ZVE32-NEXT: lbu a6, 55(a0) -; RV64ZVE32-NEXT: lbu a7, 623(a0) +; RV64ZVE32-NEXT: lbu a5, 623(a0) +; RV64ZVE32-NEXT: lbu a6, 44(a0) +; RV64ZVE32-NEXT: lbu a7, 55(a0) ; RV64ZVE32-NEXT: lbu a0, 75(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32-NEXT: ret @@ -2484,20 +2484,20 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges: ; RV32-ONLY: # %bb.0: -; RV32-ONLY-NEXT: lbu a1, 31(a0) -; RV32-ONLY-NEXT: lbu a2, 44(a0) -; RV32-ONLY-NEXT: lbu a3, 55(a0) -; RV32-ONLY-NEXT: lbu a4, 623(a0) +; RV32-ONLY-NEXT: lbu a1, 623(a0) +; RV32-ONLY-NEXT: lbu a2, 31(a0) +; RV32-ONLY-NEXT: lbu a3, 44(a0) +; RV32-ONLY-NEXT: lbu a4, 55(a0) ; RV32-ONLY-NEXT: lbu a5, 75(a0) ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 105(a0) ; RV32-ONLY-NEXT: lbu a0, 161(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-ONLY-NEXT: vmv.v.x v8, a1 -; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vmv.v.x v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5 ; RV32-ONLY-NEXT: vmv.v.x v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 @@ -2513,58 +2513,58 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; ; RV32VB-LABEL: buildvec_v16i8_undef_edges: ; RV32VB: # %bb.0: -; RV32VB-NEXT: lbu a1, 31(a0) -; RV32VB-NEXT: lbu a2, 55(a0) +; RV32VB-NEXT: lbu a1, 55(a0) +; RV32VB-NEXT: lbu a2, 31(a0) ; RV32VB-NEXT: lbu a3, 44(a0) ; RV32VB-NEXT: lbu a4, 623(a0) ; RV32VB-NEXT: lbu a5, 75(a0) -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a1, a3, a1 ; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli a5, a5, 24 -; RV32VB-NEXT: lbu a3, 93(a0) ; RV32VB-NEXT: or a4, a5, a4 -; RV32VB-NEXT: or a2, a2, a4 -; RV32VB-NEXT: lbu a4, 82(a0) -; RV32VB-NEXT: slli a3, a3, 8 -; RV32VB-NEXT: lbu a5, 105(a0) +; RV32VB-NEXT: lbu a3, 82(a0) +; RV32VB-NEXT: lbu a5, 93(a0) +; RV32VB-NEXT: or a1, a1, a4 +; RV32VB-NEXT: lbu a4, 105(a0) ; RV32VB-NEXT: lbu a0, 161(a0) -; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: slli a1, a1, 24 -; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a3, a3, a5 +; RV32VB-NEXT: slli a2, a2, 24 +; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a5 +; RV32VB-NEXT: or a0, a0, a4 ; RV32VB-NEXT: or a0, a3, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32VB-NEXT: vmv.v.x v8, a1 -; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vmv.v.x v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges: ; RV32VB-PACK: # %bb.0: -; RV32VB-PACK-NEXT: lbu a1, 31(a0) -; RV32VB-PACK-NEXT: lbu a2, 44(a0) -; RV32VB-PACK-NEXT: lbu a3, 55(a0) -; RV32VB-PACK-NEXT: lbu a4, 623(a0) +; RV32VB-PACK-NEXT: lbu a1, 
623(a0) +; RV32VB-PACK-NEXT: lbu a2, 31(a0) +; RV32VB-PACK-NEXT: lbu a3, 44(a0) +; RV32VB-PACK-NEXT: lbu a4, 55(a0) ; RV32VB-PACK-NEXT: lbu a5, 75(a0) -; RV32VB-PACK-NEXT: packh a1, a0, a1 -; RV32VB-PACK-NEXT: packh a2, a2, a3 -; RV32VB-PACK-NEXT: packh a3, a4, a5 +; RV32VB-PACK-NEXT: packh a2, a0, a2 +; RV32VB-PACK-NEXT: packh a3, a3, a4 +; RV32VB-PACK-NEXT: packh a1, a1, a5 ; RV32VB-PACK-NEXT: lbu a4, 82(a0) ; RV32VB-PACK-NEXT: lbu a5, 93(a0) -; RV32VB-PACK-NEXT: pack a2, a2, a3 +; RV32VB-PACK-NEXT: pack a1, a3, a1 ; RV32VB-PACK-NEXT: lbu a3, 105(a0) ; RV32VB-PACK-NEXT: lbu a0, 161(a0) ; RV32VB-PACK-NEXT: packh a4, a4, a5 ; RV32VB-PACK-NEXT: packh a5, a0, a0 -; RV32VB-PACK-NEXT: pack a1, a5, a1 +; RV32VB-PACK-NEXT: pack a2, a5, a2 ; RV32VB-PACK-NEXT: packh a0, a3, a0 ; RV32VB-PACK-NEXT: pack a0, a4, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32VB-PACK-NEXT: vmv.v.x v8, a1 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-PACK-NEXT: vmv.v.x v8, a2 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: pack a0, a5, a5 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 @@ -2572,20 +2572,20 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges: ; RV64V-ONLY: # %bb.0: -; RV64V-ONLY-NEXT: lbu a1, 31(a0) -; RV64V-ONLY-NEXT: lbu a2, 44(a0) -; RV64V-ONLY-NEXT: lbu a3, 55(a0) -; RV64V-ONLY-NEXT: lbu a4, 623(a0) +; RV64V-ONLY-NEXT: lbu a1, 623(a0) +; RV64V-ONLY-NEXT: lbu a2, 31(a0) +; RV64V-ONLY-NEXT: lbu a3, 44(a0) +; RV64V-ONLY-NEXT: lbu a4, 55(a0) ; RV64V-ONLY-NEXT: lbu a5, 75(a0) ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 105(a0) ; RV64V-ONLY-NEXT: lbu a0, 161(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.v.x v8, a1 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vmv.v.x v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5 ; RV64V-ONLY-NEXT: vmv.v.x v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 @@ -2601,30 +2601,30 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a1, 44(a0) -; RVA22U64-NEXT: lbu a2, 55(a0) -; RVA22U64-NEXT: lbu a3, 31(a0) +; RVA22U64-NEXT: lbu a1, 31(a0) +; RVA22U64-NEXT: lbu a2, 44(a0) +; RVA22U64-NEXT: lbu a3, 55(a0) ; RVA22U64-NEXT: lbu a4, 623(a0) -; RVA22U64-NEXT: slli a1, a1, 32 -; RVA22U64-NEXT: slli a2, a2, 40 ; RVA22U64-NEXT: lbu a5, 75(a0) -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: slli a2, a2, 32 +; RVA22U64-NEXT: slli a3, a3, 40 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a1, a1, 24 ; RVA22U64-NEXT: slli a4, a4, 48 ; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: or a4, a4, a5 -; RVA22U64-NEXT: or a1, a1, a4 -; RVA22U64-NEXT: add.uw a1, a3, a1 -; RVA22U64-NEXT: lbu a2, 93(a0) +; RVA22U64-NEXT: or a2, a2, a4 ; RVA22U64-NEXT: lbu a3, 82(a0) -; RVA22U64-NEXT: lbu a4, 105(a0) +; RVA22U64-NEXT: lbu a4, 93(a0) +; RVA22U64-NEXT: add.uw a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 105(a0) ; RVA22U64-NEXT: lbu a0, 161(a0) -; RVA22U64-NEXT: slli a2, a2, 8 -; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: slli a4, a4, 16 +; RVA22U64-NEXT: slli a4, a4, 8 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a0, a0, 24 -; 
RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: or a0, a0, a3 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 @@ -2632,48 +2632,48 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a1, 31(a0) -; RVA22U64-PACK-NEXT: lbu a2, 44(a0) -; RVA22U64-PACK-NEXT: lbu a3, 55(a0) -; RVA22U64-PACK-NEXT: lbu a4, 623(a0) +; RVA22U64-PACK-NEXT: lbu a1, 623(a0) +; RVA22U64-PACK-NEXT: lbu a2, 31(a0) +; RVA22U64-PACK-NEXT: lbu a3, 44(a0) +; RVA22U64-PACK-NEXT: lbu a4, 55(a0) ; RVA22U64-PACK-NEXT: lbu a5, 75(a0) -; RVA22U64-PACK-NEXT: packh a6, a0, a1 -; RVA22U64-PACK-NEXT: packh a1, a0, a0 -; RVA22U64-PACK-NEXT: packh a2, a2, a3 -; RVA22U64-PACK-NEXT: packh a3, a4, a5 -; RVA22U64-PACK-NEXT: packw a7, a2, a3 +; RVA22U64-PACK-NEXT: packh a6, a0, a2 +; RVA22U64-PACK-NEXT: packh a2, a0, a0 +; RVA22U64-PACK-NEXT: packh a3, a3, a4 +; RVA22U64-PACK-NEXT: packh a1, a1, a5 +; RVA22U64-PACK-NEXT: packw a7, a3, a1 ; RVA22U64-PACK-NEXT: lbu a3, 82(a0) ; RVA22U64-PACK-NEXT: lbu a4, 93(a0) ; RVA22U64-PACK-NEXT: lbu a5, 105(a0) ; RVA22U64-PACK-NEXT: lbu a0, 161(a0) -; RVA22U64-PACK-NEXT: packw a2, a1, a6 -; RVA22U64-PACK-NEXT: pack a2, a2, a7 +; RVA22U64-PACK-NEXT: packw a1, a2, a6 +; RVA22U64-PACK-NEXT: pack a1, a1, a7 ; RVA22U64-PACK-NEXT: packh a3, a3, a4 ; RVA22U64-PACK-NEXT: packh a0, a5, a0 ; RVA22U64-PACK-NEXT: packw a0, a3, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-PACK-NEXT: vmv.v.x v8, a2 -; RVA22U64-PACK-NEXT: packw a1, a1, a1 +; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 +; RVA22U64-PACK-NEXT: packw a1, a2, a2 ; RVA22U64-PACK-NEXT: pack a0, a0, a1 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: lbu a1, 31(a0) -; RV64ZVE32-NEXT: lbu a2, 44(a0) -; RV64ZVE32-NEXT: lbu a3, 55(a0) -; RV64ZVE32-NEXT: lbu a4, 623(a0) +; RV64ZVE32-NEXT: lbu a1, 623(a0) +; RV64ZVE32-NEXT: lbu a2, 31(a0) +; RV64ZVE32-NEXT: lbu a3, 44(a0) +; RV64ZVE32-NEXT: lbu a4, 55(a0) ; RV64ZVE32-NEXT: lbu a5, 75(a0) ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 105(a0) ; RV64ZVE32-NEXT: lbu a0, 161(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64ZVE32-NEXT: vmv.v.x v8, a1 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vmv.v.x v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5 ; RV64ZVE32-NEXT: vmv.v.x v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 @@ -2757,30 +2757,30 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 1(a0) ; RV32VB-NEXT: lbu a2, 0(a0) -; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: lbu a3, 55(a0) ; RV32VB-NEXT: lbu a4, 44(a0) +; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: or a1, a2, a1 -; RV32VB-NEXT: lbu a2, 75(a0) ; RV32VB-NEXT: slli a3, a3, 8 ; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: lbu a4, 93(a0) +; RV32VB-NEXT: lbu a2, 75(a0) +; RV32VB-NEXT: lbu a4, 82(a0) +; RV32VB-NEXT: lbu a5, 93(a0) +; RV32VB-NEXT: lbu a6, 124(a0) ; RV32VB-NEXT: slli a2, a2, 24 ; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: lbu a3, 82(a0) -; RV32VB-NEXT: slli a4, a4, 8 -; RV32VB-NEXT: lbu a5, 144(a0) -; RV32VB-NEXT: lbu a6, 154(a0) -; 
RV32VB-NEXT: or a3, a3, a4 -; RV32VB-NEXT: lbu a0, 124(a0) -; RV32VB-NEXT: slli a5, a5, 16 -; RV32VB-NEXT: slli a6, a6, 24 -; RV32VB-NEXT: or a4, a6, a5 -; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: lbu a3, 144(a0) +; RV32VB-NEXT: lbu a0, 154(a0) +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a0, a0, 24 +; RV32VB-NEXT: or a0, a0, a3 +; RV32VB-NEXT: or a0, a6, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 -; RV32VB-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-NEXT: vslide1down.vx v8, v8, a4 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; @@ -2790,20 +2790,20 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 44(a0) ; RV32VB-PACK-NEXT: lbu a4, 55(a0) -; RV32VB-PACK-NEXT: lbu a5, 75(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a3, a4 -; RV32VB-PACK-NEXT: packh a3, a0, a5 +; RV32VB-PACK-NEXT: lbu a3, 75(a0) ; RV32VB-PACK-NEXT: lbu a4, 82(a0) ; RV32VB-PACK-NEXT: lbu a5, 93(a0) -; RV32VB-PACK-NEXT: lbu a6, 144(a0) -; RV32VB-PACK-NEXT: lbu a7, 154(a0) -; RV32VB-PACK-NEXT: lbu a0, 124(a0) +; RV32VB-PACK-NEXT: lbu a6, 124(a0) +; RV32VB-PACK-NEXT: lbu a7, 144(a0) +; RV32VB-PACK-NEXT: lbu a0, 154(a0) +; RV32VB-PACK-NEXT: packh a3, a0, a3 ; RV32VB-PACK-NEXT: pack a2, a2, a3 ; RV32VB-PACK-NEXT: packh a3, a4, a5 -; RV32VB-PACK-NEXT: packh a4, a6, a7 -; RV32VB-PACK-NEXT: packh a0, a0, a0 -; RV32VB-PACK-NEXT: pack a0, a0, a4 +; RV32VB-PACK-NEXT: packh a0, a7, a0 +; RV32VB-PACK-NEXT: packh a4, a6, a0 +; RV32VB-PACK-NEXT: pack a0, a4, a0 ; RV32VB-PACK-NEXT: packh a4, a0, a0 ; RV32VB-PACK-NEXT: pack a1, a1, a4 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -2852,32 +2852,32 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 1(a0) ; RVA22U64-NEXT: lbu a2, 0(a0) -; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: lbu a3, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: lbu a2, 75(a0) +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a6, a2, a1 ; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: slli a4, a4, 40 ; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 75(a0) +; RVA22U64-NEXT: lbu a4, 82(a0) +; RVA22U64-NEXT: lbu a5, 93(a0) +; RVA22U64-NEXT: lbu a1, 124(a0) ; RVA22U64-NEXT: slli a2, a2, 56 -; RVA22U64-NEXT: lbu a4, 93(a0) ; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: lbu a2, 82(a0) -; RVA22U64-NEXT: slli a4, a4, 8 +; RVA22U64-NEXT: or a2, a6, a2 ; RVA22U64-NEXT: lbu a3, 144(a0) -; RVA22U64-NEXT: lbu a5, 154(a0) -; RVA22U64-NEXT: or a2, a2, a4 -; RVA22U64-NEXT: lbu a0, 124(a0) +; RVA22U64-NEXT: lbu a0, 154(a0) +; RVA22U64-NEXT: slli a5, a5, 8 +; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: slli a3, a3, 48 -; RVA22U64-NEXT: slli a5, a5, 56 -; RVA22U64-NEXT: or a3, a3, a5 -; RVA22U64-NEXT: slli a0, a0, 32 +; RVA22U64-NEXT: slli a0, a0, 56 ; RVA22U64-NEXT: or a0, a0, a3 -; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-NEXT: vmv.v.x v8, a1 +; RVA22U64-NEXT: vmv.v.x v8, a2 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; @@ -2887,27 +2887,27 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) 
; RVA22U64-PACK-NEXT: lbu a3, 44(a0) ; RVA22U64-PACK-NEXT: lbu a4, 55(a0) -; RVA22U64-PACK-NEXT: lbu a5, 75(a0) -; RVA22U64-PACK-NEXT: packh a1, a1, a2 +; RVA22U64-PACK-NEXT: packh a6, a1, a2 ; RVA22U64-PACK-NEXT: packh a2, a3, a4 -; RVA22U64-PACK-NEXT: packh a3, a0, a5 -; RVA22U64-PACK-NEXT: packw a6, a2, a3 -; RVA22U64-PACK-NEXT: packh a3, a0, a0 -; RVA22U64-PACK-NEXT: packw a7, a1, a3 -; RVA22U64-PACK-NEXT: lbu a4, 82(a0) +; RVA22U64-PACK-NEXT: lbu a3, 75(a0) +; RVA22U64-PACK-NEXT: lbu a7, 82(a0) ; RVA22U64-PACK-NEXT: lbu a5, 93(a0) -; RVA22U64-PACK-NEXT: lbu a2, 144(a0) -; RVA22U64-PACK-NEXT: lbu a1, 154(a0) -; RVA22U64-PACK-NEXT: lbu a0, 124(a0) -; RVA22U64-PACK-NEXT: pack a6, a7, a6 -; RVA22U64-PACK-NEXT: packh a4, a4, a5 -; RVA22U64-PACK-NEXT: packh a1, a2, a1 -; RVA22U64-PACK-NEXT: packh a0, a0, a0 -; RVA22U64-PACK-NEXT: packw a0, a0, a1 -; RVA22U64-PACK-NEXT: packw a1, a4, a3 -; RVA22U64-PACK-NEXT: pack a0, a1, a0 +; RVA22U64-PACK-NEXT: lbu t0, 124(a0) +; RVA22U64-PACK-NEXT: packh a3, a0, a3 +; RVA22U64-PACK-NEXT: packw a2, a2, a3 +; RVA22U64-PACK-NEXT: packh a3, a0, a0 +; RVA22U64-PACK-NEXT: lbu a4, 144(a0) +; RVA22U64-PACK-NEXT: lbu a0, 154(a0) +; RVA22U64-PACK-NEXT: packw a1, a6, a3 +; RVA22U64-PACK-NEXT: pack a1, a1, a2 +; RVA22U64-PACK-NEXT: packh a2, a7, a5 +; RVA22U64-PACK-NEXT: packh a0, a4, a0 +; RVA22U64-PACK-NEXT: packh a4, t0, a0 +; RVA22U64-PACK-NEXT: packw a0, a4, a0 +; RVA22U64-PACK-NEXT: packw a2, a2, a3 +; RVA22U64-PACK-NEXT: pack a0, a2, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-PACK-NEXT: vmv.v.x v8, a6 +; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index 82e0760d593c26..af46849ae08719 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -7,25 +7,25 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-LABEL: load_large_vector: ; ZVE32X: # %bb.0: -; ZVE32X-NEXT: ld a1, 80(a0) -; ZVE32X-NEXT: ld a2, 72(a0) -; ZVE32X-NEXT: ld a3, 56(a0) +; ZVE32X-NEXT: ld a1, 0(a0) +; ZVE32X-NEXT: ld a2, 8(a0) +; ZVE32X-NEXT: ld a3, 24(a0) ; ZVE32X-NEXT: ld a4, 32(a0) -; ZVE32X-NEXT: ld a5, 24(a0) -; ZVE32X-NEXT: ld a6, 48(a0) -; ZVE32X-NEXT: ld a7, 8(a0) -; ZVE32X-NEXT: ld a0, 0(a0) -; ZVE32X-NEXT: xor a4, a5, a4 -; ZVE32X-NEXT: snez a4, a4 +; ZVE32X-NEXT: ld a5, 48(a0) +; ZVE32X-NEXT: ld a6, 56(a0) +; ZVE32X-NEXT: ld a7, 72(a0) +; ZVE32X-NEXT: ld a0, 80(a0) +; ZVE32X-NEXT: xor a3, a3, a4 +; ZVE32X-NEXT: snez a3, a3 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmv.s.x v8, a4 +; ZVE32X-NEXT: vmv.s.x v8, a3 ; ZVE32X-NEXT: vand.vi v8, v8, 1 ; ZVE32X-NEXT: vmsne.vi v0, v8, 0 ; ZVE32X-NEXT: vmv.s.x v9, zero ; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 -; ZVE32X-NEXT: xor a0, a0, a7 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: xor a1, a1, a2 +; ZVE32X-NEXT: snez a1, a1 +; ZVE32X-NEXT: vmv.s.x v10, a1 ; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma @@ -35,9 +35,9 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: vslideup.vi v11, v8, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: xor a0, a6, a3 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x 
v8, a0 +; ZVE32X-NEXT: xor a1, a5, a6 +; ZVE32X-NEXT: snez a1, a1 +; ZVE32X-NEXT: vmv.s.x v8, a1 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v8, v8, 1 ; ZVE32X-NEXT: vmsne.vi v8, v8, 0 @@ -50,8 +50,8 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: vslideup.vi v11, v8, 2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: xor a1, a2, a1 -; ZVE32X-NEXT: snez a0, a1 +; ZVE32X-NEXT: xor a0, a7, a0 +; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmv.s.x v8, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index a90ee3ebb87668..43184a28ba3238 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -777,24 +777,24 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV32-NEXT: vfmv.f.s fa5, v10 ; RV32-NEXT: fcvt.w.d a2, fa5 ; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa5, v8 +; RV32-NEXT: fcvt.w.d a3, fa5 ; RV32-NEXT: fld fa5, 32(sp) -; RV32-NEXT: vfmv.f.s fa4, v8 -; RV32-NEXT: fld fa3, 40(sp) -; RV32-NEXT: fcvt.w.d a3, fa4 +; RV32-NEXT: fld fa4, 40(sp) +; RV32-NEXT: fld fa3, 48(sp) +; RV32-NEXT: fld fa2, 56(sp) ; RV32-NEXT: fcvt.w.d a4, fa5 +; RV32-NEXT: fcvt.w.d a5, fa4 +; RV32-NEXT: fcvt.w.d a6, fa3 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 -; RV32-NEXT: fcvt.w.d a1, fa3 -; RV32-NEXT: fld fa5, 48(sp) ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: fcvt.w.d a0, fa5 -; RV32-NEXT: fld fa5, 56(sp) ; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: fcvt.w.d a0, fa5 +; RV32-NEXT: vslide1down.vx v8, v8, a5 +; RV32-NEXT: vslide1down.vx v8, v8, a6 +; RV32-NEXT: fcvt.w.d a0, fa2 ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: addi sp, s0, -128 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload @@ -827,24 +827,24 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV64-i32-NEXT: vfmv.f.s fa5, v10 ; RV64-i32-NEXT: fcvt.l.d a2, fa5 ; RV64-i32-NEXT: vslidedown.vi v8, v8, 3 +; RV64-i32-NEXT: vfmv.f.s fa5, v8 +; RV64-i32-NEXT: fcvt.l.d a3, fa5 ; RV64-i32-NEXT: fld fa5, 32(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v8 -; RV64-i32-NEXT: fld fa3, 40(sp) -; RV64-i32-NEXT: fcvt.l.d a3, fa4 +; RV64-i32-NEXT: fld fa4, 40(sp) +; RV64-i32-NEXT: fld fa3, 48(sp) +; RV64-i32-NEXT: fld fa2, 56(sp) ; RV64-i32-NEXT: fcvt.l.d a4, fa5 +; RV64-i32-NEXT: fcvt.l.d a5, fa4 +; RV64-i32-NEXT: fcvt.l.d a6, fa3 ; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-i32-NEXT: vmv.v.x v8, a1 -; RV64-i32-NEXT: fcvt.l.d a1, fa3 -; RV64-i32-NEXT: fld fa5, 48(sp) ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a2 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a3 -; RV64-i32-NEXT: fcvt.l.d a0, fa5 -; RV64-i32-NEXT: fld fa5, 56(sp) ; RV64-i32-NEXT: vslide1down.vx v8, v8, a4 -; RV64-i32-NEXT: vslide1down.vx v8, v8, a1 -; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 -; RV64-i32-NEXT: fcvt.l.d a0, fa5 +; RV64-i32-NEXT: vslide1down.vx v8, v8, a5 +; RV64-i32-NEXT: vslide1down.vx v8, v8, a6 +; RV64-i32-NEXT: fcvt.l.d a0, fa2 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0 ; RV64-i32-NEXT: addi sp, s0, -128 ; RV64-i32-NEXT: ld ra, 120(sp) # 8-byte Folded Reload diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 24a5bd154c64f5..9cd38056364494 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -3525,9 +3525,9 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr ; RV32ZVE32F-NEXT: bnez a2, .LBB42_2 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: lw a0, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: .LBB42_2: # %else ; RV32ZVE32F-NEXT: ret ; @@ -3571,30 +3571,30 @@ define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthr ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a2, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a2, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, a4, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB43_4 ; RV32ZVE32F-NEXT: .LBB43_2: -; RV32ZVE32F-NEXT: lw a4, 12(a1) -; RV32ZVE32F-NEXT: lw a1, 8(a1) +; RV32ZVE32F-NEXT: lw a4, 8(a1) +; RV32ZVE32F-NEXT: lw a1, 12(a1) ; RV32ZVE32F-NEXT: j .LBB43_5 ; RV32ZVE32F-NEXT: .LBB43_3: -; RV32ZVE32F-NEXT: lw a2, 4(a1) -; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a3, 4(a1) ; RV32ZVE32F-NEXT: andi a4, a4, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB43_2 ; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: lw a4, 4(a1) -; RV32ZVE32F-NEXT: lw a1, 0(a1) +; RV32ZVE32F-NEXT: lw a4, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: .LBB43_5: # %else2 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) -; RV32ZVE32F-NEXT: sw a1, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a1, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v2i64: @@ -3644,60 +3644,60 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a2, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a2, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, a6, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB44_6 ; RV32ZVE32F-NEXT: .LBB44_2: -; RV32ZVE32F-NEXT: lw a4, 12(a1) -; RV32ZVE32F-NEXT: lw a5, 8(a1) +; RV32ZVE32F-NEXT: lw a4, 8(a1) +; RV32ZVE32F-NEXT: lw a5, 12(a1) ; RV32ZVE32F-NEXT: andi a7, a6, 4 ; RV32ZVE32F-NEXT: bnez a7, .LBB44_7 ; RV32ZVE32F-NEXT: .LBB44_3: -; RV32ZVE32F-NEXT: lw a7, 20(a1) -; RV32ZVE32F-NEXT: lw t0, 16(a1) +; RV32ZVE32F-NEXT: lw a7, 16(a1) +; RV32ZVE32F-NEXT: lw t0, 20(a1) ; RV32ZVE32F-NEXT: andi a6, a6, 8 ; RV32ZVE32F-NEXT: bnez a6, .LBB44_8 ; RV32ZVE32F-NEXT: .LBB44_4: -; RV32ZVE32F-NEXT: lw a6, 28(a1) -; RV32ZVE32F-NEXT: lw a1, 24(a1) +; RV32ZVE32F-NEXT: lw a6, 24(a1) +; RV32ZVE32F-NEXT: lw a1, 28(a1) ; RV32ZVE32F-NEXT: j .LBB44_9 ; RV32ZVE32F-NEXT: .LBB44_5: -; RV32ZVE32F-NEXT: lw a2, 4(a1) -; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: lw a2, 
0(a1) +; RV32ZVE32F-NEXT: lw a3, 4(a1) ; RV32ZVE32F-NEXT: andi a4, a6, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB44_2 ; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a7, a6, 4 ; RV32ZVE32F-NEXT: beqz a7, .LBB44_3 ; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s t0, v9 -; RV32ZVE32F-NEXT: lw a7, 4(t0) -; RV32ZVE32F-NEXT: lw t0, 0(t0) +; RV32ZVE32F-NEXT: lw a7, 0(t0) +; RV32ZVE32F-NEXT: lw t0, 4(t0) ; RV32ZVE32F-NEXT: andi a6, a6, 8 ; RV32ZVE32F-NEXT: beqz a6, .LBB44_4 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: lw a6, 4(a1) -; RV32ZVE32F-NEXT: lw a1, 0(a1) +; RV32ZVE32F-NEXT: lw a6, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: .LBB44_9: # %else8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw t0, 16(a0) -; RV32ZVE32F-NEXT: sw a7, 20(a0) -; RV32ZVE32F-NEXT: sw a1, 24(a0) -; RV32ZVE32F-NEXT: sw a6, 28(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a7, 16(a0) +; RV32ZVE32F-NEXT: sw t0, 20(a0) +; RV32ZVE32F-NEXT: sw a6, 24(a0) +; RV32ZVE32F-NEXT: sw a1, 28(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v4i64: @@ -3775,18 +3775,18 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { ; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: lw a6, 0(a5) -; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a7, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: vmv.x.s a6, v8 +; RV32ZVE32F-NEXT: lw a7, 0(a6) +; RV32ZVE32F-NEXT: lw a6, 4(a6) +; RV32ZVE32F-NEXT: lw t0, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 28(a0) +; RV32ZVE32F-NEXT: sw a6, 28(a0) ; RV32ZVE32F-NEXT: sw a7, 24(a0) ; RV32ZVE32F-NEXT: sw a5, 20(a0) -; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw t0, 16(a0) ; RV32ZVE32F-NEXT: sw a3, 12(a0) ; RV32ZVE32F-NEXT: sw a4, 8(a0) ; RV32ZVE32F-NEXT: ret @@ -3823,22 +3823,22 @@ define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) ; ; RV32ZVE32F-LABEL: mgather_falsemask_v4i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a2, 0(a1) -; RV32ZVE32F-NEXT: lw a3, 4(a1) -; RV32ZVE32F-NEXT: lw a4, 8(a1) -; RV32ZVE32F-NEXT: lw a5, 12(a1) -; RV32ZVE32F-NEXT: lw a6, 28(a1) -; RV32ZVE32F-NEXT: lw a7, 24(a1) -; RV32ZVE32F-NEXT: lw t0, 20(a1) -; RV32ZVE32F-NEXT: lw a1, 16(a1) -; RV32ZVE32F-NEXT: sw a6, 28(a0) -; RV32ZVE32F-NEXT: sw a7, 24(a0) -; RV32ZVE32F-NEXT: sw t0, 20(a0) -; RV32ZVE32F-NEXT: sw a1, 16(a0) -; RV32ZVE32F-NEXT: sw a5, 12(a0) -; RV32ZVE32F-NEXT: sw a4, 8(a0) -; RV32ZVE32F-NEXT: sw a3, 4(a0) -; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: lw a2, 16(a1) +; RV32ZVE32F-NEXT: lw a3, 20(a1) +; RV32ZVE32F-NEXT: lw a4, 
24(a1) +; RV32ZVE32F-NEXT: lw a5, 28(a1) +; RV32ZVE32F-NEXT: lw a6, 0(a1) +; RV32ZVE32F-NEXT: lw a7, 4(a1) +; RV32ZVE32F-NEXT: lw t0, 8(a1) +; RV32ZVE32F-NEXT: lw a1, 12(a1) +; RV32ZVE32F-NEXT: sw a5, 28(a0) +; RV32ZVE32F-NEXT: sw a4, 24(a0) +; RV32ZVE32F-NEXT: sw a3, 20(a0) +; RV32ZVE32F-NEXT: sw a2, 16(a0) +; RV32ZVE32F-NEXT: sw a1, 12(a0) +; RV32ZVE32F-NEXT: sw t0, 8(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_falsemask_v4i64: @@ -3882,77 +3882,77 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a2, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a2, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB47_8 ; RV32ZVE32F-NEXT: .LBB47_2: -; RV32ZVE32F-NEXT: lw a4, 12(a1) -; RV32ZVE32F-NEXT: lw a5, 8(a1) +; RV32ZVE32F-NEXT: lw a4, 8(a1) +; RV32ZVE32F-NEXT: lw a5, 12(a1) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB47_9 ; RV32ZVE32F-NEXT: .LBB47_3: -; RV32ZVE32F-NEXT: lw a6, 20(a1) -; RV32ZVE32F-NEXT: lw a7, 16(a1) +; RV32ZVE32F-NEXT: lw a6, 16(a1) +; RV32ZVE32F-NEXT: lw a7, 20(a1) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB47_10 ; RV32ZVE32F-NEXT: .LBB47_4: -; RV32ZVE32F-NEXT: lw t1, 28(a1) -; RV32ZVE32F-NEXT: lw t2, 24(a1) +; RV32ZVE32F-NEXT: lw t1, 24(a1) +; RV32ZVE32F-NEXT: lw t2, 28(a1) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB47_11 ; RV32ZVE32F-NEXT: .LBB47_5: -; RV32ZVE32F-NEXT: lw t3, 36(a1) -; RV32ZVE32F-NEXT: lw t4, 32(a1) +; RV32ZVE32F-NEXT: lw t3, 32(a1) +; RV32ZVE32F-NEXT: lw t4, 36(a1) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB47_12 ; RV32ZVE32F-NEXT: .LBB47_6: -; RV32ZVE32F-NEXT: lw t5, 44(a1) -; RV32ZVE32F-NEXT: lw t6, 40(a1) +; RV32ZVE32F-NEXT: lw t5, 40(a1) +; RV32ZVE32F-NEXT: lw t6, 44(a1) ; RV32ZVE32F-NEXT: j .LBB47_13 ; RV32ZVE32F-NEXT: .LBB47_7: -; RV32ZVE32F-NEXT: lw a2, 4(a1) -; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a3, 4(a1) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB47_2 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB47_3 ; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB47_4 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB47_5 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, 
ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB47_6 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB47_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -3966,42 +3966,42 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB47_17 ; RV32ZVE32F-NEXT: .LBB47_15: -; RV32ZVE32F-NEXT: lw t0, 60(a1) -; RV32ZVE32F-NEXT: lw a1, 56(a1) +; RV32ZVE32F-NEXT: lw t0, 56(a1) +; RV32ZVE32F-NEXT: lw a1, 60(a1) ; RV32ZVE32F-NEXT: j .LBB47_18 ; RV32ZVE32F-NEXT: .LBB47_16: -; RV32ZVE32F-NEXT: lw s0, 52(a1) -; RV32ZVE32F-NEXT: lw s1, 48(a1) +; RV32ZVE32F-NEXT: lw s0, 48(a1) +; RV32ZVE32F-NEXT: lw s1, 52(a1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB47_15 ; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a1) -; RV32ZVE32F-NEXT: lw a1, 0(a1) +; RV32ZVE32F-NEXT: lw t0, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: .LBB47_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a1, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a1, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -4129,77 +4129,77 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: beqz a3, .LBB48_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 
2 ; RV32ZVE32F-NEXT: bnez a4, .LBB48_8 ; RV32ZVE32F-NEXT: .LBB48_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB48_10 ; RV32ZVE32F-NEXT: .LBB48_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB48_4 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB48_5 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB48_6 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB48_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -4213,42 +4213,42 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, 
v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB48_17 ; RV32ZVE32F-NEXT: .LBB48_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB48_18 ; RV32ZVE32F-NEXT: .LBB48_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB48_15 ; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB48_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -4403,77 +4403,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: beqz a3, .LBB49_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB49_8 ; RV32ZVE32F-NEXT: .LBB49_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB49_10 ; RV32ZVE32F-NEXT: .LBB49_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) 
; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB49_4 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB49_5 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB49_6 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB49_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -4487,42 +4487,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB49_17 ; RV32ZVE32F-NEXT: .LBB49_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB49_18 ; RV32ZVE32F-NEXT: .LBB49_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB49_15 ; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: 
lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB49_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -4679,77 +4679,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: beqz a3, .LBB50_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB50_8 ; RV32ZVE32F-NEXT: .LBB50_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB50_10 ; RV32ZVE32F-NEXT: .LBB50_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_9: # 
%cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB50_4 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB50_5 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB50_6 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB50_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -4763,42 +4763,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB50_17 ; RV32ZVE32F-NEXT: .LBB50_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB50_18 ; RV32ZVE32F-NEXT: .LBB50_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB50_15 ; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB50_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; 
RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -4962,77 +4962,77 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB51_8 ; RV32ZVE32F-NEXT: .LBB51_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB51_9 ; RV32ZVE32F-NEXT: .LBB51_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB51_10 ; RV32ZVE32F-NEXT: .LBB51_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB51_11 ; RV32ZVE32F-NEXT: .LBB51_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB51_12 ; RV32ZVE32F-NEXT: .LBB51_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB51_13 ; RV32ZVE32F-NEXT: .LBB51_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, 
v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB51_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -5046,42 +5046,42 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB51_17 ; RV32ZVE32F-NEXT: .LBB51_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB51_18 ; RV32ZVE32F-NEXT: .LBB51_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB51_15 ; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB51_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -5237,77 +5237,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz a3, .LBB52_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB52_8 ; RV32ZVE32F-NEXT: .LBB52_2: -; 
RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB52_9 ; RV32ZVE32F-NEXT: .LBB52_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB52_10 ; RV32ZVE32F-NEXT: .LBB52_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB52_11 ; RV32ZVE32F-NEXT: .LBB52_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB52_12 ; RV32ZVE32F-NEXT: .LBB52_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB52_13 ; RV32ZVE32F-NEXT: .LBB52_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB52_2 ; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB52_3 ; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB52_4 ; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB52_5 ; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB52_6 ; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB52_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -5321,42 +5321,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 
4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB52_17 ; RV32ZVE32F-NEXT: .LBB52_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB52_18 ; RV32ZVE32F-NEXT: .LBB52_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB52_15 ; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB52_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -5514,77 +5514,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB53_8 ; RV32ZVE32F-NEXT: .LBB53_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB53_9 ; RV32ZVE32F-NEXT: .LBB53_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB53_10 ; RV32ZVE32F-NEXT: .LBB53_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB53_11 ; RV32ZVE32F-NEXT: .LBB53_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, 
.LBB53_12 ; RV32ZVE32F-NEXT: .LBB53_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB53_13 ; RV32ZVE32F-NEXT: .LBB53_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB53_2 ; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB53_3 ; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB53_4 ; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB53_5 ; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB53_6 ; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB53_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -5598,42 +5598,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB53_17 ; RV32ZVE32F-NEXT: .LBB53_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB53_18 ; RV32ZVE32F-NEXT: .LBB53_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB53_15 ; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; 
RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB53_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -5798,77 +5798,77 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV32ZVE32F-NEXT: beqz a3, .LBB54_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB54_8 ; RV32ZVE32F-NEXT: .LBB54_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB54_9 ; RV32ZVE32F-NEXT: .LBB54_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB54_10 ; RV32ZVE32F-NEXT: .LBB54_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB54_11 ; RV32ZVE32F-NEXT: .LBB54_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB54_12 ; RV32ZVE32F-NEXT: .LBB54_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB54_13 ; RV32ZVE32F-NEXT: .LBB54_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB54_2 ; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB54_3 ; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, 
e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB54_4 ; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB54_5 ; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB54_6 ; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB54_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -5882,42 +5882,42 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB54_17 ; RV32ZVE32F-NEXT: .LBB54_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB54_18 ; RV32ZVE32F-NEXT: .LBB54_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB54_15 ; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB54_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 
32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -6071,77 +6071,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: beqz a3, .LBB55_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB55_8 ; RV32ZVE32F-NEXT: .LBB55_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB55_9 ; RV32ZVE32F-NEXT: .LBB55_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB55_10 ; RV32ZVE32F-NEXT: .LBB55_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB55_11 ; RV32ZVE32F-NEXT: .LBB55_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB55_12 ; RV32ZVE32F-NEXT: .LBB55_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB55_13 ; RV32ZVE32F-NEXT: .LBB55_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB55_2 ; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB55_3 ; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB55_4 ; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB55_5 ; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw 
t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB55_6 ; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB55_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -6155,42 +6155,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB55_17 ; RV32ZVE32F-NEXT: .LBB55_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB55_18 ; RV32ZVE32F-NEXT: .LBB55_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB55_15 ; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB55_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -6345,77 +6345,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: beqz a3, .LBB56_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB56_8 ; RV32ZVE32F-NEXT: .LBB56_2: -; RV32ZVE32F-NEXT: lw a4, 12(a2) -; RV32ZVE32F-NEXT: lw a5, 8(a2) +; 
RV32ZVE32F-NEXT: lw a4, 8(a2) +; RV32ZVE32F-NEXT: lw a5, 12(a2) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB56_9 ; RV32ZVE32F-NEXT: .LBB56_3: -; RV32ZVE32F-NEXT: lw a6, 20(a2) -; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw a6, 16(a2) +; RV32ZVE32F-NEXT: lw a7, 20(a2) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB56_10 ; RV32ZVE32F-NEXT: .LBB56_4: -; RV32ZVE32F-NEXT: lw t1, 28(a2) -; RV32ZVE32F-NEXT: lw t2, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw t2, 28(a2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB56_11 ; RV32ZVE32F-NEXT: .LBB56_5: -; RV32ZVE32F-NEXT: lw t3, 36(a2) -; RV32ZVE32F-NEXT: lw t4, 32(a2) +; RV32ZVE32F-NEXT: lw t3, 32(a2) +; RV32ZVE32F-NEXT: lw t4, 36(a2) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB56_12 ; RV32ZVE32F-NEXT: .LBB56_6: -; RV32ZVE32F-NEXT: lw t5, 44(a2) -; RV32ZVE32F-NEXT: lw t6, 40(a2) +; RV32ZVE32F-NEXT: lw t5, 40(a2) +; RV32ZVE32F-NEXT: lw t6, 44(a2) ; RV32ZVE32F-NEXT: j .LBB56_13 ; RV32ZVE32F-NEXT: .LBB56_7: -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB56_2 ; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB56_3 ; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB56_4 ; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB56_5 ; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB56_6 ; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB56_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 @@ -6429,42 +6429,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 
0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB56_17 ; RV32ZVE32F-NEXT: .LBB56_15: -; RV32ZVE32F-NEXT: lw t0, 60(a2) -; RV32ZVE32F-NEXT: lw a2, 56(a2) +; RV32ZVE32F-NEXT: lw t0, 56(a2) +; RV32ZVE32F-NEXT: lw a2, 60(a2) ; RV32ZVE32F-NEXT: j .LBB56_18 ; RV32ZVE32F-NEXT: .LBB56_16: -; RV32ZVE32F-NEXT: lw s0, 52(a2) -; RV32ZVE32F-NEXT: lw s1, 48(a2) +; RV32ZVE32F-NEXT: lw s0, 48(a2) +; RV32ZVE32F-NEXT: lw s1, 52(a2) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB56_15 ; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: .LBB56_18: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a2, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a2, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -6617,10 +6617,10 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a4, 56(a2) -; RV32ZVE32F-NEXT: lw a5, 48(a2) -; RV32ZVE32F-NEXT: lw a6, 40(a2) -; RV32ZVE32F-NEXT: lw a7, 32(a2) +; RV32ZVE32F-NEXT: lw a4, 32(a2) +; RV32ZVE32F-NEXT: lw a5, 40(a2) +; RV32ZVE32F-NEXT: lw a6, 48(a2) +; RV32ZVE32F-NEXT: lw a7, 56(a2) ; RV32ZVE32F-NEXT: lw t0, 0(a2) ; RV32ZVE32F-NEXT: lw t1, 8(a2) ; RV32ZVE32F-NEXT: lw t2, 16(a2) @@ -6630,10 +6630,10 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 @@ -6643,77 +6643,77 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; 
RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: lw a2, 4(a2) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: bnez a4, .LBB57_8 ; RV32ZVE32F-NEXT: .LBB57_2: -; RV32ZVE32F-NEXT: lw a4, 12(a3) -; RV32ZVE32F-NEXT: lw a5, 8(a3) +; RV32ZVE32F-NEXT: lw a4, 8(a3) +; RV32ZVE32F-NEXT: lw a5, 12(a3) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: bnez a6, .LBB57_9 ; RV32ZVE32F-NEXT: .LBB57_3: -; RV32ZVE32F-NEXT: lw a6, 20(a3) -; RV32ZVE32F-NEXT: lw a7, 16(a3) +; RV32ZVE32F-NEXT: lw a6, 16(a3) +; RV32ZVE32F-NEXT: lw a7, 20(a3) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: bnez t1, .LBB57_10 ; RV32ZVE32F-NEXT: .LBB57_4: -; RV32ZVE32F-NEXT: lw t1, 28(a3) -; RV32ZVE32F-NEXT: lw t2, 24(a3) +; RV32ZVE32F-NEXT: lw t1, 24(a3) +; RV32ZVE32F-NEXT: lw t2, 28(a3) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: bnez t3, .LBB57_11 ; RV32ZVE32F-NEXT: .LBB57_5: -; RV32ZVE32F-NEXT: lw t3, 36(a3) -; RV32ZVE32F-NEXT: lw t4, 32(a3) +; RV32ZVE32F-NEXT: lw t3, 32(a3) +; RV32ZVE32F-NEXT: lw t4, 36(a3) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: bnez t5, .LBB57_12 ; RV32ZVE32F-NEXT: .LBB57_6: -; RV32ZVE32F-NEXT: lw t5, 44(a3) -; RV32ZVE32F-NEXT: lw t6, 40(a3) +; RV32ZVE32F-NEXT: lw t5, 40(a3) +; RV32ZVE32F-NEXT: lw t6, 44(a3) ; RV32ZVE32F-NEXT: j .LBB57_13 ; RV32ZVE32F-NEXT: .LBB57_7: -; RV32ZVE32F-NEXT: lw a1, 4(a3) -; RV32ZVE32F-NEXT: lw a2, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: lw a2, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 ; RV32ZVE32F-NEXT: beqz a4, .LBB57_2 ; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10 -; RV32ZVE32F-NEXT: lw a4, 4(a5) -; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: lw a4, 0(a5) +; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB57_3 ; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a7) -; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: lw a6, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB57_4 ; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10 -; RV32ZVE32F-NEXT: lw t1, 4(t2) -; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: lw t1, 0(t2) +; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB57_5 ; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10 -; RV32ZVE32F-NEXT: lw t3, 4(t4) -; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB57_6 ; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10 -; RV32ZVE32F-NEXT: lw t5, 4(t6) -; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: lw t5, 0(t6) +; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB57_13: # %else14 ; RV32ZVE32F-NEXT: addi sp, sp, -16 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 
@@ -6727,42 +6727,42 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10 -; RV32ZVE32F-NEXT: lw s0, 4(s1) -; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: lw s0, 0(s1) +; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: bnez t0, .LBB57_17 ; RV32ZVE32F-NEXT: .LBB57_15: -; RV32ZVE32F-NEXT: lw t0, 60(a3) -; RV32ZVE32F-NEXT: lw a3, 56(a3) +; RV32ZVE32F-NEXT: lw t0, 56(a3) +; RV32ZVE32F-NEXT: lw a3, 60(a3) ; RV32ZVE32F-NEXT: j .LBB57_18 ; RV32ZVE32F-NEXT: .LBB57_16: -; RV32ZVE32F-NEXT: lw s0, 52(a3) -; RV32ZVE32F-NEXT: lw s1, 48(a3) +; RV32ZVE32F-NEXT: lw s0, 48(a3) +; RV32ZVE32F-NEXT: lw s1, 52(a3) ; RV32ZVE32F-NEXT: andi t0, t0, -128 ; RV32ZVE32F-NEXT: beqz t0, .LBB57_15 ; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 -; RV32ZVE32F-NEXT: lw t0, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: lw t0, 0(a3) +; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: .LBB57_18: # %else20 -; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a5, 8(a0) -; RV32ZVE32F-NEXT: sw a4, 12(a0) -; RV32ZVE32F-NEXT: sw a7, 16(a0) -; RV32ZVE32F-NEXT: sw a6, 20(a0) -; RV32ZVE32F-NEXT: sw t2, 24(a0) -; RV32ZVE32F-NEXT: sw t1, 28(a0) -; RV32ZVE32F-NEXT: sw t4, 32(a0) -; RV32ZVE32F-NEXT: sw t3, 36(a0) -; RV32ZVE32F-NEXT: sw t6, 40(a0) -; RV32ZVE32F-NEXT: sw t5, 44(a0) -; RV32ZVE32F-NEXT: sw s1, 48(a0) -; RV32ZVE32F-NEXT: sw s0, 52(a0) -; RV32ZVE32F-NEXT: sw a3, 56(a0) -; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t1, 24(a0) +; RV32ZVE32F-NEXT: sw t2, 28(a0) +; RV32ZVE32F-NEXT: sw t3, 32(a0) +; RV32ZVE32F-NEXT: sw t4, 36(a0) +; RV32ZVE32F-NEXT: sw t5, 40(a0) +; RV32ZVE32F-NEXT: sw t6, 44(a0) +; RV32ZVE32F-NEXT: sw s0, 48(a0) +; RV32ZVE32F-NEXT: sw s1, 52(a0) +; RV32ZVE32F-NEXT: sw t0, 56(a0) +; RV32ZVE32F-NEXT: sw a3, 60(a0) ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: addi sp, sp, 16 @@ -11676,10 +11676,10 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a3, 56(a2) -; RV32ZVE32F-NEXT: lw a4, 48(a2) -; RV32ZVE32F-NEXT: lw a5, 40(a2) -; RV32ZVE32F-NEXT: lw a6, 32(a2) +; RV32ZVE32F-NEXT: lw a3, 32(a2) +; RV32ZVE32F-NEXT: lw a4, 40(a2) +; RV32ZVE32F-NEXT: lw a5, 48(a2) +; RV32ZVE32F-NEXT: lw a6, 56(a2) ; RV32ZVE32F-NEXT: lw a7, 0(a2) ; RV32ZVE32F-NEXT: lw t0, 8(a2) ; RV32ZVE32F-NEXT: lw t1, 16(a2) @@ -11689,10 +11689,10 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, 
v8, a6 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 @@ -12729,54 +12729,54 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: slli a0, a0, 8 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: slli a3, a3, 8 -; RV32-NEXT: or a2, a3, a2 ; RV32-NEXT: vslidedown.vi v10, v8, 2 ; RV32-NEXT: vmv.x.s a1, v10 -; RV32-NEXT: lbu a3, 0(a1) +; RV32-NEXT: lbu a4, 0(a1) ; RV32-NEXT: lbu a1, 1(a1) ; RV32-NEXT: vslidedown.vi v10, v8, 3 -; RV32-NEXT: vmv.x.s a4, v10 -; RV32-NEXT: lbu a5, 1(a4) -; RV32-NEXT: lbu a4, 0(a4) +; RV32-NEXT: vmv.x.s a5, v10 +; RV32-NEXT: lbu a6, 0(a5) +; RV32-NEXT: lbu a5, 1(a5) +; RV32-NEXT: or a2, a3, a2 ; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: or a1, a1, a4 ; RV32-NEXT: slli a5, a5, 8 -; RV32-NEXT: or a4, a5, a4 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 4 ; RV32-NEXT: vmv.x.s a3, v10 -; RV32-NEXT: lbu a5, 0(a3) +; RV32-NEXT: lbu a4, 0(a3) ; RV32-NEXT: lbu a3, 1(a3) ; RV32-NEXT: vslidedown.vi v10, v8, 5 -; RV32-NEXT: vmv.x.s a6, v10 -; RV32-NEXT: lbu a7, 1(a6) -; RV32-NEXT: lbu a6, 0(a6) +; RV32-NEXT: vmv.x.s a7, v10 +; RV32-NEXT: lbu t0, 0(a7) +; RV32-NEXT: lbu a7, 1(a7) +; RV32-NEXT: or a5, a5, a6 ; RV32-NEXT: slli a3, a3, 8 -; RV32-NEXT: or a3, a3, a5 +; RV32-NEXT: or a3, a3, a4 ; RV32-NEXT: slli a7, a7, 8 -; RV32-NEXT: or a5, a7, a6 ; RV32-NEXT: vslidedown.vi v10, v8, 6 -; RV32-NEXT: vmv.x.s a6, v10 -; RV32-NEXT: lbu a7, 0(a6) -; RV32-NEXT: lbu a6, 1(a6) +; RV32-NEXT: vmv.x.s a4, v10 +; RV32-NEXT: lbu a6, 0(a4) +; RV32-NEXT: lbu a4, 1(a4) ; RV32-NEXT: vslidedown.vi v8, v8, 7 -; RV32-NEXT: vmv.x.s t0, v8 -; RV32-NEXT: lbu t1, 1(t0) -; RV32-NEXT: lbu t0, 0(t0) -; RV32-NEXT: slli a6, a6, 8 -; RV32-NEXT: or a6, a6, a7 +; RV32-NEXT: vmv.x.s t1, v8 +; RV32-NEXT: lbu t2, 0(t1) +; RV32-NEXT: lbu t1, 1(t1) +; RV32-NEXT: or a7, a7, t0 +; RV32-NEXT: slli a4, a4, 8 +; RV32-NEXT: or a4, a4, a6 ; RV32-NEXT: slli t1, t1, 8 -; RV32-NEXT: or a7, t1, t0 +; RV32-NEXT: or a6, t1, t2 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV32-NEXT: vmv.v.x v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v9, v8, a4 +; RV32-NEXT: vslide1down.vx v9, v8, a5 ; RV32-NEXT: vmv.v.x v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vslide1down.vx v8, v8, a4 +; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; @@ -12805,50 +12805,50 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: lbu a2, 0(a2) ; RV64V-NEXT: slli a0, a0, 8 ; RV64V-NEXT: or a0, a0, a1 -; RV64V-NEXT: slli a1, a3, 8 -; RV64V-NEXT: or a1, a1, a2 +; RV64V-NEXT: slli a3, a3, 8 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v12, v8, 2 -; RV64V-NEXT: vmv.x.s a2, v12 -; RV64V-NEXT: lbu a3, 0(a2) -; RV64V-NEXT: lbu a2, 1(a2) +; RV64V-NEXT: vmv.x.s a1, v12 +; RV64V-NEXT: lbu a4, 0(a1) +; RV64V-NEXT: lbu a1, 1(a1) ; RV64V-NEXT: vslidedown.vi v12, v8, 3 -; RV64V-NEXT: vmv.x.s a4, v12 -; RV64V-NEXT: lbu a5, 0(a4) -; RV64V-NEXT: lbu a4, 1(a4) -; RV64V-NEXT: mv a6, sp +; RV64V-NEXT: vmv.x.s a5, v12 +; RV64V-NEXT: lbu a6, 0(a5) +; RV64V-NEXT: lbu a5, 1(a5) +; RV64V-NEXT: or a2, a3, a2 +; RV64V-NEXT: slli a1, a1, 8 +; RV64V-NEXT: or a1, a1, a4 +; RV64V-NEXT: slli a5, a5, 8 +; RV64V-NEXT: mv 
a3, sp ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64V-NEXT: vse64.v v8, (a6) -; RV64V-NEXT: ld a6, 32(sp) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a2, a2, a3 -; RV64V-NEXT: slli a4, a4, 8 -; RV64V-NEXT: lbu a3, 1(a6) -; RV64V-NEXT: ld a7, 40(sp) -; RV64V-NEXT: lbu a6, 0(a6) -; RV64V-NEXT: or a4, a4, a5 -; RV64V-NEXT: slli a3, a3, 8 -; RV64V-NEXT: lbu a5, 1(a7) -; RV64V-NEXT: or a3, a3, a6 -; RV64V-NEXT: lbu a6, 0(a7) +; RV64V-NEXT: vse64.v v8, (a3) +; RV64V-NEXT: ld a3, 32(sp) +; RV64V-NEXT: ld a4, 40(sp) ; RV64V-NEXT: ld a7, 48(sp) -; RV64V-NEXT: slli a5, a5, 8 ; RV64V-NEXT: ld t0, 56(sp) +; RV64V-NEXT: lbu t1, 0(a3) +; RV64V-NEXT: lbu a3, 1(a3) +; RV64V-NEXT: lbu t2, 0(a4) +; RV64V-NEXT: lbu a4, 1(a4) ; RV64V-NEXT: or a5, a5, a6 -; RV64V-NEXT: lbu a6, 1(a7) -; RV64V-NEXT: lbu a7, 0(a7) -; RV64V-NEXT: lbu t1, 1(t0) -; RV64V-NEXT: lbu t0, 0(t0) -; RV64V-NEXT: slli a6, a6, 8 -; RV64V-NEXT: or a6, a6, a7 -; RV64V-NEXT: slli t1, t1, 8 -; RV64V-NEXT: or a7, t1, t0 +; RV64V-NEXT: slli a3, a3, 8 +; RV64V-NEXT: or a3, a3, t1 +; RV64V-NEXT: slli a4, a4, 8 +; RV64V-NEXT: lbu a6, 0(a7) +; RV64V-NEXT: lbu a7, 1(a7) +; RV64V-NEXT: lbu t1, 0(t0) +; RV64V-NEXT: lbu t0, 1(t0) +; RV64V-NEXT: or a4, a4, t2 +; RV64V-NEXT: slli a7, a7, 8 +; RV64V-NEXT: or a6, a7, a6 +; RV64V-NEXT: slli t0, t0, 8 +; RV64V-NEXT: or a7, t0, t1 ; RV64V-NEXT: vmv.v.x v8, a0 -; RV64V-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-NEXT: vslide1down.vx v8, v8, a2 -; RV64V-NEXT: vslide1down.vx v9, v8, a4 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: vslide1down.vx v9, v8, a5 ; RV64V-NEXT: vmv.v.x v8, a3 -; RV64V-NEXT: vslide1down.vx v8, v8, a5 +; RV64V-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslide1down.vx v8, v8, a7 @@ -12868,39 +12868,39 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: slli a1, a1, 8 ; RV64ZVE32F-NEXT: or a1, a1, a2 ; RV64ZVE32F-NEXT: slli a3, a3, 8 -; RV64ZVE32F-NEXT: or a3, a3, a4 -; RV64ZVE32F-NEXT: lbu a2, 9(a0) -; RV64ZVE32F-NEXT: lbu a4, 8(a0) -; RV64ZVE32F-NEXT: lbu a5, 13(a0) +; RV64ZVE32F-NEXT: lbu a2, 8(a0) +; RV64ZVE32F-NEXT: lbu a5, 9(a0) ; RV64ZVE32F-NEXT: lbu a6, 12(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a4 +; RV64ZVE32F-NEXT: lbu a7, 13(a0) +; RV64ZVE32F-NEXT: or a3, a3, a4 ; RV64ZVE32F-NEXT: slli a5, a5, 8 -; RV64ZVE32F-NEXT: or a4, a5, a6 +; RV64ZVE32F-NEXT: or a2, a5, a2 +; RV64ZVE32F-NEXT: slli a7, a7, 8 +; RV64ZVE32F-NEXT: lbu a4, 16(a0) ; RV64ZVE32F-NEXT: lbu a5, 17(a0) -; RV64ZVE32F-NEXT: lbu a6, 16(a0) -; RV64ZVE32F-NEXT: lbu a7, 21(a0) ; RV64ZVE32F-NEXT: lbu t0, 20(a0) +; RV64ZVE32F-NEXT: lbu t1, 21(a0) +; RV64ZVE32F-NEXT: or a6, a7, a6 ; RV64ZVE32F-NEXT: slli a5, a5, 8 -; RV64ZVE32F-NEXT: or a5, a5, a6 -; RV64ZVE32F-NEXT: slli a7, a7, 8 -; RV64ZVE32F-NEXT: or a6, a7, t0 +; RV64ZVE32F-NEXT: or a4, a5, a4 +; RV64ZVE32F-NEXT: slli t1, t1, 8 +; RV64ZVE32F-NEXT: lbu a5, 24(a0) ; RV64ZVE32F-NEXT: lbu a7, 25(a0) -; RV64ZVE32F-NEXT: lbu t0, 24(a0) -; RV64ZVE32F-NEXT: lbu t1, 29(a0) -; RV64ZVE32F-NEXT: lbu a0, 28(a0) +; RV64ZVE32F-NEXT: lbu t2, 28(a0) +; RV64ZVE32F-NEXT: lbu a0, 29(a0) +; RV64ZVE32F-NEXT: or t0, t1, t0 ; RV64ZVE32F-NEXT: slli a7, a7, 8 -; RV64ZVE32F-NEXT: or a7, a7, t0 -; RV64ZVE32F-NEXT: slli t1, t1, 8 -; RV64ZVE32F-NEXT: or a0, t1, a0 +; RV64ZVE32F-NEXT: or a5, a7, a5 +; RV64ZVE32F-NEXT: slli a0, a0, 8 +; RV64ZVE32F-NEXT: or a0, a0, t2 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; 
RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 -; RV64ZVE32F-NEXT: vmv.v.x v8, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.x v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t @@ -13018,24 +13018,24 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 28(a0) -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 24(a0) -; RV64ZVE32F-NEXT: lh a4, 26(a0) -; RV64ZVE32F-NEXT: lh a5, 20(a0) -; RV64ZVE32F-NEXT: lh a6, 22(a0) -; RV64ZVE32F-NEXT: lh a7, 16(a0) -; RV64ZVE32F-NEXT: lh a0, 18(a0) +; RV64ZVE32F-NEXT: lh a1, 24(a0) +; RV64ZVE32F-NEXT: lh a2, 26(a0) +; RV64ZVE32F-NEXT: lh a3, 28(a0) +; RV64ZVE32F-NEXT: lh a4, 30(a0) +; RV64ZVE32F-NEXT: lh a5, 16(a0) +; RV64ZVE32F-NEXT: lh a6, 18(a0) +; RV64ZVE32F-NEXT: lh a7, 20(a0) +; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 -; RV64ZVE32F-NEXT: vmv.v.x v8, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vmv.v.x v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2 +; RV64ZVE32F-NEXT: vmv.v.x v8, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13063,24 +13063,24 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 28(a0) -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 20(a0) -; RV64ZVE32F-NEXT: lh a4, 22(a0) -; RV64ZVE32F-NEXT: lh a5, 12(a0) -; RV64ZVE32F-NEXT: lh a6, 14(a0) -; RV64ZVE32F-NEXT: lh a7, 4(a0) -; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: lh a1, 20(a0) +; RV64ZVE32F-NEXT: lh a2, 22(a0) +; RV64ZVE32F-NEXT: lh a3, 28(a0) +; RV64ZVE32F-NEXT: lh a4, 30(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: lh a7, 12(a0) +; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 -; RV64ZVE32F-NEXT: vmv.v.x v8, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vmv.v.x v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2 +; RV64ZVE32F-NEXT: vmv.v.x v8, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslide1down.vx 
v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13107,21 +13107,21 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 0(a0) -; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a1, 8(a0) +; RV64ZVE32F-NEXT: lh a2, 10(a0) ; RV64ZVE32F-NEXT: lh a3, 16(a0) ; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 8(a0) -; RV64ZVE32F-NEXT: lh a6, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 0(a0) +; RV64ZVE32F-NEXT: lh a6, 2(a0) ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 @@ -13154,21 +13154,21 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 0(a0) -; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a1, 8(a0) +; RV64ZVE32F-NEXT: lh a2, 10(a0) ; RV64ZVE32F-NEXT: lh a3, 18(a0) ; RV64ZVE32F-NEXT: lh a4, 20(a0) -; RV64ZVE32F-NEXT: lh a5, 8(a0) -; RV64ZVE32F-NEXT: lh a6, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 0(a0) +; RV64ZVE32F-NEXT: lh a6, 2(a0) ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 @@ -13202,23 +13202,23 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 18(a0) -; RV64ZVE32F-NEXT: lh a4, 20(a0) -; RV64ZVE32F-NEXT: lh a5, 8(a0) -; RV64ZVE32F-NEXT: lh a6, 10(a0) -; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: lh a1, 10(a0) +; RV64ZVE32F-NEXT: lh a2, 18(a0) +; RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: lh a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 -; RV64ZVE32F-NEXT: vmv.v.x v8, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.x v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; 
RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a3 +; RV64ZVE32F-NEXT: vmv.v.x v8, a0 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13406,18 +13406,18 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: lh a1, 0(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 6(a0) ; RV64ZVE32F-NEXT: lh a5, 8(a0) ; RV64ZVE32F-NEXT: lh a6, 10(a0) ; RV64ZVE32F-NEXT: lh a7, 12(a0) ; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 @@ -13541,109 +13541,109 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV32ZVE32F-NEXT: vse32.v v8, (a1) ; RV32ZVE32F-NEXT: lw a1, 288(sp) ; RV32ZVE32F-NEXT: lw a2, 292(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 188(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a3, 296(sp) +; RV32ZVE32F-NEXT: lw a4, 300(sp) +; RV32ZVE32F-NEXT: lw a5, 0(a1) +; RV32ZVE32F-NEXT: sw a5, 188(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 296(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: sw a3, 180(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(a2) -; RV32ZVE32F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 300(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: sw a1, 180(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a2) +; RV32ZVE32F-NEXT: sw a1, 176(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: sw a1, 172(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 0(a4) +; RV32ZVE32F-NEXT: sw a1, 164(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a4) +; RV32ZVE32F-NEXT: sw a1, 160(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 304(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: sw a3, 164(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(a2) -; RV32ZVE32F-NEXT: sw a2, 160(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a2, 308(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 156(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a3, 312(sp) +; RV32ZVE32F-NEXT: lw a4, 316(sp) +; RV32ZVE32F-NEXT: lw a5, 0(a1) +; RV32ZVE32F-NEXT: sw a5, 156(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 312(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: sw a3, 148(sp) # 
4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(a2) -; RV32ZVE32F-NEXT: sw a2, 144(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 316(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 140(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: sw a1, 148(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a2) +; RV32ZVE32F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: sw a1, 140(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 0(a4) +; RV32ZVE32F-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a4) +; RV32ZVE32F-NEXT: sw a1, 128(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 320(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: sw a3, 132(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(a2) -; RV32ZVE32F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a2, 324(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a3, 328(sp) +; RV32ZVE32F-NEXT: lw a4, 332(sp) +; RV32ZVE32F-NEXT: lw a5, 0(a1) +; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 328(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: sw a3, 116(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(a2) -; RV32ZVE32F-NEXT: sw a2, 112(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 332(sp) -; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 104(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw ra, 4(a1) -; RV32ZVE32F-NEXT: lw a1, 336(sp) -; RV32ZVE32F-NEXT: lw s10, 0(a2) -; RV32ZVE32F-NEXT: lw s8, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 340(sp) -; RV32ZVE32F-NEXT: lw s6, 0(a1) -; RV32ZVE32F-NEXT: lw s4, 4(a1) -; RV32ZVE32F-NEXT: lw a4, 344(sp) -; RV32ZVE32F-NEXT: lw s2, 0(a2) -; RV32ZVE32F-NEXT: lw t5, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 348(sp) -; RV32ZVE32F-NEXT: lw t3, 0(a4) -; RV32ZVE32F-NEXT: lw t2, 4(a4) -; RV32ZVE32F-NEXT: lw a4, 352(sp) -; RV32ZVE32F-NEXT: lw t0, 0(a2) -; RV32ZVE32F-NEXT: lw a7, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 356(sp) -; RV32ZVE32F-NEXT: lw a6, 0(a4) -; RV32ZVE32F-NEXT: lw a5, 4(a4) -; RV32ZVE32F-NEXT: lw a4, 360(sp) ; RV32ZVE32F-NEXT: lw a1, 0(a2) -; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 364(sp) -; RV32ZVE32F-NEXT: lw s11, 0(a4) -; RV32ZVE32F-NEXT: lw s9, 4(a4) -; RV32ZVE32F-NEXT: lw a1, 368(sp) -; RV32ZVE32F-NEXT: lw s7, 0(a2) -; RV32ZVE32F-NEXT: lw s5, 4(a2) +; RV32ZVE32F-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 0(a3) +; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw ra, 4(a3) +; RV32ZVE32F-NEXT: lw s10, 0(a4) +; RV32ZVE32F-NEXT: lw s8, 4(a4) +; RV32ZVE32F-NEXT: lw a2, 336(sp) +; RV32ZVE32F-NEXT: lw a4, 340(sp) +; RV32ZVE32F-NEXT: lw a6, 344(sp) +; RV32ZVE32F-NEXT: lw t0, 348(sp) +; RV32ZVE32F-NEXT: lw s5, 0(a2) +; RV32ZVE32F-NEXT: lw s4, 4(a2) +; RV32ZVE32F-NEXT: lw t6, 0(a4) +; RV32ZVE32F-NEXT: lw t5, 4(a4) +; RV32ZVE32F-NEXT: lw t3, 0(a6) +; RV32ZVE32F-NEXT: lw t2, 4(a6) +; RV32ZVE32F-NEXT: lw t1, 0(t0) +; RV32ZVE32F-NEXT: lw a7, 4(t0) +; RV32ZVE32F-NEXT: lw a6, 352(sp) +; RV32ZVE32F-NEXT: lw 
t0, 356(sp) +; RV32ZVE32F-NEXT: lw t4, 360(sp) +; RV32ZVE32F-NEXT: lw a1, 364(sp) +; RV32ZVE32F-NEXT: lw a5, 0(a6) +; RV32ZVE32F-NEXT: lw a6, 4(a6) +; RV32ZVE32F-NEXT: lw a2, 0(t0) +; RV32ZVE32F-NEXT: sw a2, 108(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(t0) +; RV32ZVE32F-NEXT: sw a2, 100(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw s11, 0(t4) +; RV32ZVE32F-NEXT: lw s9, 4(t4) +; RV32ZVE32F-NEXT: lw s7, 0(a1) +; RV32ZVE32F-NEXT: lw s6, 4(a1) +; RV32ZVE32F-NEXT: lw a4, 368(sp) ; RV32ZVE32F-NEXT: lw a3, 372(sp) -; RV32ZVE32F-NEXT: lw s3, 0(a1) -; RV32ZVE32F-NEXT: lw t6, 4(a1) ; RV32ZVE32F-NEXT: lw a2, 376(sp) -; RV32ZVE32F-NEXT: lw t4, 0(a3) ; RV32ZVE32F-NEXT: lw a1, 380(sp) -; RV32ZVE32F-NEXT: lw t1, 4(a3) +; RV32ZVE32F-NEXT: lw s3, 0(a4) +; RV32ZVE32F-NEXT: lw s2, 4(a4) +; RV32ZVE32F-NEXT: lw t4, 0(a3) +; RV32ZVE32F-NEXT: lw t0, 4(a3) ; RV32ZVE32F-NEXT: lw a4, 0(a2) ; RV32ZVE32F-NEXT: lw a3, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a1) ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a5, 196(a0) -; RV32ZVE32F-NEXT: sw a6, 192(a0) +; RV32ZVE32F-NEXT: sw a6, 196(a0) +; RV32ZVE32F-NEXT: sw a5, 192(a0) ; RV32ZVE32F-NEXT: sw a7, 188(a0) -; RV32ZVE32F-NEXT: sw t0, 184(a0) +; RV32ZVE32F-NEXT: sw t1, 184(a0) ; RV32ZVE32F-NEXT: sw t2, 180(a0) ; RV32ZVE32F-NEXT: sw t3, 176(a0) ; RV32ZVE32F-NEXT: sw t5, 172(a0) -; RV32ZVE32F-NEXT: sw s2, 168(a0) +; RV32ZVE32F-NEXT: sw t6, 168(a0) ; RV32ZVE32F-NEXT: sw s4, 164(a0) -; RV32ZVE32F-NEXT: sw s6, 160(a0) +; RV32ZVE32F-NEXT: sw s5, 160(a0) ; RV32ZVE32F-NEXT: sw s8, 156(a0) ; RV32ZVE32F-NEXT: sw s10, 152(a0) ; RV32ZVE32F-NEXT: sw ra, 148(a0) @@ -13697,11 +13697,11 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV32ZVE32F-NEXT: sw a2, 248(a0) ; RV32ZVE32F-NEXT: sw a3, 244(a0) ; RV32ZVE32F-NEXT: sw a4, 240(a0) -; RV32ZVE32F-NEXT: sw t1, 236(a0) +; RV32ZVE32F-NEXT: sw t0, 236(a0) ; RV32ZVE32F-NEXT: sw t4, 232(a0) -; RV32ZVE32F-NEXT: sw t6, 228(a0) +; RV32ZVE32F-NEXT: sw s2, 228(a0) ; RV32ZVE32F-NEXT: sw s3, 224(a0) -; RV32ZVE32F-NEXT: sw s5, 220(a0) +; RV32ZVE32F-NEXT: sw s6, 220(a0) ; RV32ZVE32F-NEXT: sw s7, 216(a0) ; RV32ZVE32F-NEXT: sw s9, 212(a0) ; RV32ZVE32F-NEXT: sw s11, 208(a0) @@ -13812,22 +13812,22 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV64ZVE32F-NEXT: ld s8, 336(a1) ; RV64ZVE32F-NEXT: ld s9, 352(a1) ; RV64ZVE32F-NEXT: ld s10, 368(a1) -; RV64ZVE32F-NEXT: ld s11, 384(a1) -; RV64ZVE32F-NEXT: ld ra, 400(a1) -; RV64ZVE32F-NEXT: ld a6, 416(a1) -; RV64ZVE32F-NEXT: ld a5, 432(a1) -; RV64ZVE32F-NEXT: ld a2, 496(a1) +; RV64ZVE32F-NEXT: ld s11, 448(a1) +; RV64ZVE32F-NEXT: ld ra, 464(a1) ; RV64ZVE32F-NEXT: ld a3, 480(a1) -; RV64ZVE32F-NEXT: ld a4, 464(a1) -; RV64ZVE32F-NEXT: ld a1, 448(a1) +; RV64ZVE32F-NEXT: ld a2, 496(a1) +; RV64ZVE32F-NEXT: ld a6, 384(a1) +; RV64ZVE32F-NEXT: ld a5, 400(a1) +; RV64ZVE32F-NEXT: ld a4, 416(a1) +; RV64ZVE32F-NEXT: ld a1, 432(a1) ; RV64ZVE32F-NEXT: sd a2, 248(a0) ; RV64ZVE32F-NEXT: sd a3, 240(a0) -; RV64ZVE32F-NEXT: sd a4, 232(a0) -; RV64ZVE32F-NEXT: sd a1, 224(a0) -; RV64ZVE32F-NEXT: sd a5, 216(a0) -; RV64ZVE32F-NEXT: sd a6, 208(a0) -; RV64ZVE32F-NEXT: sd ra, 200(a0) -; RV64ZVE32F-NEXT: sd s11, 192(a0) +; RV64ZVE32F-NEXT: sd ra, 232(a0) +; RV64ZVE32F-NEXT: sd s11, 224(a0) +; RV64ZVE32F-NEXT: sd a1, 216(a0) +; RV64ZVE32F-NEXT: sd a4, 208(a0) +; RV64ZVE32F-NEXT: sd a5, 200(a0) +; RV64ZVE32F-NEXT: sd a6, 192(a0) ; RV64ZVE32F-NEXT: sd s10, 184(a0) ; RV64ZVE32F-NEXT: sd s9, 176(a0) ; RV64ZVE32F-NEXT: sd s8, 168(a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index fe037a5af57c06..bc7758717c1c15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -275,9 +275,9 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -336,17 +336,17 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) -; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: ld a3, 8(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vse8.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse8.v v9, (a3) +; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse8.v v9, (a0) +; RV64ZVE32F-NEXT: vse8.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse8.v v8, (a2) +; RV64ZVE32F-NEXT: vse8.v v8, (a0) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1)) ret void @@ -377,37 +377,37 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB8_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_10 ; RV64ZVE32F-NEXT: .LBB8_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_11 ; RV64ZVE32F-NEXT: .LBB8_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_12 ; RV64ZVE32F-NEXT: .LBB8_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_13 ; RV64ZVE32F-NEXT: .LBB8_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_14 ; RV64ZVE32F-NEXT: .LBB8_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_15 ; RV64ZVE32F-NEXT: .LBB8_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_16 ; RV64ZVE32F-NEXT: .LBB8_8: # %else14 ; RV64ZVE32F-NEXT: ret @@ -415,43 +415,43 @@ define void 
@mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vse8.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_2 ; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v9, (t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_3 ; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v9, (a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_4 ; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v9, (a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_5 ; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v9, (a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_6 ; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse8.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: vse8.v v9, (a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_7 ; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_8 ; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma @@ -793,9 +793,9 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -854,17 +854,17 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) -; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: ld a3, 8(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a0) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -895,37 
+895,37 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB17_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_10 ; RV64ZVE32F-NEXT: .LBB17_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_11 ; RV64ZVE32F-NEXT: .LBB17_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_12 ; RV64ZVE32F-NEXT: .LBB17_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_13 ; RV64ZVE32F-NEXT: .LBB17_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_14 ; RV64ZVE32F-NEXT: .LBB17_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_15 ; RV64ZVE32F-NEXT: .LBB17_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_16 ; RV64ZVE32F-NEXT: .LBB17_8: # %else14 ; RV64ZVE32F-NEXT: ret @@ -933,43 +933,43 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_2 ; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vse16.v v9, (t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_3 ; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_4 ; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v9, (a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_5 ; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v9, (a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_6 ; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 
64 +; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_7 ; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_8 ; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -1670,9 +1670,9 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -1731,17 +1731,17 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) -; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: ld a3, 8(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vse32.v v9, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a0) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -1772,37 +1772,37 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB28_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_10 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_11 ; RV64ZVE32F-NEXT: .LBB28_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_12 ; RV64ZVE32F-NEXT: .LBB28_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_13 ; RV64ZVE32F-NEXT: .LBB28_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_14 ; RV64ZVE32F-NEXT: .LBB28_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi 
a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_15 ; RV64ZVE32F-NEXT: .LBB28_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_16 ; RV64ZVE32F-NEXT: .LBB28_8: # %else14 ; RV64ZVE32F-NEXT: ret @@ -1810,46 +1810,46 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_2 ; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v10, (t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_3 ; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v10, (a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_4 ; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_5 ; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_6 ; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v10, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: vse32.v v10, (a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_7 ; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_8 ; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -2898,8 +2898,8 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; ; RV32ZVE32F-LABEL: mscatter_v2i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a2, 12(a0) ; RV32ZVE32F-NEXT: lw a1, 8(a0) +; RV32ZVE32F-NEXT: lw a2, 12(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v0 ; RV32ZVE32F-NEXT: andi a4, a3, 1 @@ -2965,12 +2965,12 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV32ZVE32F-LABEL: mscatter_v4i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a1, 28(a0) -; RV32ZVE32F-NEXT: lw a2, 24(a0) -; RV32ZVE32F-NEXT: lw a3, 20(a0) -; RV32ZVE32F-NEXT: lw a4, 16(a0) -; RV32ZVE32F-NEXT: lw a7, 12(a0) +; RV32ZVE32F-NEXT: lw a1, 24(a0) +; RV32ZVE32F-NEXT: lw a2, 28(a0) ; RV32ZVE32F-NEXT: lw a6, 8(a0) +; RV32ZVE32F-NEXT: lw a7, 12(a0) +; RV32ZVE32F-NEXT: lw a3, 16(a0) +; RV32ZVE32F-NEXT: lw a4, 20(a0) ; RV32ZVE32F-NEXT: 
vsetivli zero, 1, e8, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a5, v0 ; RV32ZVE32F-NEXT: andi t0, a5, 1 @@ -3007,38 +3007,38 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a4, 0(a0) -; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a3, 0(a0) +; RV32ZVE32F-NEXT: sw a4, 4(a0) ; RV32ZVE32F-NEXT: andi a5, a5, 8 ; RV32ZVE32F-NEXT: beqz a5, .LBB38_4 ; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v4i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 24(a1) +; RV64ZVE32F-NEXT: ld a6, 8(a1) ; RV64ZVE32F-NEXT: ld a4, 16(a1) -; RV64ZVE32F-NEXT: ld a7, 8(a1) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a5, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a1) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a5, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a6, v0 -; RV64ZVE32F-NEXT: andi t1, a6, 1 +; RV64ZVE32F-NEXT: vmv.x.s a7, v0 +; RV64ZVE32F-NEXT: andi t1, a7, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB38_5 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a6, 2 +; RV64ZVE32F-NEXT: andi a0, a7, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_6 ; RV64ZVE32F-NEXT: .LBB38_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a6, 4 +; RV64ZVE32F-NEXT: andi a0, a7, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_7 ; RV64ZVE32F-NEXT: .LBB38_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a6, 8 +; RV64ZVE32F-NEXT: andi a0, a7, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_8 ; RV64ZVE32F-NEXT: .LBB38_4: # %else6 ; RV64ZVE32F-NEXT: ret @@ -3046,15 +3046,15 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV64ZVE32F-NEXT: ld a1, 0(a1) ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: sd a0, 0(a1) -; RV64ZVE32F-NEXT: andi a0, a6, 2 +; RV64ZVE32F-NEXT: andi a0, a7, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_2 ; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1 -; RV64ZVE32F-NEXT: sd t0, 0(a7) -; RV64ZVE32F-NEXT: andi a0, a6, 4 +; RV64ZVE32F-NEXT: sd t0, 0(a6) +; RV64ZVE32F-NEXT: andi a0, a7, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_3 ; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3 ; RV64ZVE32F-NEXT: sd a5, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a6, 8 +; RV64ZVE32F-NEXT: andi a0, a7, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_4 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5 ; RV64ZVE32F-NEXT: sd a3, 0(a2) @@ -3078,46 +3078,46 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a1, 28(a0) -; RV32ZVE32F-NEXT: lw a2, 24(a0) -; RV32ZVE32F-NEXT: lw a3, 20(a0) -; RV32ZVE32F-NEXT: lw a4, 16(a0) -; RV32ZVE32F-NEXT: lw a5, 12(a0) -; RV32ZVE32F-NEXT: lw a6, 0(a0) -; RV32ZVE32F-NEXT: lw a7, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 8(a0) +; RV32ZVE32F-NEXT: lw a1, 16(a0) +; RV32ZVE32F-NEXT: lw a2, 20(a0) +; RV32ZVE32F-NEXT: lw a3, 24(a0) +; RV32ZVE32F-NEXT: lw a4, 28(a0) +; RV32ZVE32F-NEXT: lw a5, 0(a0) +; RV32ZVE32F-NEXT: lw a6, 4(a0) +; RV32ZVE32F-NEXT: lw a7, 8(a0) +; RV32ZVE32F-NEXT: lw a0, 12(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v8 -; 
RV32ZVE32F-NEXT: sw a6, 0(t0) -; RV32ZVE32F-NEXT: sw a7, 4(t0) +; RV32ZVE32F-NEXT: sw a5, 0(t0) +; RV32ZVE32F-NEXT: sw a6, 4(t0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a6, v9 -; RV32ZVE32F-NEXT: sw a0, 0(a6) -; RV32ZVE32F-NEXT: sw a5, 4(a6) +; RV32ZVE32F-NEXT: vmv.x.s a5, v9 +; RV32ZVE32F-NEXT: sw a7, 0(a5) +; RV32ZVE32F-NEXT: sw a0, 4(a5) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a4, 0(a0) -; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: sw a3, 0(a0) +; RV32ZVE32F-NEXT: sw a4, 4(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 24(a1) -; RV64ZVE32F-NEXT: ld a3, 16(a1) -; RV64ZVE32F-NEXT: ld a4, 8(a1) -; RV64ZVE32F-NEXT: ld a1, 0(a1) +; RV64ZVE32F-NEXT: ld a2, 0(a1) +; RV64ZVE32F-NEXT: ld a3, 8(a1) +; RV64ZVE32F-NEXT: ld a4, 16(a1) +; RV64ZVE32F-NEXT: ld a1, 24(a1) ; RV64ZVE32F-NEXT: ld a5, 0(a0) ; RV64ZVE32F-NEXT: ld a6, 8(a0) ; RV64ZVE32F-NEXT: ld a7, 16(a0) ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: sd a5, 0(a1) -; RV64ZVE32F-NEXT: sd a6, 0(a4) -; RV64ZVE32F-NEXT: sd a7, 0(a3) -; RV64ZVE32F-NEXT: sd a0, 0(a2) +; RV64ZVE32F-NEXT: sd a5, 0(a2) +; RV64ZVE32F-NEXT: sd a6, 0(a3) +; RV64ZVE32F-NEXT: sd a7, 0(a4) +; RV64ZVE32F-NEXT: sd a0, 0(a1) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) ret void @@ -3156,51 +3156,51 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a1, 60(a0) -; RV32ZVE32F-NEXT: lw a2, 56(a0) -; RV32ZVE32F-NEXT: lw a3, 52(a0) -; RV32ZVE32F-NEXT: lw a4, 48(a0) -; RV32ZVE32F-NEXT: lw a5, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw a1, 56(a0) +; RV32ZVE32F-NEXT: lw a2, 60(a0) +; RV32ZVE32F-NEXT: lw a5, 40(a0) +; RV32ZVE32F-NEXT: lw a6, 44(a0) +; RV32ZVE32F-NEXT: lw a3, 48(a0) +; RV32ZVE32F-NEXT: lw a4, 52(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) ; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a6, v0 -; RV32ZVE32F-NEXT: andi s1, a6, 1 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s1, a7, 1 ; RV32ZVE32F-NEXT: bnez s1, .LBB41_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a6, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_11 ; RV32ZVE32F-NEXT: .LBB41_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a6, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_12 ; RV32ZVE32F-NEXT: .LBB41_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a6, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_13 ; RV32ZVE32F-NEXT: .LBB41_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a6, 16 
+; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_14 ; RV32ZVE32F-NEXT: .LBB41_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a6, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_15 ; RV32ZVE32F-NEXT: .LBB41_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a6, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_16 ; RV32ZVE32F-NEXT: .LBB41_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a6, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_9 ; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: .LBB41_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -3214,7 +3214,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a6, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_2 ; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -3222,47 +3222,47 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s0, 4(a0) ; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a6, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_3 ; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a6, 8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_4 ; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a6, 16 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_5 ; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a6, 32 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_6 ; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a6, 64 +; RV32ZVE32F-NEXT: sw a5, 0(a0) +; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_7 ; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: 
sw a4, 0(a0) -; RV32ZVE32F-NEXT: sw a3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a6, -128 +; RV32ZVE32F-NEXT: sw a3, 0(a0) +; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_8 ; RV32ZVE32F-NEXT: j .LBB41_9 ; @@ -3276,47 +3276,47 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: .cfi_offset s0, -8 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24 +; RV64ZVE32F-NEXT: ld a4, 40(a1) +; RV64ZVE32F-NEXT: ld a3, 48(a1) ; RV64ZVE32F-NEXT: ld a2, 56(a1) -; RV64ZVE32F-NEXT: ld a4, 48(a1) -; RV64ZVE32F-NEXT: ld a6, 40(a1) -; RV64ZVE32F-NEXT: ld t1, 32(a1) -; RV64ZVE32F-NEXT: ld t3, 24(a1) -; RV64ZVE32F-NEXT: ld t5, 16(a1) -; RV64ZVE32F-NEXT: ld s0, 8(a1) -; RV64ZVE32F-NEXT: ld a3, 56(a0) -; RV64ZVE32F-NEXT: ld a5, 48(a0) -; RV64ZVE32F-NEXT: ld t0, 40(a0) -; RV64ZVE32F-NEXT: ld t2, 32(a0) -; RV64ZVE32F-NEXT: ld t4, 24(a0) -; RV64ZVE32F-NEXT: ld t6, 16(a0) +; RV64ZVE32F-NEXT: ld t5, 8(a1) +; RV64ZVE32F-NEXT: ld t3, 16(a1) +; RV64ZVE32F-NEXT: ld t2, 24(a1) +; RV64ZVE32F-NEXT: ld t0, 32(a1) +; RV64ZVE32F-NEXT: ld a7, 40(a0) +; RV64ZVE32F-NEXT: ld a6, 48(a0) +; RV64ZVE32F-NEXT: ld a5, 56(a0) ; RV64ZVE32F-NEXT: ld s1, 8(a0) +; RV64ZVE32F-NEXT: ld s0, 16(a0) +; RV64ZVE32F-NEXT: ld t6, 24(a0) +; RV64ZVE32F-NEXT: ld t4, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a7, v0 -; RV64ZVE32F-NEXT: andi s2, a7, 1 +; RV64ZVE32F-NEXT: vmv.x.s t1, v0 +; RV64ZVE32F-NEXT: andi s2, t1, 1 ; RV64ZVE32F-NEXT: bnez s2, .LBB41_10 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a7, 2 +; RV64ZVE32F-NEXT: andi a0, t1, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_11 ; RV64ZVE32F-NEXT: .LBB41_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a7, 4 +; RV64ZVE32F-NEXT: andi a0, t1, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_12 ; RV64ZVE32F-NEXT: .LBB41_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a7, 8 +; RV64ZVE32F-NEXT: andi a0, t1, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_13 ; RV64ZVE32F-NEXT: .LBB41_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a7, 16 +; RV64ZVE32F-NEXT: andi a0, t1, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_14 ; RV64ZVE32F-NEXT: .LBB41_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a7, 32 +; RV64ZVE32F-NEXT: andi a0, t1, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_15 ; RV64ZVE32F-NEXT: .LBB41_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a7, 64 +; RV64ZVE32F-NEXT: andi a0, t1, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_16 ; RV64ZVE32F-NEXT: .LBB41_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a7, -128 +; RV64ZVE32F-NEXT: andi a0, t1, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_9 ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13 -; RV64ZVE32F-NEXT: sd a3, 0(a2) +; RV64ZVE32F-NEXT: sd a5, 0(a2) ; RV64ZVE32F-NEXT: .LBB41_9: # %else14 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload @@ -3327,31 +3327,31 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a1, 0(a1) ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: sd a0, 0(a1) -; RV64ZVE32F-NEXT: andi a0, a7, 2 +; RV64ZVE32F-NEXT: andi a0, t1, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_2 ; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1 -; RV64ZVE32F-NEXT: sd s1, 0(s0) -; RV64ZVE32F-NEXT: andi a0, a7, 4 +; RV64ZVE32F-NEXT: sd s1, 0(t5) +; RV64ZVE32F-NEXT: andi a0, t1, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_3 ; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3 -; RV64ZVE32F-NEXT: sd t6, 0(t5) -; RV64ZVE32F-NEXT: andi a0, a7, 8 +; RV64ZVE32F-NEXT: sd s0, 0(t3) +; 
RV64ZVE32F-NEXT: andi a0, t1, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_4 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5 -; RV64ZVE32F-NEXT: sd t4, 0(t3) -; RV64ZVE32F-NEXT: andi a0, a7, 16 +; RV64ZVE32F-NEXT: sd t6, 0(t2) +; RV64ZVE32F-NEXT: andi a0, t1, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_5 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7 -; RV64ZVE32F-NEXT: sd t2, 0(t1) -; RV64ZVE32F-NEXT: andi a0, a7, 32 +; RV64ZVE32F-NEXT: sd t4, 0(t0) +; RV64ZVE32F-NEXT: andi a0, t1, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_6 ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9 -; RV64ZVE32F-NEXT: sd t0, 0(a6) -; RV64ZVE32F-NEXT: andi a0, a7, 64 +; RV64ZVE32F-NEXT: sd a7, 0(a4) +; RV64ZVE32F-NEXT: andi a0, t1, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_7 ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11 -; RV64ZVE32F-NEXT: sd a5, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a7, -128 +; RV64ZVE32F-NEXT: sd a6, 0(a3) +; RV64ZVE32F-NEXT: andi a0, t1, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_8 ; RV64ZVE32F-NEXT: j .LBB41_9 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) @@ -3386,20 +3386,20 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -3434,8 +3434,8 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB42_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -3462,56 +3462,56 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4 ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; 
RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5 ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6 ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7 ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8 ; RV32ZVE32F-NEXT: j .LBB42_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB42_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -3520,7 +3520,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB42_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -3532,31 +3532,31 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB42_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_13 ; RV64ZVE32F-NEXT: .LBB42_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_14 ; RV64ZVE32F-NEXT: .LBB42_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_9 ; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; 
RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB42_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_16 ; RV64ZVE32F-NEXT: .LBB42_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -3565,7 +3565,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_6 ; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -3573,14 +3573,14 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_7 ; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_8 ; RV64ZVE32F-NEXT: j .LBB42_9 ; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11 @@ -3588,7 +3588,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_11 ; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -3630,20 +3630,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -3678,8 +3678,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; 
RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB43_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -3706,56 +3706,56 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_4 ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5 ; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6 ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7 ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8 ; RV32ZVE32F-NEXT: j .LBB43_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB43_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -3764,7 +3764,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB43_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -3776,31 +3776,31 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_13 ; RV64ZVE32F-NEXT: .LBB43_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_14 ; RV64ZVE32F-NEXT: .LBB43_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_9 ; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB43_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_16 ; RV64ZVE32F-NEXT: .LBB43_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -3809,7 +3809,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_6 ; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -3817,14 +3817,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_7 ; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_8 ; RV64ZVE32F-NEXT: j .LBB43_9 ; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11 @@ -3832,7 +3832,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_11 ; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -3876,20 +3876,20 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; 
RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -3924,8 +3924,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB44_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -3952,56 +3952,56 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8 ; RV32ZVE32F-NEXT: j .LBB44_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 
24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -4011,7 +4011,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB44_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -4024,18 +4024,18 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13 ; RV64ZVE32F-NEXT: .LBB44_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14 ; RV64ZVE32F-NEXT: .LBB44_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_9 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 @@ -4043,13 +4043,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB44_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16 ; RV64ZVE32F-NEXT: .LBB44_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4059,7 +4059,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6 ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4068,7 +4068,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_7 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 @@ -4076,7 +4076,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_8 ; RV64ZVE32F-NEXT: j .LBB44_9 ; 
RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11 @@ -4085,7 +4085,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_11 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4129,20 +4129,20 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4177,8 +4177,8 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB45_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -4205,56 +4205,56 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: 
vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 ; RV32ZVE32F-NEXT: j .LBB45_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -4264,7 +4264,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB45_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -4276,31 +4276,31 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB45_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13 ; RV64ZVE32F-NEXT: .LBB45_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14 ; RV64ZVE32F-NEXT: .LBB45_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_9 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB45_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16 ; RV64ZVE32F-NEXT: .LBB45_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4309,7 +4309,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 
; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6 ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4317,14 +4317,14 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_7 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_8 ; RV64ZVE32F-NEXT: j .LBB45_9 ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11 @@ -4332,7 +4332,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_11 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4374,20 +4374,20 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4422,8 +4422,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB46_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -4450,56 +4450,56 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, 
.LBB46_4 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8 ; RV32ZVE32F-NEXT: j .LBB46_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB46_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -4509,7 +4509,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB46_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -4521,31 +4521,31 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB46_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_13 ; RV64ZVE32F-NEXT: .LBB46_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_14 ; RV64ZVE32F-NEXT: .LBB46_7: # %else8 -; 
RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_9 ; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB46_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_16 ; RV64ZVE32F-NEXT: .LBB46_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4554,7 +4554,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_6 ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4562,14 +4562,14 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_7 ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_8 ; RV64ZVE32F-NEXT: j .LBB46_9 ; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11 @@ -4577,7 +4577,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_11 ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4621,20 +4621,20 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 @@ -4669,8 +4669,8 @@ define void 
@mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB47_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -4697,57 +4697,57 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV32ZVE32F-NEXT: j .LBB47_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a5, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a6, 40(a0) -; RV64ZVE32F-NEXT: ld a7, 32(a0) -; RV64ZVE32F-NEXT: ld t0, 24(a0) -; RV64ZVE32F-NEXT: ld t1, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t2, 8(a0) +; RV64ZVE32F-NEXT: ld t1, 16(a0) +; RV64ZVE32F-NEXT: ld t0, 24(a0) +; RV64ZVE32F-NEXT: ld a7, 32(a0) ; RV64ZVE32F-NEXT: lui a4, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a5, v0 -; RV64ZVE32F-NEXT: andi t3, a5, 1 +; RV64ZVE32F-NEXT: vmv.x.s a6, v0 +; RV64ZVE32F-NEXT: andi t3, a6, 1 ; RV64ZVE32F-NEXT: addiw a4, a4, -1 ; RV64ZVE32F-NEXT: beqz t3, .LBB47_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store @@ -4759,7 +4759,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add t3, a1, t3 ; RV64ZVE32F-NEXT: sd a0, 0(t3) ; RV64ZVE32F-NEXT: .LBB47_2: # %else -; RV64ZVE32F-NEXT: andi a0, a5, 2 +; 
RV64ZVE32F-NEXT: andi a0, a6, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -4772,18 +4772,18 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 +; RV64ZVE32F-NEXT: andi a0, a6, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a5, 8 +; RV64ZVE32F-NEXT: andi a0, a6, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a5, 16 +; RV64ZVE32F-NEXT: andi a0, a6, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a5, 32 +; RV64ZVE32F-NEXT: andi a0, a6, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 @@ -4791,13 +4791,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: and a0, a0, a4 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a6, 0(a0) +; RV64ZVE32F-NEXT: sd a5, 0(a0) ; RV64ZVE32F-NEXT: .LBB47_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 +; RV64ZVE32F-NEXT: andi a0, a6, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a5, -128 +; RV64ZVE32F-NEXT: andi a0, a6, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4807,7 +4807,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t1, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a5, 8 +; RV64ZVE32F-NEXT: andi a0, a6, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4816,7 +4816,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a5, 16 +; RV64ZVE32F-NEXT: andi a0, a6, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 @@ -4824,7 +4824,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a5, 32 +; RV64ZVE32F-NEXT: andi a0, a6, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV64ZVE32F-NEXT: j .LBB47_9 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11 @@ -4833,7 +4833,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a5, -128 +; RV64ZVE32F-NEXT: andi a0, a6, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4876,55 +4876,55 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; 
RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw t0, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a7, v0 -; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: bnez s2, .LBB48_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB48_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -4937,7 +4937,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 ; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -4945,62 +4945,62 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; 
RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t0, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8 ; RV32ZVE32F-NEXT: j .LBB48_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -5010,7 +5010,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB48_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5022,31 +5022,31 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: .LBB48_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli 
zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_13 ; RV64ZVE32F-NEXT: .LBB48_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_14 ; RV64ZVE32F-NEXT: .LBB48_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB48_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_16 ; RV64ZVE32F-NEXT: .LBB48_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -5055,7 +5055,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5063,14 +5063,14 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_8 ; RV64ZVE32F-NEXT: j .LBB48_9 ; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11 @@ -5078,7 +5078,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11 ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5119,55 +5119,55 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw t0, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 
56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a7, v0 -; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: bnez s2, .LBB49_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB49_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -5180,7 +5180,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 ; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5188,62 +5188,62 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5 ; RV32ZVE32F-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t0, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8 ; RV32ZVE32F-NEXT: j .LBB49_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -5253,7 +5253,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB49_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5265,31 +5265,31 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB49_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_13 ; RV64ZVE32F-NEXT: .LBB49_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; 
RV64ZVE32F-NEXT: bnez a0, .LBB49_14 ; RV64ZVE32F-NEXT: .LBB49_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB49_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_16 ; RV64ZVE32F-NEXT: .LBB49_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -5298,7 +5298,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5306,14 +5306,14 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_8 ; RV64ZVE32F-NEXT: j .LBB49_9 ; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11 @@ -5321,7 +5321,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11 ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5363,55 +5363,55 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: lw a2, 60(a0) -; RV32ZVE32F-NEXT: lw a3, 56(a0) -; RV32ZVE32F-NEXT: lw a4, 52(a0) -; RV32ZVE32F-NEXT: lw a5, 48(a0) -; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw t0, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a2, 56(a0) +; RV32ZVE32F-NEXT: lw a3, 60(a0) +; RV32ZVE32F-NEXT: lw a6, 40(a0) +; RV32ZVE32F-NEXT: lw a7, 44(a0) +; RV32ZVE32F-NEXT: lw a4, 48(a0) +; RV32ZVE32F-NEXT: lw a5, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 
3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a7, v0 -; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: bnez s2, .LBB50_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a2, 4(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB50_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload @@ -5424,7 +5424,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 ; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a7, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5432,62 +5432,62 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 8 +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 16 +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; 
RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 32 +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t0, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, 64 +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a7, -128 +; RV32ZVE32F-NEXT: sw a4, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8 ; RV32ZVE32F-NEXT: j .LBB50_9 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 56(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) -; RV64ZVE32F-NEXT: ld a5, 40(a0) -; RV64ZVE32F-NEXT: ld a6, 32(a0) -; RV64ZVE32F-NEXT: ld a7, 24(a0) -; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -5498,7 +5498,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB50_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5511,18 +5511,18 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB50_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_13 ; RV64ZVE32F-NEXT: .LBB50_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_14 ; RV64ZVE32F-NEXT: .LBB50_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 @@ -5530,13 +5530,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 32 ; RV64ZVE32F-NEXT: srli a0, a0, 29 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; 
RV64ZVE32F-NEXT: .LBB50_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_16 ; RV64ZVE32F-NEXT: .LBB50_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -5546,7 +5546,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 29 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5555,7 +5555,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 29 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 @@ -5563,7 +5563,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 29 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_8 ; RV64ZVE32F-NEXT: j .LBB50_9 ; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11 @@ -5572,7 +5572,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 29 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11 ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -5627,24 +5627,24 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: .cfi_offset s6, -28 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32 ; RV32ZVE32F-NEXT: .cfi_offset s8, -36 -; RV32ZVE32F-NEXT: lw a3, 60(a0) -; RV32ZVE32F-NEXT: lw a4, 56(a0) -; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw a6, 48(a0) -; RV32ZVE32F-NEXT: lw a7, 44(a0) -; RV32ZVE32F-NEXT: lw t0, 40(a0) -; RV32ZVE32F-NEXT: lw t1, 36(a0) -; RV32ZVE32F-NEXT: lw t2, 32(a0) -; RV32ZVE32F-NEXT: lw t3, 28(a0) -; RV32ZVE32F-NEXT: lw t4, 24(a0) -; RV32ZVE32F-NEXT: lw t5, 20(a0) -; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw a3, 56(a0) +; RV32ZVE32F-NEXT: lw a4, 60(a0) +; RV32ZVE32F-NEXT: lw a7, 40(a0) +; RV32ZVE32F-NEXT: lw t0, 44(a0) +; RV32ZVE32F-NEXT: lw a5, 48(a0) +; RV32ZVE32F-NEXT: lw a6, 52(a0) +; RV32ZVE32F-NEXT: lw t3, 24(a0) +; RV32ZVE32F-NEXT: lw t4, 28(a0) +; RV32ZVE32F-NEXT: lw t1, 32(a0) +; RV32ZVE32F-NEXT: lw t2, 36(a0) ; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s2, 56(a2) -; RV32ZVE32F-NEXT: lw s3, 48(a2) -; RV32ZVE32F-NEXT: lw s4, 40(a2) -; RV32ZVE32F-NEXT: lw s5, 32(a2) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw t5, 16(a0) +; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw s2, 32(a2) +; RV32ZVE32F-NEXT: lw s3, 40(a2) +; RV32ZVE32F-NEXT: lw s4, 48(a2) +; RV32ZVE32F-NEXT: lw s5, 56(a2) ; RV32ZVE32F-NEXT: lw s6, 0(a2) ; RV32ZVE32F-NEXT: lw s7, 8(a2) ; RV32ZVE32F-NEXT: lw s8, 16(a2) @@ -5654,10 +5654,10 @@ define void 
@mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 @@ -5690,8 +5690,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a4, 0(a0) -; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a3, 0(a0) +; RV32ZVE32F-NEXT: sw a4, 4(a0) ; RV32ZVE32F-NEXT: .LBB51_9: # %else14 ; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload @@ -5724,40 +5724,40 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: sw t5, 0(a0) +; RV32ZVE32F-NEXT: sw t6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a0) -; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: sw t3, 0(a0) +; RV32ZVE32F-NEXT: sw t4, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a0) -; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: sw t1, 0(a0) +; RV32ZVE32F-NEXT: sw t2, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t0, 0(a0) -; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a6, 0(a0) -; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: sw a5, 0(a0) +; RV32ZVE32F-NEXT: sw a6, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8 ; RV32ZVE32F-NEXT: j .LBB51_9 @@ -5774,20 +5774,20 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV64ZVE32F-NEXT: .cfi_offset s1, -16 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24 ; RV64ZVE32F-NEXT: .cfi_offset s3, -32 -; RV64ZVE32F-NEXT: ld a3, 56(a0) +; RV64ZVE32F-NEXT: ld a5, 40(a0) ; RV64ZVE32F-NEXT: ld a4, 48(a0) -; RV64ZVE32F-NEXT: ld a6, 40(a0) -; RV64ZVE32F-NEXT: ld t1, 32(a0) +; 
RV64ZVE32F-NEXT: ld a3, 56(a0) +; RV64ZVE32F-NEXT: ld s0, 8(a0) +; RV64ZVE32F-NEXT: ld t5, 16(a0) ; RV64ZVE32F-NEXT: ld t3, 24(a0) -; RV64ZVE32F-NEXT: ld t6, 16(a0) -; RV64ZVE32F-NEXT: ld s1, 8(a0) +; RV64ZVE32F-NEXT: ld t1, 32(a0) ; RV64ZVE32F-NEXT: ld s2, 8(a2) -; RV64ZVE32F-NEXT: ld s0, 16(a2) -; RV64ZVE32F-NEXT: ld t5, 24(a2) +; RV64ZVE32F-NEXT: ld s1, 16(a2) +; RV64ZVE32F-NEXT: ld t6, 24(a2) ; RV64ZVE32F-NEXT: ld t4, 32(a2) ; RV64ZVE32F-NEXT: ld t2, 40(a2) ; RV64ZVE32F-NEXT: ld t0, 48(a2) -; RV64ZVE32F-NEXT: ld a5, 56(a2) +; RV64ZVE32F-NEXT: ld a6, 56(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a7, v0 ; RV64ZVE32F-NEXT: andi s3, a7, 1 @@ -5814,8 +5814,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV64ZVE32F-NEXT: andi a0, a7, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_9 ; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13 -; RV64ZVE32F-NEXT: slli a5, a5, 3 -; RV64ZVE32F-NEXT: add a1, a1, a5 +; RV64ZVE32F-NEXT: slli a6, a6, 3 +; RV64ZVE32F-NEXT: add a1, a1, a6 ; RV64ZVE32F-NEXT: sd a3, 0(a1) ; RV64ZVE32F-NEXT: .LBB51_9: # %else14 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload @@ -5835,19 +5835,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1 ; RV64ZVE32F-NEXT: slli s2, s2, 3 ; RV64ZVE32F-NEXT: add s2, a1, s2 -; RV64ZVE32F-NEXT: sd s1, 0(s2) +; RV64ZVE32F-NEXT: sd s0, 0(s2) ; RV64ZVE32F-NEXT: andi a0, a7, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_3 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3 -; RV64ZVE32F-NEXT: slli s0, s0, 3 -; RV64ZVE32F-NEXT: add s0, a1, s0 -; RV64ZVE32F-NEXT: sd t6, 0(s0) +; RV64ZVE32F-NEXT: slli s1, s1, 3 +; RV64ZVE32F-NEXT: add s1, a1, s1 +; RV64ZVE32F-NEXT: sd t5, 0(s1) ; RV64ZVE32F-NEXT: andi a0, a7, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_4 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5 -; RV64ZVE32F-NEXT: slli t5, t5, 3 -; RV64ZVE32F-NEXT: add t5, a1, t5 -; RV64ZVE32F-NEXT: sd t3, 0(t5) +; RV64ZVE32F-NEXT: slli t6, t6, 3 +; RV64ZVE32F-NEXT: add t6, a1, t6 +; RV64ZVE32F-NEXT: sd t3, 0(t6) ; RV64ZVE32F-NEXT: andi a0, a7, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_5 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7 @@ -5859,7 +5859,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9 ; RV64ZVE32F-NEXT: slli t2, t2, 3 ; RV64ZVE32F-NEXT: add t2, a1, t2 -; RV64ZVE32F-NEXT: sd a6, 0(t2) +; RV64ZVE32F-NEXT: sd a5, 0(t2) ; RV64ZVE32F-NEXT: andi a0, a7, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_7 ; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11 @@ -5972,9 +5972,9 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4f16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -6033,17 +6033,17 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) -; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: ld a3, 8(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vse16.v v8, (a1) 
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a0) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -6074,37 +6074,37 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8f16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_10 ; RV64ZVE32F-NEXT: .LBB57_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_11 ; RV64ZVE32F-NEXT: .LBB57_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_12 ; RV64ZVE32F-NEXT: .LBB57_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_13 ; RV64ZVE32F-NEXT: .LBB57_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_14 ; RV64ZVE32F-NEXT: .LBB57_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_15 ; RV64ZVE32F-NEXT: .LBB57_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_16 ; RV64ZVE32F-NEXT: .LBB57_8: # %else14 ; RV64ZVE32F-NEXT: ret @@ -6112,43 +6112,43 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vse16.v v9, (t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v9, (a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, 
.LBB57_5 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v9, (a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -6794,9 +6794,9 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -6855,17 +6855,17 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 0(a0) -; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: ld a3, 8(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vse32.v v9, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a0) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -6896,37 +6896,37 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB67_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_10 ; RV64ZVE32F-NEXT: .LBB67_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_11 ; RV64ZVE32F-NEXT: .LBB67_3: # 
%else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_12 ; RV64ZVE32F-NEXT: .LBB67_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_13 ; RV64ZVE32F-NEXT: .LBB67_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_14 ; RV64ZVE32F-NEXT: .LBB67_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_15 ; RV64ZVE32F-NEXT: .LBB67_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_16 ; RV64ZVE32F-NEXT: .LBB67_8: # %else14 ; RV64ZVE32F-NEXT: ret @@ -6934,46 +6934,46 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_2 ; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v10, (t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_3 ; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v10, (a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_4 ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_5 ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_6 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v10, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: vse32.v v10, (a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_7 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_8 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8126,9 +8126,9 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: ld a4, 8(a0) +; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 ; 
RV64ZVE32F-NEXT: andi a5, a3, 1 @@ -8317,68 +8317,68 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; ; RV64ZVE32F-LABEL: mscatter_v8f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 56(a0) +; RV64ZVE32F-NEXT: ld a3, 40(a0) ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: ld a4, 40(a0) -; RV64ZVE32F-NEXT: ld a5, 32(a0) -; RV64ZVE32F-NEXT: ld a6, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 56(a0) ; RV64ZVE32F-NEXT: ld t0, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a6, 24(a0) +; RV64ZVE32F-NEXT: ld a5, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi t1, a3, 1 +; RV64ZVE32F-NEXT: vmv.x.s a4, v0 +; RV64ZVE32F-NEXT: andi t1, a4, 1 ; RV64ZVE32F-NEXT: bnez t1, .LBB80_9 ; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_10 ; RV64ZVE32F-NEXT: .LBB80_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_11 ; RV64ZVE32F-NEXT: .LBB80_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_12 ; RV64ZVE32F-NEXT: .LBB80_4: # %else6 -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_13 ; RV64ZVE32F-NEXT: .LBB80_5: # %else8 -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_14 ; RV64ZVE32F-NEXT: .LBB80_6: # %else10 -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_15 ; RV64ZVE32F-NEXT: .LBB80_7: # %else12 -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_16 ; RV64ZVE32F-NEXT: .LBB80_8: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 +; RV64ZVE32F-NEXT: andi a0, a4, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_2 ; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1 ; RV64ZVE32F-NEXT: fsd fa1, 0(t0) -; RV64ZVE32F-NEXT: andi a0, a3, 4 +; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_3 ; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3 ; RV64ZVE32F-NEXT: fsd fa2, 0(a7) -; RV64ZVE32F-NEXT: andi a0, a3, 8 +; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_4 ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5 ; RV64ZVE32F-NEXT: fsd fa3, 0(a6) -; RV64ZVE32F-NEXT: andi a0, a3, 16 +; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_5 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7 ; RV64ZVE32F-NEXT: fsd fa4, 0(a5) -; RV64ZVE32F-NEXT: andi a0, a3, 32 +; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_6 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9 -; RV64ZVE32F-NEXT: fsd fa5, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a3, 64 +; RV64ZVE32F-NEXT: fsd fa5, 0(a3) +; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_7 ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) -; RV64ZVE32F-NEXT: andi a0, a3, -128 +; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_8 ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13 ; RV64ZVE32F-NEXT: fsd fa7, 0(a1) @@ -10240,10 +10240,10 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64: ; RV32ZVE32F: 
# %bb.0: -; RV32ZVE32F-NEXT: lw a2, 56(a1) -; RV32ZVE32F-NEXT: lw a3, 48(a1) -; RV32ZVE32F-NEXT: lw a4, 40(a1) -; RV32ZVE32F-NEXT: lw a5, 32(a1) +; RV32ZVE32F-NEXT: lw a2, 32(a1) +; RV32ZVE32F-NEXT: lw a3, 40(a1) +; RV32ZVE32F-NEXT: lw a4, 48(a1) +; RV32ZVE32F-NEXT: lw a5, 56(a1) ; RV32ZVE32F-NEXT: lw a6, 0(a1) ; RV32ZVE32F-NEXT: lw a7, 8(a1) ; RV32ZVE32F-NEXT: lw t0, 16(a1) @@ -10253,10 +10253,10 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index e57b6a22dd6eab..9385fa69b2f049 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -736,18 +736,18 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; ZVE32F-NEXT: li a5, 40 ; ZVE32F-NEXT: .LBB13_1: # %bb2 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: ld a6, 8(a1) -; ZVE32F-NEXT: ld a7, 0(a1) -; ZVE32F-NEXT: ld t0, 24(a1) -; ZVE32F-NEXT: ld t1, 16(a1) +; ZVE32F-NEXT: ld a6, 0(a1) +; ZVE32F-NEXT: ld a7, 8(a1) +; ZVE32F-NEXT: ld t0, 16(a1) +; ZVE32F-NEXT: ld t1, 24(a1) ; ZVE32F-NEXT: mul t2, a4, a5 ; ZVE32F-NEXT: add t2, a0, t2 ; ZVE32F-NEXT: mul t3, a2, a5 ; ZVE32F-NEXT: add t3, a0, t3 -; ZVE32F-NEXT: sd a7, 0(t3) -; ZVE32F-NEXT: sd a6, 0(t2) -; ZVE32F-NEXT: sd t1, 80(t3) -; ZVE32F-NEXT: sd t0, 80(t2) +; ZVE32F-NEXT: sd a6, 0(t3) +; ZVE32F-NEXT: sd a7, 0(t2) +; ZVE32F-NEXT: sd t0, 80(t3) +; ZVE32F-NEXT: sd t1, 80(t2) ; ZVE32F-NEXT: addi a2, a2, 4 ; ZVE32F-NEXT: addi a1, a1, 32 ; ZVE32F-NEXT: addi a4, a4, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index ac830b34b5957d..5a880105f68379 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -340,21 +340,21 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 24(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu a1, 16(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; 
CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -524,17 +524,17 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 24(a1) -; CHECK-NOV-NEXT: lhu s3, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu s2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 @@ -684,21 +684,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 24(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu a1, 16(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -1196,37 +1196,37 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 56(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu s4, 16(a1) -; CHECK-NOV-NEXT: lhu s5, 24(a1) -; CHECK-NOV-NEXT: lhu s6, 32(a1) -; CHECK-NOV-NEXT: lhu s7, 40(a1) -; CHECK-NOV-NEXT: lhu a1, 48(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) +; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu s3, 56(a1) +; CHECK-NOV-NEXT: lhu s4, 0(a1) +; CHECK-NOV-NEXT: lhu s5, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: 
fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -1551,37 +1551,37 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 56(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) ; CHECK-NOV-NEXT: lhu s3, 48(a1) -; CHECK-NOV-NEXT: lhu s4, 40(a1) -; CHECK-NOV-NEXT: lhu s5, 32(a1) -; CHECK-NOV-NEXT: lhu s6, 24(a1) -; CHECK-NOV-NEXT: lhu s7, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s4, 56(a1) +; CHECK-NOV-NEXT: lhu s5, 0(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz @@ -1862,37 +1862,37 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 56(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu s4, 16(a1) -; CHECK-NOV-NEXT: lhu s5, 24(a1) -; CHECK-NOV-NEXT: lhu s6, 32(a1) -; CHECK-NOV-NEXT: lhu s7, 40(a1) -; CHECK-NOV-NEXT: lhu a1, 48(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) +; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu s3, 56(a1) +; CHECK-NOV-NEXT: lhu s4, 0(a1) +; CHECK-NOV-NEXT: lhu s5, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, 
s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -3669,21 +3669,21 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 24(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu a1, 16(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -3851,17 +3851,17 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 24(a1) -; CHECK-NOV-NEXT: lhu s3, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu s2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 @@ -4010,21 +4010,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 24(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu a1, 16(a1) +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -4510,37 +4510,37 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 56(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu s4, 16(a1) -; CHECK-NOV-NEXT: lhu s5, 24(a1) -; CHECK-NOV-NEXT: lhu s6, 32(a1) -; CHECK-NOV-NEXT: lhu s7, 40(a1) -; CHECK-NOV-NEXT: lhu a1, 48(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) +; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu s3, 56(a1) +; CHECK-NOV-NEXT: lhu s4, 0(a1) +; CHECK-NOV-NEXT: lhu s5, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz @@ -4863,37 +4863,37 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 56(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) ; CHECK-NOV-NEXT: lhu s3, 48(a1) -; CHECK-NOV-NEXT: lhu s4, 40(a1) -; CHECK-NOV-NEXT: lhu s5, 32(a1) -; CHECK-NOV-NEXT: lhu s6, 24(a1) -; CHECK-NOV-NEXT: lhu s7, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s4, 56(a1) +; CHECK-NOV-NEXT: lhu s5, 0(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; 
CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz @@ -5173,37 +5173,37 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 56(a1) -; CHECK-NOV-NEXT: lhu s2, 0(a1) -; CHECK-NOV-NEXT: lhu s3, 8(a1) -; CHECK-NOV-NEXT: lhu s4, 16(a1) -; CHECK-NOV-NEXT: lhu s5, 24(a1) -; CHECK-NOV-NEXT: lhu s6, 32(a1) -; CHECK-NOV-NEXT: lhu s7, 40(a1) -; CHECK-NOV-NEXT: lhu a1, 48(a1) +; CHECK-NOV-NEXT: lhu s1, 32(a1) +; CHECK-NOV-NEXT: lhu s2, 40(a1) +; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu s3, 56(a1) +; CHECK-NOV-NEXT: lhu s4, 0(a1) +; CHECK-NOV-NEXT: lhu s5, 8(a1) +; CHECK-NOV-NEXT: lhu s6, 16(a1) +; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s4 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz diff --git a/llvm/test/CodeGen/RISCV/scmp.ll b/llvm/test/CodeGen/RISCV/scmp.ll index e79b6989410a6c..a212714db53e09 100644 --- a/llvm/test/CodeGen/RISCV/scmp.ll +++ b/llvm/test/CodeGen/RISCV/scmp.ll @@ -87,10 +87,10 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-LABEL: scmp.8.128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: lw a6, 12(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a7, 8(a0) ; RV32I-NEXT: beq a6, a5, .LBB4_2 ; RV32I-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index 5ba8755201ddf5..dcc99ebaa5514b 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -171,21 +171,21 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srl a3, a3, a2 -; RV32I-NEXT: slli a5, a4, 1 -; RV32I-NEXT: andi a6, a2, 31 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: sll a5, a5, a6 -; RV32I-NEXT: or a3, a3, a5 
+; RV32I-NEXT: slli a6, a4, 1 +; RV32I-NEXT: andi a7, a2, 31 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: srl a4, a4, a2 -; RV32I-NEXT: slli a5, a7, 1 -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sll a5, a5, a6 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: srl a5, a7, a2 -; RV32I-NEXT: slli a7, a1, 1 -; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: slli a6, a5, 1 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: slli a6, a1, 1 +; RV32I-NEXT: sll a6, a6, a7 ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: sw a1, 12(a0) @@ -221,41 +221,41 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: ashr128: ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw a5, 0(a1) +; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a3, 12(a1) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a5, 4(a1) -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: sw a3, 12(sp) -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 4(sp) -; RV32I-NEXT: sw a1, 0(sp) -; RV32I-NEXT: srai a3, a3, 31 -; RV32I-NEXT: sw a3, 28(sp) -; RV32I-NEXT: sw a3, 24(sp) -; RV32I-NEXT: sw a3, 20(sp) -; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: sw a5, 0(sp) +; RV32I-NEXT: srai a4, a4, 31 +; RV32I-NEXT: sw a4, 28(sp) +; RV32I-NEXT: sw a4, 24(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a4, 16(sp) ; RV32I-NEXT: srli a1, a2, 3 ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srl a3, a3, a2 -; RV32I-NEXT: slli a5, a4, 1 -; RV32I-NEXT: andi a6, a2, 31 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: sll a5, a5, a6 -; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: slli a6, a4, 1 +; RV32I-NEXT: andi a7, a2, 31 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: srl a4, a4, a2 -; RV32I-NEXT: slli a5, a7, 1 -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sll a5, a5, a6 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: srl a5, a7, a2 -; RV32I-NEXT: slli a7, a1, 1 -; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: slli a6, a5, 1 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: slli a6, a1, 1 +; RV32I-NEXT: sll a6, a6, a7 ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: sra a1, a1, a2 ; RV32I-NEXT: sw a1, 12(a0) @@ -310,27 +310,27 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a3, a3, a1 ; RV32I-NEXT: lw a1, 4(a3) ; RV32I-NEXT: lw a4, 0(a3) -; RV32I-NEXT: sll a5, a1, a2 -; RV32I-NEXT: srli a6, a4, 1 -; RV32I-NEXT: andi a7, a2, 31 -; RV32I-NEXT: lw t0, 8(a3) -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srl a6, a6, a7 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: sll a6, t0, a2 +; RV32I-NEXT: lw a5, 8(a3) ; RV32I-NEXT: lw a3, 12(a3) +; RV32I-NEXT: sll a6, a1, a2 +; RV32I-NEXT: srli a7, a4, 1 +; RV32I-NEXT: andi t0, a2, 31 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: srl a7, a7, t0 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: sll a7, a5, a2 ; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: srl a1, a1, a7 -; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: srl a1, a1, t0 +; RV32I-NEXT: or a1, a7, a1 ; RV32I-NEXT: sll a3, a3, a2 -; RV32I-NEXT: srli a6, t0, 1 -; RV32I-NEXT: srl a6, a6, a7 -; 
RV32I-NEXT: or a3, a3, a6 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: srl a5, a5, t0 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: sll a2, a4, a2 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 12(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a6, 4(a0) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -394,10 +394,10 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 8(a1) -; RV32I-NEXT: lw t2, 0(a1) ; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw t2, 0(a1) ; RV32I-NEXT: lw a7, 4(a1) +; RV32I-NEXT: lw a3, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: andi t1, a2, 64 ; RV32I-NEXT: mv t0, a7 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 162f7e34536a7c..5d00e90366c3be 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -308,22 +308,22 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lbu a0, 12(a0) -; RV32-NEXT: lw a1, 8(s0) -; RV32-NEXT: slli a2, a0, 30 -; RV32-NEXT: lw a3, 4(s0) -; RV32-NEXT: srli s1, a1, 2 -; RV32-NEXT: or s1, s1, a2 -; RV32-NEXT: slli a2, a1, 31 -; RV32-NEXT: srli a4, a3, 1 -; RV32-NEXT: or s2, a4, a2 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai s3, a0, 31 -; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: lbu a1, 12(a0) +; RV32-NEXT: lw a2, 8(a0) +; RV32-NEXT: lw a3, 4(a0) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: slli a4, a1, 30 +; RV32-NEXT: srli s1, a2, 2 +; RV32-NEXT: or s1, s1, a4 +; RV32-NEXT: slli a4, a2, 31 +; RV32-NEXT: srli a5, a3, 1 +; RV32-NEXT: or s2, a5, a4 +; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: slli a1, a1, 31 -; RV32-NEXT: lw a0, 0(s0) -; RV32-NEXT: srai s4, a1, 31 +; RV32-NEXT: srai s3, a1, 31 +; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai s4, a2, 31 ; RV32-NEXT: slli a1, a3, 31 ; RV32-NEXT: srai a1, a1, 31 ; RV32-NEXT: li a2, 6 @@ -389,8 +389,8 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64-NEXT: mv s0, a0 ; RV64-NEXT: lbu a0, 12(a0) ; RV64-NEXT: lwu a1, 8(s0) -; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: ld a2, 0(s0) +; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: slli a0, a0, 29 ; RV64-NEXT: srai s1, a0, 31 @@ -460,22 +460,22 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32M-NEXT: mv s0, a0 -; RV32M-NEXT: lbu a0, 12(a0) -; RV32M-NEXT: lw a1, 8(s0) -; RV32M-NEXT: slli a2, a0, 30 -; RV32M-NEXT: lw a3, 4(s0) -; RV32M-NEXT: srli s1, a1, 2 -; RV32M-NEXT: or s1, s1, a2 -; RV32M-NEXT: slli a2, a1, 31 -; RV32M-NEXT: srli a4, a3, 1 -; RV32M-NEXT: or s2, a4, a2 -; RV32M-NEXT: srli a0, a0, 2 -; RV32M-NEXT: slli a0, a0, 31 -; RV32M-NEXT: srai s3, a0, 31 -; RV32M-NEXT: srli a1, a1, 1 +; RV32M-NEXT: lbu a1, 12(a0) +; RV32M-NEXT: lw a2, 8(a0) +; RV32M-NEXT: lw a3, 4(a0) +; RV32M-NEXT: lw a0, 0(a0) +; RV32M-NEXT: slli a4, a1, 30 +; RV32M-NEXT: srli s1, a2, 2 +; RV32M-NEXT: or s1, s1, a4 +; RV32M-NEXT: slli a4, a2, 31 +; RV32M-NEXT: srli a5, a3, 1 +; RV32M-NEXT: or s2, a5, a4 +; RV32M-NEXT: srli a1, a1, 2 ; RV32M-NEXT: slli a1, a1, 31 -; RV32M-NEXT: lw a0, 0(s0) -; RV32M-NEXT: srai s4, a1, 31 +; RV32M-NEXT: srai s3, a1, 
31 +; RV32M-NEXT: srli a2, a2, 1 +; RV32M-NEXT: slli a2, a2, 31 +; RV32M-NEXT: srai s4, a2, 31 ; RV32M-NEXT: slli a1, a3, 31 ; RV32M-NEXT: srai a1, a1, 31 ; RV32M-NEXT: li a2, 6 @@ -534,34 +534,34 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64M: # %bb.0: ; RV64M-NEXT: ld a1, 0(a0) ; RV64M-NEXT: lwu a2, 8(a0) -; RV64M-NEXT: srli a3, a1, 2 -; RV64M-NEXT: lbu a4, 12(a0) +; RV64M-NEXT: lbu a3, 12(a0) +; RV64M-NEXT: srli a4, a1, 2 ; RV64M-NEXT: slli a5, a2, 62 -; RV64M-NEXT: or a3, a5, a3 -; RV64M-NEXT: srai a3, a3, 31 -; RV64M-NEXT: slli a4, a4, 32 -; RV64M-NEXT: or a2, a2, a4 +; RV64M-NEXT: or a4, a5, a4 +; RV64M-NEXT: srai a4, a4, 31 +; RV64M-NEXT: slli a3, a3, 32 +; RV64M-NEXT: or a2, a2, a3 ; RV64M-NEXT: slli a2, a2, 29 -; RV64M-NEXT: lui a4, %hi(.LCPI3_0) -; RV64M-NEXT: ld a4, %lo(.LCPI3_0)(a4) +; RV64M-NEXT: lui a3, %hi(.LCPI3_0) +; RV64M-NEXT: ld a3, %lo(.LCPI3_0)(a3) ; RV64M-NEXT: srai a2, a2, 31 ; RV64M-NEXT: slli a1, a1, 31 ; RV64M-NEXT: srai a1, a1, 31 -; RV64M-NEXT: mulh a4, a2, a4 -; RV64M-NEXT: srli a5, a4, 63 -; RV64M-NEXT: srai a4, a4, 1 -; RV64M-NEXT: add a4, a4, a5 +; RV64M-NEXT: mulh a3, a2, a3 +; RV64M-NEXT: srli a5, a3, 63 +; RV64M-NEXT: srai a3, a3, 1 +; RV64M-NEXT: add a3, a3, a5 ; RV64M-NEXT: lui a5, %hi(.LCPI3_1) ; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64M-NEXT: add a2, a2, a4 -; RV64M-NEXT: slli a4, a4, 2 -; RV64M-NEXT: add a2, a2, a4 -; RV64M-NEXT: mulh a4, a3, a5 -; RV64M-NEXT: srli a5, a4, 63 -; RV64M-NEXT: srai a4, a4, 1 -; RV64M-NEXT: add a4, a4, a5 -; RV64M-NEXT: slli a5, a4, 3 -; RV64M-NEXT: add a3, a3, a4 +; RV64M-NEXT: add a2, a2, a3 +; RV64M-NEXT: slli a3, a3, 2 +; RV64M-NEXT: add a2, a2, a3 +; RV64M-NEXT: mulh a3, a4, a5 +; RV64M-NEXT: srli a5, a3, 63 +; RV64M-NEXT: srai a3, a3, 1 +; RV64M-NEXT: add a3, a3, a5 +; RV64M-NEXT: slli a5, a3, 3 +; RV64M-NEXT: add a3, a4, a3 ; RV64M-NEXT: sub a3, a3, a5 ; RV64M-NEXT: addi a3, a3, -1 ; RV64M-NEXT: seqz a3, a3 @@ -610,22 +610,22 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: slli a1, a1, 1 ; RV32MV-NEXT: sub sp, sp, a1 ; RV32MV-NEXT: mv s0, a0 -; RV32MV-NEXT: lbu a0, 12(a0) -; RV32MV-NEXT: lw a1, 8(s0) -; RV32MV-NEXT: slli a2, a0, 30 -; RV32MV-NEXT: lw a3, 4(s0) -; RV32MV-NEXT: srli s1, a1, 2 -; RV32MV-NEXT: or s1, s1, a2 -; RV32MV-NEXT: slli a2, a1, 31 -; RV32MV-NEXT: srli a4, a3, 1 -; RV32MV-NEXT: or s2, a4, a2 -; RV32MV-NEXT: srli a0, a0, 2 -; RV32MV-NEXT: slli a0, a0, 31 -; RV32MV-NEXT: srai s3, a0, 31 -; RV32MV-NEXT: srli a1, a1, 1 +; RV32MV-NEXT: lbu a1, 12(a0) +; RV32MV-NEXT: lw a2, 8(a0) +; RV32MV-NEXT: lw a3, 4(a0) +; RV32MV-NEXT: lw a0, 0(a0) +; RV32MV-NEXT: slli a4, a1, 30 +; RV32MV-NEXT: srli s1, a2, 2 +; RV32MV-NEXT: or s1, s1, a4 +; RV32MV-NEXT: slli a4, a2, 31 +; RV32MV-NEXT: srli a5, a3, 1 +; RV32MV-NEXT: or s2, a5, a4 +; RV32MV-NEXT: srli a1, a1, 2 ; RV32MV-NEXT: slli a1, a1, 31 -; RV32MV-NEXT: srai s4, a1, 31 -; RV32MV-NEXT: lw a0, 0(s0) +; RV32MV-NEXT: srai s3, a1, 31 +; RV32MV-NEXT: srli a2, a2, 1 +; RV32MV-NEXT: slli a2, a2, 31 +; RV32MV-NEXT: srai s4, a2, 31 ; RV32MV-NEXT: slli a1, a3, 31 ; RV32MV-NEXT: srai a1, a1, 31 ; RV32MV-NEXT: li a2, 1 @@ -728,8 +728,8 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV: # %bb.0: ; RV64MV-NEXT: lbu a1, 12(a0) ; RV64MV-NEXT: lwu a2, 8(a0) -; RV64MV-NEXT: slli a1, a1, 32 ; RV64MV-NEXT: ld a3, 0(a0) +; RV64MV-NEXT: slli a1, a1, 32 ; RV64MV-NEXT: or a1, a2, a1 ; RV64MV-NEXT: slli a1, a1, 29 ; RV64MV-NEXT: srai a1, a1, 31 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll 
index 7fc4713ac2d6e1..90443051d4b574 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -18,29 +18,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s0, 12(a1) -; RV32I-NEXT: lh s1, 8(a1) -; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, -124 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, -1003 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -53,52 +53,52 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: fold_srem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 12(a1) -; RV32IM-NEXT: lh a3, 8(a1) -; RV32IM-NEXT: lh a4, 0(a1) -; RV32IM-NEXT: lh a1, 4(a1) +; RV32IM-NEXT: lh a2, 0(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a4, 8(a1) +; RV32IM-NEXT: lh a1, 12(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a5, a4, a5 -; RV32IM-NEXT: add a5, a5, a4 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: add a5, a5, a2 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: li a6, 95 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: sub a2, a2, a5 ; RV32IM-NEXT: lui a5, 507375 ; RV32IM-NEXT: addi a5, a5, 1981 -; RV32IM-NEXT: mulh a5, a1, a5 -; RV32IM-NEXT: sub a5, a5, a1 +; RV32IM-NEXT: mulh a5, a3, a5 +; RV32IM-NEXT: sub a5, a5, a3 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: li a6, -124 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sub a3, a3, a5 ; RV32IM-NEXT: lui a5, 342392 ; RV32IM-NEXT: addi a5, a5, 669 -; RV32IM-NEXT: mulh a5, a3, a5 +; RV32IM-NEXT: mulh a5, a4, a5 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 5 ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: li a6, 98 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: lui a5, 780943 ; RV32IM-NEXT: addi a5, a5, 1809 -; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: mulh a5, a1, a5 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 8 ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: li a6, -1003 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a2, a2, a5 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_1: @@ -110,29 +110,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: 
sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s0, 24(a1) -; RV64I-NEXT: lh s1, 16(a1) -; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, -124 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, -1003 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -145,52 +145,52 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_srem_vec_1: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 0(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: lui a2, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI0_0)(a2) +; RV64IM-NEXT: lh a3, 0(a1) +; RV64IM-NEXT: lh a4, 8(a1) ; RV64IM-NEXT: lh a5, 16(a1) -; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: mulh a3, a2, a3 -; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a6, a3, 63 -; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: mulh a2, a3, a2 +; RV64IM-NEXT: add a2, a2, a3 +; RV64IM-NEXT: srli a6, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_1) ; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6) ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a1, a6 -; RV64IM-NEXT: sub a3, a3, a1 -; RV64IM-NEXT: srli a6, a3, 63 -; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: mul a2, a2, a7 +; RV64IM-NEXT: subw a3, a3, a2 +; RV64IM-NEXT: mulh a2, a4, a6 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: srli a6, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_2) ; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6) ; RV64IM-NEXT: li a7, -124 -; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a5, a6 -; RV64IM-NEXT: srli a6, a3, 63 -; RV64IM-NEXT: srli a3, a3, 5 -; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: mul a2, a2, a7 +; RV64IM-NEXT: subw a4, a4, a2 +; RV64IM-NEXT: mulh a2, a5, a6 +; RV64IM-NEXT: srli a6, a2, 63 +; RV64IM-NEXT: srli a2, a2, 5 +; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) ; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) ; RV64IM-NEXT: li a7, 98 -; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a5, a5, a3 -; RV64IM-NEXT: mulh a3, a4, a6 -; RV64IM-NEXT: srli a6, a3, 63 -; RV64IM-NEXT: srli a3, a3, 7 -; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: mul a2, a2, a7 +; RV64IM-NEXT: subw a5, a5, a2 +; RV64IM-NEXT: mulh a2, a1, a6 +; RV64IM-NEXT: srli a6, a2, 63 +; RV64IM-NEXT: srli a2, a2, 7 +; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: li a6, -1003 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a5, 
4(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -206,29 +206,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s0, 12(a1) -; RV32I-NEXT: lh s1, 8(a1) -; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -241,45 +241,45 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: fold_srem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 12(a1) -; RV32IM-NEXT: lh a3, 8(a1) -; RV32IM-NEXT: lh a4, 0(a1) -; RV32IM-NEXT: lh a1, 4(a1) +; RV32IM-NEXT: lh a2, 0(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a4, 8(a1) +; RV32IM-NEXT: lh a1, 12(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a6, a4, a5 -; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: mulh a6, a2, a5 +; RV32IM-NEXT: add a6, a6, a2 ; RV32IM-NEXT: srli a7, a6, 31 ; RV32IM-NEXT: srli a6, a6, 6 ; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul a6, a6, a7 -; RV32IM-NEXT: sub a4, a4, a6 -; RV32IM-NEXT: mulh a6, a1, a5 -; RV32IM-NEXT: add a6, a6, a1 +; RV32IM-NEXT: sub a2, a2, a6 +; RV32IM-NEXT: mulh a6, a3, a5 +; RV32IM-NEXT: add a6, a6, a3 ; RV32IM-NEXT: srli t0, a6, 31 ; RV32IM-NEXT: srli a6, a6, 6 ; RV32IM-NEXT: add a6, a6, t0 ; RV32IM-NEXT: mul a6, a6, a7 -; RV32IM-NEXT: sub a1, a1, a6 -; RV32IM-NEXT: mulh a6, a3, a5 -; RV32IM-NEXT: add a6, a6, a3 +; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: mulh a6, a4, a5 +; RV32IM-NEXT: add a6, a6, a4 ; RV32IM-NEXT: srli t0, a6, 31 ; RV32IM-NEXT: srli a6, a6, 6 ; RV32IM-NEXT: add a6, a6, t0 ; RV32IM-NEXT: mul a6, a6, a7 -; RV32IM-NEXT: sub a3, a3, a6 -; RV32IM-NEXT: mulh a5, a2, a5 -; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulh a5, a1, a5 +; RV32IM-NEXT: add a5, a5, a1 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: mul a5, a5, a7 -; RV32IM-NEXT: sub a2, a2, a5 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_2: @@ -291,29 +291,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s0, 
24(a1) -; RV64I-NEXT: lh s1, 16(a1) -; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -326,45 +326,45 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_srem_vec_2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 0(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: lui a2, %hi(.LCPI1_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2) +; RV64IM-NEXT: lh a3, 0(a1) +; RV64IM-NEXT: lh a4, 8(a1) ; RV64IM-NEXT: lh a5, 16(a1) -; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: mulh a6, a2, a3 -; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: mulh a6, a3, a2 +; RV64IM-NEXT: add a6, a6, a3 ; RV64IM-NEXT: srli a7, a6, 63 ; RV64IM-NEXT: srli a6, a6, 6 ; RV64IM-NEXT: add a6, a6, a7 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mul a6, a6, a7 -; RV64IM-NEXT: subw a2, a2, a6 -; RV64IM-NEXT: mulh a6, a1, a3 -; RV64IM-NEXT: add a6, a6, a1 +; RV64IM-NEXT: subw a3, a3, a6 +; RV64IM-NEXT: mulh a6, a4, a2 +; RV64IM-NEXT: add a6, a6, a4 ; RV64IM-NEXT: srli t0, a6, 63 ; RV64IM-NEXT: srli a6, a6, 6 ; RV64IM-NEXT: add a6, a6, t0 ; RV64IM-NEXT: mul a6, a6, a7 -; RV64IM-NEXT: subw a1, a1, a6 -; RV64IM-NEXT: mulh a6, a5, a3 +; RV64IM-NEXT: subw a4, a4, a6 +; RV64IM-NEXT: mulh a6, a5, a2 ; RV64IM-NEXT: add a6, a6, a5 ; RV64IM-NEXT: srli t0, a6, 63 ; RV64IM-NEXT: srli a6, a6, 6 ; RV64IM-NEXT: add a6, a6, t0 ; RV64IM-NEXT: mul a6, a6, a7 ; RV64IM-NEXT: subw a5, a5, a6 -; RV64IM-NEXT: mulh a3, a4, a3 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli a6, a3, 63 -; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: add a3, a3, a6 -; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: mulh a2, a1, a2 +; RV64IM-NEXT: add a2, a2, a1 +; RV64IM-NEXT: srli a6, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a6 +; RV64IM-NEXT: mul a2, a2, a7 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -445,14 +445,14 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: combine_srem_sdiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 0(a1) -; RV32IM-NEXT: lh a3, 4(a1) -; RV32IM-NEXT: lh a4, 12(a1) +; RV32IM-NEXT: lh a2, 12(a1) +; RV32IM-NEXT: lh a3, 0(a1) +; RV32IM-NEXT: lh a4, 4(a1) ; RV32IM-NEXT: lh a1, 8(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a6, a4, a5 -; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: mulh a6, a2, a5 +; RV32IM-NEXT: add a6, a6, a2 ; RV32IM-NEXT: srli a7, a6, 31 ; 
RV32IM-NEXT: srai a6, a6, 6 ; RV32IM-NEXT: add a6, a6, a7 @@ -464,30 +464,30 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: srai t1, t1, 6 ; RV32IM-NEXT: add t1, t1, t2 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: mulh t3, a3, a5 -; RV32IM-NEXT: add t3, t3, a3 +; RV32IM-NEXT: mulh t3, a4, a5 +; RV32IM-NEXT: add t3, t3, a4 ; RV32IM-NEXT: srli t4, t3, 31 ; RV32IM-NEXT: srai t3, t3, 6 ; RV32IM-NEXT: add t3, t3, t4 ; RV32IM-NEXT: mul t4, t3, a7 -; RV32IM-NEXT: mulh a5, a2, a5 -; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: mulh a5, a3, a5 +; RV32IM-NEXT: add a5, a5, a3 ; RV32IM-NEXT: srli t5, a5, 31 ; RV32IM-NEXT: srai a5, a5, 6 ; RV32IM-NEXT: add a5, a5, t5 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: sub a2, a2, a7 -; RV32IM-NEXT: add a3, a3, t3 -; RV32IM-NEXT: sub a3, a3, t4 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: sub a3, a3, a7 +; RV32IM-NEXT: add a4, a4, t3 +; RV32IM-NEXT: sub a4, a4, t4 ; RV32IM-NEXT: add a1, a1, t1 ; RV32IM-NEXT: sub a1, a1, t2 -; RV32IM-NEXT: add a4, a4, a6 -; RV32IM-NEXT: sub a4, a4, t0 -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: add a2, a2, a6 +; RV32IM-NEXT: sub a2, a2, t0 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 2(a0) +; RV32IM-NEXT: sh a3, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: @@ -624,21 +624,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: lh a3, 4(a1) +; RV32I-NEXT: lh a4, 8(a1) ; RV32I-NEXT: lh a0, 12(a1) -; RV32I-NEXT: lh a3, 8(a1) -; RV32I-NEXT: lh a1, 4(a1) -; RV32I-NEXT: srli a4, a2, 26 -; RV32I-NEXT: add a4, a2, a4 -; RV32I-NEXT: andi a4, a4, -64 -; RV32I-NEXT: sub s1, a2, a4 -; RV32I-NEXT: srli a2, a1, 27 -; RV32I-NEXT: add a2, a1, a2 -; RV32I-NEXT: andi a2, a2, -32 -; RV32I-NEXT: sub s2, a1, a2 -; RV32I-NEXT: srli a1, a3, 29 +; RV32I-NEXT: srli a1, a2, 26 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: andi a1, a1, -64 +; RV32I-NEXT: sub s1, a2, a1 +; RV32I-NEXT: srli a1, a3, 27 ; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: andi a1, a1, -32 +; RV32I-NEXT: sub s2, a3, a1 +; RV32I-NEXT: srli a1, a4, 29 +; RV32I-NEXT: add a1, a4, a1 ; RV32I-NEXT: andi a1, a1, -8 -; RV32I-NEXT: sub s3, a3, a1 +; RV32I-NEXT: sub s3, a4, a1 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: sh a0, 6(s0) @@ -655,8 +655,8 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_srem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 8(a1) -; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a2, 4(a1) +; RV32IM-NEXT: lh a3, 8(a1) ; RV32IM-NEXT: lh a4, 12(a1) ; RV32IM-NEXT: lh a1, 0(a1) ; RV32IM-NEXT: lui a5, 706409 @@ -673,16 +673,16 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a5, a1, a5 ; RV32IM-NEXT: andi a5, a5, -64 ; RV32IM-NEXT: sub a1, a1, a5 -; RV32IM-NEXT: srli a5, a3, 27 -; RV32IM-NEXT: add a5, a3, a5 -; RV32IM-NEXT: andi a5, a5, -32 -; RV32IM-NEXT: sub a3, a3, a5 -; RV32IM-NEXT: srli a5, a2, 29 +; RV32IM-NEXT: srli a5, a2, 27 ; RV32IM-NEXT: add a5, a2, a5 -; RV32IM-NEXT: andi a5, a5, -8 +; RV32IM-NEXT: andi a5, a5, -32 ; RV32IM-NEXT: sub a2, a2, a5 -; RV32IM-NEXT: sh a2, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: srli a5, a3, 29 +; RV32IM-NEXT: add a5, a3, a5 +; RV32IM-NEXT: andi a5, a5, -8 +; RV32IM-NEXT: sub a3, a3, a5 +; 
RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret @@ -697,21 +697,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: lh a3, 8(a1) +; RV64I-NEXT: lh a4, 16(a1) ; RV64I-NEXT: lh a0, 24(a1) -; RV64I-NEXT: lh a3, 16(a1) -; RV64I-NEXT: lh a1, 8(a1) -; RV64I-NEXT: srli a4, a2, 58 -; RV64I-NEXT: add a4, a2, a4 -; RV64I-NEXT: andi a4, a4, -64 -; RV64I-NEXT: subw s1, a2, a4 -; RV64I-NEXT: srli a2, a1, 59 -; RV64I-NEXT: add a2, a1, a2 -; RV64I-NEXT: andi a2, a2, -32 -; RV64I-NEXT: subw s2, a1, a2 -; RV64I-NEXT: srli a1, a3, 61 +; RV64I-NEXT: srli a1, a2, 58 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: andi a1, a1, -64 +; RV64I-NEXT: subw s1, a2, a1 +; RV64I-NEXT: srli a1, a3, 59 ; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: andi a1, a1, -32 +; RV64I-NEXT: subw s2, a3, a1 +; RV64I-NEXT: srli a1, a4, 61 +; RV64I-NEXT: add a1, a4, a1 ; RV64I-NEXT: andi a1, a1, -8 -; RV64I-NEXT: subw s3, a3, a1 +; RV64I-NEXT: subw s3, a4, a1 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: sh a0, 6(s0) @@ -773,24 +773,24 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s0, 12(a1) -; RV32I-NEXT: lh s1, 8(a1) ; RV32I-NEXT: lh a2, 4(a1) +; RV32I-NEXT: lh s0, 8(a1) +; RV32I-NEXT: lh s1, 12(a1) ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 23 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: sh a0, 6(s2) -; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s0, 4(s2) ; RV32I-NEXT: sh s3, 2(s2) ; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -803,43 +803,43 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_srem_one: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 12(a1) -; RV32IM-NEXT: lh a3, 4(a1) -; RV32IM-NEXT: lh a1, 8(a1) +; RV32IM-NEXT: lh a2, 4(a1) +; RV32IM-NEXT: lh a3, 8(a1) +; RV32IM-NEXT: lh a1, 12(a1) ; RV32IM-NEXT: lui a4, 820904 ; RV32IM-NEXT: addi a4, a4, -1903 -; RV32IM-NEXT: mulh a4, a3, a4 -; RV32IM-NEXT: add a4, a4, a3 +; RV32IM-NEXT: mulh a4, a2, a4 +; RV32IM-NEXT: add a4, a4, a2 ; RV32IM-NEXT: srli a5, a4, 31 ; RV32IM-NEXT: srli a4, a4, 9 ; RV32IM-NEXT: add a4, a4, a5 ; RV32IM-NEXT: li a5, 654 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: sub a2, a2, a4 ; RV32IM-NEXT: lui a4, 729444 ; RV32IM-NEXT: addi a4, a4, 713 -; RV32IM-NEXT: mulh a4, a1, a4 -; RV32IM-NEXT: add a4, a4, a1 +; RV32IM-NEXT: mulh a4, a3, a4 +; RV32IM-NEXT: add a4, a4, a3 ; RV32IM-NEXT: srli a5, a4, 31 ; RV32IM-NEXT: srli a4, a4, 4 ; RV32IM-NEXT: add a4, a4, a5 ; RV32IM-NEXT: li a5, 23 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: sub a3, a3, a4 ; RV32IM-NEXT: lui a4, 395996 ; RV32IM-NEXT: addi a4, a4, -2009 -; RV32IM-NEXT: mulh a4, a2, a4 +; RV32IM-NEXT: mulh a4, a1, a4 ; RV32IM-NEXT: srli a5, a4, 31 ; RV32IM-NEXT: srli a4, a4, 11 ; RV32IM-NEXT: add a4, a4, a5 ; RV32IM-NEXT: lui a5, 
1 ; RV32IM-NEXT: addi a5, a5, 1327 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: sub a1, a1, a4 ; RV32IM-NEXT: sh zero, 0(a0) -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_srem_one: @@ -850,24 +850,24 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s0, 24(a1) -; RV64I-NEXT: lh s1, 16(a1) ; RV64I-NEXT: lh a2, 8(a1) +; RV64I-NEXT: lh s0, 16(a1) +; RV64I-NEXT: lh s1, 24(a1) ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: sh a0, 6(s2) -; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s0, 4(s2) ; RV64I-NEXT: sh s3, 2(s2) ; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -880,42 +880,42 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_one: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 16(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) -; RV64IM-NEXT: lh a4, 24(a1) -; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: mulh a3, a2, a3 -; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a2, %hi(.LCPI4_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI4_0)(a2) +; RV64IM-NEXT: lh a3, 16(a1) +; RV64IM-NEXT: lh a4, 8(a1) +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: mulh a2, a3, a2 +; RV64IM-NEXT: add a2, a2, a3 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 4 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) ; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srli a3, a3, 8 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, a2 +; RV64IM-NEXT: mulh a2, a4, a5 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 8 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) ; RV64IM-NEXT: li a6, 654 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a4, a5 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srli a3, a3, 11 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a4, a4, a2 +; RV64IM-NEXT: mulh a2, a1, a5 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 11 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addi a5, a5, 1327 -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: subw a4, a4, a3 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: subw a1, a1, a2 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret 
<4 x i16> %1 @@ -933,8 +933,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lh a2, 4(a1) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lh s1, 12(a1) ; RV32I-NEXT: lh a0, 8(a1) +; RV32I-NEXT: lh s1, 12(a1) ; RV32I-NEXT: srli a1, a2, 17 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: lui a3, 8 @@ -1005,8 +1005,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lh a2, 8(a1) ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lh s1, 24(a1) ; RV64I-NEXT: lh a0, 16(a1) +; RV64I-NEXT: lh s1, 24(a1) ; RV64I-NEXT: srli a1, a2, 49 ; RV64I-NEXT: add a1, a2, a1 ; RV64I-NEXT: lui a3, 8 @@ -1033,38 +1033,38 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_i16_smax: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 16(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI5_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI5_0)(a3) -; RV64IM-NEXT: lh a4, 24(a1) -; RV64IM-NEXT: mulh a3, a2, a3 -; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: add a3, a3, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: lui a6, %hi(.LCPI5_1) -; RV64IM-NEXT: ld a6, %lo(.LCPI5_1)(a6) -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a4, a6 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srli a3, a3, 11 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a2, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI5_0)(a2) +; RV64IM-NEXT: lh a3, 16(a1) +; RV64IM-NEXT: lh a4, 8(a1) +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: mulh a2, a3, a2 +; RV64IM-NEXT: add a2, a2, a3 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 4 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5) +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, a2 +; RV64IM-NEXT: mulh a2, a1, a5 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 11 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addi a5, a5, 1327 -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: srli a3, a1, 49 -; RV64IM-NEXT: add a3, a1, a3 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: srli a2, a4, 49 +; RV64IM-NEXT: add a2, a4, a2 ; RV64IM-NEXT: lui a5, 8 -; RV64IM-NEXT: and a3, a3, a5 -; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: and a2, a2, a5 +; RV64IM-NEXT: subw a4, a4, a2 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1085,17 +1085,18 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 24(a1) -; RV32I-NEXT: lw s1, 28(a1) -; RV32I-NEXT: lw s2, 16(a1) -; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s0, 16(a1) +; RV32I-NEXT: lw s1, 20(a1) +; RV32I-NEXT: lw s2, 24(a1) +; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw s4, 8(a1) ; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 +; 
RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 ; RV32I-NEXT: mv s7, a0 @@ -1108,22 +1109,22 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: li a2, 23 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a2, a0, 1327 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 ; RV32I-NEXT: sw a1, 28(s6) ; RV32I-NEXT: sw a0, 24(s6) -; RV32I-NEXT: sw s3, 20(s6) -; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s1, 20(s6) +; RV32I-NEXT: sw s0, 16(s6) ; RV32I-NEXT: sw s5, 12(s6) ; RV32I-NEXT: sw s4, 8(s6) ; RV32I-NEXT: sw s8, 4(s6) @@ -1154,17 +1155,18 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s0, 24(a1) -; RV32IM-NEXT: lw s1, 28(a1) -; RV32IM-NEXT: lw s2, 16(a1) -; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s0, 16(a1) +; RV32IM-NEXT: lw s1, 20(a1) +; RV32IM-NEXT: lw s2, 24(a1) +; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw a3, 0(a1) +; RV32IM-NEXT: lw a4, 4(a1) ; RV32IM-NEXT: lw s4, 8(a1) ; RV32IM-NEXT: lw s5, 12(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a1, 4(a1) ; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: mv a1, a4 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 ; RV32IM-NEXT: mv s7, a0 @@ -1177,22 +1179,22 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s4, a0 ; RV32IM-NEXT: mv s5, a1 ; RV32IM-NEXT: li a2, 23 -; RV32IM-NEXT: mv a0, s2 -; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 -; RV32IM-NEXT: mv s2, a0 -; RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s1, a1 ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a2, a0, 1327 -; RV32IM-NEXT: mv a0, s0 -; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: mv a0, s2 +; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 ; RV32IM-NEXT: sw a1, 28(s6) ; RV32IM-NEXT: sw a0, 24(s6) -; RV32IM-NEXT: sw s3, 20(s6) -; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s1, 20(s6) +; RV32IM-NEXT: sw s0, 16(s6) ; RV32IM-NEXT: sw s5, 12(s6) ; RV32IM-NEXT: sw s4, 8(s6) ; RV32IM-NEXT: sw s8, 4(s6) @@ -1218,24 +1220,24 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s0, 24(a1) -; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) +; RV64I-NEXT: ld s0, 16(a1) +; RV64I-NEXT: ld s1, 24(a1) ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: sd a0, 24(s2) -; RV64I-NEXT: sd s1, 16(s2) +; 
RV64I-NEXT: sd s0, 16(s2) ; RV64I-NEXT: sd s3, 8(s2) ; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1248,42 +1250,42 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_srem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: ld a2, 16(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) -; RV64IM-NEXT: ld a4, 24(a1) -; RV64IM-NEXT: ld a1, 8(a1) -; RV64IM-NEXT: mulh a3, a2, a3 -; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srai a3, a3, 4 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a2, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI6_0)(a2) +; RV64IM-NEXT: ld a3, 16(a1) +; RV64IM-NEXT: ld a4, 8(a1) +; RV64IM-NEXT: ld a1, 24(a1) +; RV64IM-NEXT: mulh a2, a3, a2 +; RV64IM-NEXT: add a2, a2, a3 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srai a2, a2, 4 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) ; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: sub a2, a2, a3 -; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srai a3, a3, 8 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: sub a3, a3, a2 +; RV64IM-NEXT: mulh a2, a4, a5 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srai a2, a2, 8 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) ; RV64IM-NEXT: li a6, 654 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: sub a1, a1, a3 -; RV64IM-NEXT: mulh a3, a4, a5 -; RV64IM-NEXT: srli a5, a3, 63 -; RV64IM-NEXT: srai a3, a3, 11 -; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: sub a4, a4, a2 +; RV64IM-NEXT: mulh a2, a1, a5 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srai a2, a2, 11 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: sub a4, a4, a3 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a4, 24(a0) -; RV64IM-NEXT: sd a1, 8(a0) -; RV64IM-NEXT: sd a2, 16(a0) +; RV64IM-NEXT: sd a1, 24(a0) +; RV64IM-NEXT: sd a4, 8(a0) +; RV64IM-NEXT: sd a3, 16(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll index 91cfb2a4cef706..b51a759a87b859 100644 --- a/llvm/test/CodeGen/RISCV/stack-store-check.ll +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -143,15 +143,15 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a2, sp, 392 ; CHECK-NEXT: sw a3, 392(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 424(sp) +; CHECK-NEXT: lw a0, 432(sp) ; CHECK-NEXT: lw a1, 436(sp) -; CHECK-NEXT: lw a2, 432(sp) +; CHECK-NEXT: lw a2, 424(sp) ; CHECK-NEXT: lw a3, 428(sp) ; CHECK-NEXT: lui a4, %hi(X) ; CHECK-NEXT: sw a1, %lo(X+12)(a4) -; CHECK-NEXT: sw a2, %lo(X+8)(a4) +; CHECK-NEXT: sw a0, %lo(X+8)(a4) ; CHECK-NEXT: sw a3, %lo(X+4)(a4) -; CHECK-NEXT: sw a0, %lo(X)(a4) +; CHECK-NEXT: sw a2, %lo(X)(a4) ; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw s8, 212(sp) ; CHECK-NEXT: lw s4, 8(sp) # 4-byte Folded Reload @@ -190,15 +190,15 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a2, sp, 344 ; CHECK-NEXT: sw s9, 360(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a0, 376(sp) +; CHECK-NEXT: lw a0, 384(sp) ; CHECK-NEXT: lw a1, 388(sp) -; CHECK-NEXT: lw 
a2, 384(sp) +; CHECK-NEXT: lw a2, 376(sp) ; CHECK-NEXT: lw a3, 380(sp) ; CHECK-NEXT: lui a4, %hi(S) ; CHECK-NEXT: sw a1, %lo(S+12)(a4) -; CHECK-NEXT: sw a2, %lo(S+8)(a4) +; CHECK-NEXT: sw a0, %lo(S+8)(a4) ; CHECK-NEXT: sw a3, %lo(S+4)(a4) -; CHECK-NEXT: sw a0, %lo(S)(a4) +; CHECK-NEXT: sw a2, %lo(S)(a4) ; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 260(sp) ; CHECK-NEXT: sw s10, 256(sp) @@ -216,15 +216,15 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: lw a3, 44(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a3, 264(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 280(sp) +; CHECK-NEXT: lw a0, 288(sp) ; CHECK-NEXT: lw a1, 292(sp) -; CHECK-NEXT: lw a2, 288(sp) +; CHECK-NEXT: lw a2, 280(sp) ; CHECK-NEXT: lw a3, 284(sp) ; CHECK-NEXT: lui a4, %hi(T) ; CHECK-NEXT: sw a1, %lo(T+12)(a4) -; CHECK-NEXT: sw a2, %lo(T+8)(a4) +; CHECK-NEXT: sw a0, %lo(T+8)(a4) ; CHECK-NEXT: sw a3, %lo(T+4)(a4) -; CHECK-NEXT: sw a0, %lo(T)(a4) +; CHECK-NEXT: sw a2, %lo(T)(a4) ; CHECK-NEXT: sw zero, 164(sp) ; CHECK-NEXT: sw zero, 160(sp) ; CHECK-NEXT: sw zero, 156(sp) @@ -238,15 +238,15 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a2, sp, 152 ; CHECK-NEXT: sw s1, 168(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a0, 184(sp) +; CHECK-NEXT: lw a0, 192(sp) ; CHECK-NEXT: lw a1, 196(sp) -; CHECK-NEXT: lw a2, 192(sp) +; CHECK-NEXT: lw a2, 184(sp) ; CHECK-NEXT: lw a3, 188(sp) ; CHECK-NEXT: lui a4, %hi(Y) ; CHECK-NEXT: sw a1, %lo(Y+12)(a4) -; CHECK-NEXT: sw a2, %lo(Y+8)(a4) +; CHECK-NEXT: sw a0, %lo(Y+8)(a4) ; CHECK-NEXT: sw a3, %lo(Y+4)(a4) -; CHECK-NEXT: sw a0, %lo(Y)(a4) +; CHECK-NEXT: sw a2, %lo(Y)(a4) ; CHECK-NEXT: sw zero, 116(sp) ; CHECK-NEXT: sw zero, 112(sp) ; CHECK-NEXT: sw zero, 108(sp) diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll index c74bc6838ff7df..50da56fbc59518 100644 --- a/llvm/test/CodeGen/RISCV/ucmp.ll +++ b/llvm/test/CodeGen/RISCV/ucmp.ll @@ -87,10 +87,10 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-LABEL: ucmp.8.128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: lw a6, 12(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a7, 8(a0) ; RV32I-NEXT: beq a6, a5, .LBB4_2 ; RV32I-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll index f1ae3200175636..dde69667b8ec30 100644 --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -10,47 +10,47 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill -; RISCV32-NEXT: lw a3, 12(a1) -; RISCV32-NEXT: lw a7, 12(a2) -; RISCV32-NEXT: lw a6, 8(a1) -; RISCV32-NEXT: lw a4, 0(a2) -; RISCV32-NEXT: lw a5, 0(a1) +; RISCV32-NEXT: lw a3, 0(a1) ; RISCV32-NEXT: lw t2, 4(a1) -; RISCV32-NEXT: lw t0, 8(a2) -; RISCV32-NEXT: lw a2, 4(a2) -; RISCV32-NEXT: mulhu a1, a5, a4 -; RISCV32-NEXT: mul t1, t2, a4 -; RISCV32-NEXT: add a1, t1, a1 -; RISCV32-NEXT: sltu t1, a1, t1 -; RISCV32-NEXT: mulhu t3, t2, a4 +; RISCV32-NEXT: lw a4, 8(a1) +; RISCV32-NEXT: lw a5, 12(a1) +; RISCV32-NEXT: lw a1, 0(a2) +; RISCV32-NEXT: lw t0, 4(a2) +; RISCV32-NEXT: lw a6, 8(a2) +; RISCV32-NEXT: lw a7, 12(a2) +; RISCV32-NEXT: mulhu a2, a3, a1 +; RISCV32-NEXT: mul t1, t2, a1 
+; RISCV32-NEXT: add a2, t1, a2 +; RISCV32-NEXT: sltu t1, a2, t1 +; RISCV32-NEXT: mulhu t3, t2, a1 ; RISCV32-NEXT: add t4, t3, t1 -; RISCV32-NEXT: mul t1, a5, a2 -; RISCV32-NEXT: add a1, t1, a1 -; RISCV32-NEXT: sltu t1, a1, t1 -; RISCV32-NEXT: mulhu t3, a5, a2 +; RISCV32-NEXT: mul t1, a3, t0 +; RISCV32-NEXT: add a2, t1, a2 +; RISCV32-NEXT: sltu t1, a2, t1 +; RISCV32-NEXT: mulhu t3, a3, t0 ; RISCV32-NEXT: add t1, t3, t1 ; RISCV32-NEXT: add t5, t4, t1 -; RISCV32-NEXT: mul t6, t2, a2 +; RISCV32-NEXT: mul t6, t2, t0 ; RISCV32-NEXT: add s0, t6, t5 -; RISCV32-NEXT: mul t1, t0, a5 -; RISCV32-NEXT: mul s3, a6, a4 +; RISCV32-NEXT: mul t1, a6, a3 +; RISCV32-NEXT: mul s3, a4, a1 ; RISCV32-NEXT: add s4, s3, t1 ; RISCV32-NEXT: add t1, s0, s4 ; RISCV32-NEXT: sltu t3, t1, s0 ; RISCV32-NEXT: sltu s0, s0, t6 ; RISCV32-NEXT: sltu t4, t5, t4 -; RISCV32-NEXT: mulhu t5, t2, a2 +; RISCV32-NEXT: mulhu t5, t2, t0 ; RISCV32-NEXT: add t4, t5, t4 ; RISCV32-NEXT: add s0, t4, s0 -; RISCV32-NEXT: mul t4, t2, t0 -; RISCV32-NEXT: mul t5, a7, a5 +; RISCV32-NEXT: mul t4, t2, a6 +; RISCV32-NEXT: mul t5, a7, a3 ; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: mulhu s1, t0, a5 +; RISCV32-NEXT: mulhu s1, a6, a3 ; RISCV32-NEXT: add s2, s1, t4 -; RISCV32-NEXT: mul t4, a2, a6 -; RISCV32-NEXT: mul t5, a3, a4 +; RISCV32-NEXT: mul t4, t0, a4 +; RISCV32-NEXT: mul t5, a5, a1 ; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: mulhu t5, a6, a4 +; RISCV32-NEXT: mulhu t5, a4, a1 ; RISCV32-NEXT: add t6, t5, t4 ; RISCV32-NEXT: add t4, t6, s2 ; RISCV32-NEXT: sltu s3, s4, s3 @@ -65,39 +65,39 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-NEXT: snez s1, t2 ; RISCV32-NEXT: snez s2, a7 ; RISCV32-NEXT: and s1, s2, s1 -; RISCV32-NEXT: mulhu s2, a7, a5 +; RISCV32-NEXT: mulhu s2, a7, a3 ; RISCV32-NEXT: snez s2, s2 ; RISCV32-NEXT: or s1, s1, s2 -; RISCV32-NEXT: mulhu t2, t2, t0 +; RISCV32-NEXT: mulhu t2, t2, a6 ; RISCV32-NEXT: snez t2, t2 ; RISCV32-NEXT: or t2, s1, t2 ; RISCV32-NEXT: or t2, t2, s0 ; RISCV32-NEXT: sltu t5, t6, t5 -; RISCV32-NEXT: snez t6, a2 -; RISCV32-NEXT: snez s0, a3 +; RISCV32-NEXT: snez t6, t0 +; RISCV32-NEXT: snez s0, a5 ; RISCV32-NEXT: and t6, s0, t6 -; RISCV32-NEXT: mulhu s0, a3, a4 +; RISCV32-NEXT: mulhu s0, a5, a1 ; RISCV32-NEXT: snez s0, s0 ; RISCV32-NEXT: or t6, t6, s0 -; RISCV32-NEXT: mulhu a2, a2, a6 -; RISCV32-NEXT: snez a2, a2 -; RISCV32-NEXT: or a2, t6, a2 -; RISCV32-NEXT: or a2, a2, t5 -; RISCV32-NEXT: or a7, t0, a7 -; RISCV32-NEXT: snez a7, a7 -; RISCV32-NEXT: or a3, a6, a3 -; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: and a3, a3, a7 -; RISCV32-NEXT: or a2, a3, a2 -; RISCV32-NEXT: or a2, a2, t2 -; RISCV32-NEXT: or a2, a2, t3 -; RISCV32-NEXT: mul a3, a5, a4 -; RISCV32-NEXT: andi a2, a2, 1 -; RISCV32-NEXT: sw a3, 0(a0) -; RISCV32-NEXT: sw a1, 4(a0) +; RISCV32-NEXT: mulhu t0, t0, a4 +; RISCV32-NEXT: snez t0, t0 +; RISCV32-NEXT: or t0, t6, t0 +; RISCV32-NEXT: or t0, t0, t5 +; RISCV32-NEXT: or a6, a6, a7 +; RISCV32-NEXT: snez a6, a6 +; RISCV32-NEXT: or a4, a4, a5 +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: and a4, a4, a6 +; RISCV32-NEXT: or a4, a4, t0 +; RISCV32-NEXT: or a4, a4, t2 +; RISCV32-NEXT: or a4, a4, t3 +; RISCV32-NEXT: mul a1, a3, a1 +; RISCV32-NEXT: andi a4, a4, 1 +; RISCV32-NEXT: sw a1, 0(a0) +; RISCV32-NEXT: sw a2, 4(a0) ; RISCV32-NEXT: sw t1, 8(a0) ; RISCV32-NEXT: sw t4, 12(a0) -; RISCV32-NEXT: sb a2, 16(a0) +; RISCV32-NEXT: sb a4, 16(a0) ; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded 
Reload diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 9af18428adf196..74d34b2b64d41f 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -54,19 +54,19 @@ define i24 @load_i24(ptr %p) { ; ; RV32IZBKB-LABEL: load_i24: ; RV32IZBKB: # %bb.0: -; RV32IZBKB-NEXT: lbu a1, 1(a0) -; RV32IZBKB-NEXT: lbu a2, 0(a0) +; RV32IZBKB-NEXT: lbu a1, 0(a0) +; RV32IZBKB-NEXT: lbu a2, 1(a0) ; RV32IZBKB-NEXT: lbu a0, 2(a0) -; RV32IZBKB-NEXT: packh a1, a2, a1 +; RV32IZBKB-NEXT: packh a1, a1, a2 ; RV32IZBKB-NEXT: pack a0, a1, a0 ; RV32IZBKB-NEXT: ret ; ; RV64IZBKB-LABEL: load_i24: ; RV64IZBKB: # %bb.0: -; RV64IZBKB-NEXT: lbu a1, 1(a0) -; RV64IZBKB-NEXT: lbu a2, 0(a0) +; RV64IZBKB-NEXT: lbu a1, 0(a0) +; RV64IZBKB-NEXT: lbu a2, 1(a0) ; RV64IZBKB-NEXT: lbu a0, 2(a0) -; RV64IZBKB-NEXT: packh a1, a2, a1 +; RV64IZBKB-NEXT: packh a1, a1, a2 ; RV64IZBKB-NEXT: slli a0, a0, 16 ; RV64IZBKB-NEXT: or a0, a1, a0 ; RV64IZBKB-NEXT: ret @@ -99,11 +99,11 @@ define i32 @load_i32(ptr %p) { ; ; SLOWZBKB-LABEL: load_i32: ; SLOWZBKB: # %bb.0: -; SLOWZBKB-NEXT: lbu a1, 1(a0) -; SLOWZBKB-NEXT: lbu a2, 0(a0) +; SLOWZBKB-NEXT: lbu a1, 0(a0) +; SLOWZBKB-NEXT: lbu a2, 1(a0) ; SLOWZBKB-NEXT: lbu a3, 2(a0) ; SLOWZBKB-NEXT: lbu a0, 3(a0) -; SLOWZBKB-NEXT: packh a1, a2, a1 +; SLOWZBKB-NEXT: packh a1, a1, a2 ; SLOWZBKB-NEXT: slli a3, a3, 16 ; SLOWZBKB-NEXT: slli a0, a0, 24 ; SLOWZBKB-NEXT: or a0, a0, a3 @@ -130,17 +130,17 @@ define i64 @load_i64(ptr %p) { ; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a4, a4, 24 ; RV32I-NEXT: or a2, a4, a3 -; RV32I-NEXT: or a2, a2, a1 -; RV32I-NEXT: lbu a1, 5(a0) ; RV32I-NEXT: lbu a3, 4(a0) -; RV32I-NEXT: lbu a4, 6(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: lbu a1, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a1, a0, a3 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; @@ -155,16 +155,16 @@ define i64 @load_i64(ptr %p) { ; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a4, a4, 24 ; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a2, 4(a0) +; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: or a1, a3, a1 -; RV64I-NEXT: lbu a2, 5(a0) -; RV64I-NEXT: lbu a3, 4(a0) -; RV64I-NEXT: lbu a4, 6(a0) +; RV64I-NEXT: lbu a3, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a2, a2, 8 -; RV64I-NEXT: or a2, a2, a3 -; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a2, a4, a2 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a1 @@ -172,20 +172,20 @@ define i64 @load_i64(ptr %p) { ; ; RV32IZBKB-LABEL: load_i64: ; RV32IZBKB: # %bb.0: -; RV32IZBKB-NEXT: lbu a1, 1(a0) -; RV32IZBKB-NEXT: lbu a2, 0(a0) +; RV32IZBKB-NEXT: lbu a1, 0(a0) +; RV32IZBKB-NEXT: lbu a2, 1(a0) ; RV32IZBKB-NEXT: lbu a3, 2(a0) ; RV32IZBKB-NEXT: lbu a4, 3(a0) -; RV32IZBKB-NEXT: packh a1, a2, a1 +; RV32IZBKB-NEXT: packh a1, a1, a2 ; RV32IZBKB-NEXT: slli a3, a3, 16 ; RV32IZBKB-NEXT: slli a4, a4, 24 ; RV32IZBKB-NEXT: or a3, a4, a3 -; RV32IZBKB-NEXT: lbu a2, 5(a0) -; RV32IZBKB-NEXT: lbu a4, 4(a0) +; RV32IZBKB-NEXT: lbu a2, 4(a0) +; 
RV32IZBKB-NEXT: lbu a4, 5(a0) ; RV32IZBKB-NEXT: lbu a5, 6(a0) ; RV32IZBKB-NEXT: lbu a6, 7(a0) ; RV32IZBKB-NEXT: or a0, a3, a1 -; RV32IZBKB-NEXT: packh a1, a4, a2 +; RV32IZBKB-NEXT: packh a1, a2, a4 ; RV32IZBKB-NEXT: slli a5, a5, 16 ; RV32IZBKB-NEXT: slli a6, a6, 24 ; RV32IZBKB-NEXT: or a2, a6, a5 @@ -194,20 +194,20 @@ define i64 @load_i64(ptr %p) { ; ; RV64IZBKB-LABEL: load_i64: ; RV64IZBKB: # %bb.0: -; RV64IZBKB-NEXT: lbu a1, 5(a0) -; RV64IZBKB-NEXT: lbu a2, 4(a0) +; RV64IZBKB-NEXT: lbu a1, 4(a0) +; RV64IZBKB-NEXT: lbu a2, 5(a0) ; RV64IZBKB-NEXT: lbu a3, 6(a0) ; RV64IZBKB-NEXT: lbu a4, 7(a0) -; RV64IZBKB-NEXT: packh a1, a2, a1 +; RV64IZBKB-NEXT: packh a1, a1, a2 ; RV64IZBKB-NEXT: slli a3, a3, 16 ; RV64IZBKB-NEXT: slli a4, a4, 24 ; RV64IZBKB-NEXT: or a3, a4, a3 -; RV64IZBKB-NEXT: lbu a2, 1(a0) -; RV64IZBKB-NEXT: lbu a4, 0(a0) +; RV64IZBKB-NEXT: lbu a2, 0(a0) +; RV64IZBKB-NEXT: lbu a4, 1(a0) ; RV64IZBKB-NEXT: lbu a5, 2(a0) ; RV64IZBKB-NEXT: lbu a0, 3(a0) ; RV64IZBKB-NEXT: or a1, a3, a1 -; RV64IZBKB-NEXT: packh a2, a4, a2 +; RV64IZBKB-NEXT: packh a2, a2, a4 ; RV64IZBKB-NEXT: slli a5, a5, 16 ; RV64IZBKB-NEXT: slli a0, a0, 24 ; RV64IZBKB-NEXT: or a0, a0, a5 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index c016e8f3163635..5a5ae66b5fa767 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -522,10 +522,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-LABEL: test_urem_vec: ; RV32MV: # %bb.0: ; RV32MV-NEXT: lw a1, 0(a0) -; RV32MV-NEXT: andi a2, a1, 2047 -; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32MV-NEXT: vmv.v.x v8, a2 ; RV32MV-NEXT: lbu a2, 4(a0) +; RV32MV-NEXT: andi a3, a1, 2047 +; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32MV-NEXT: vmv.v.x v8, a3 ; RV32MV-NEXT: slli a3, a1, 10 ; RV32MV-NEXT: srli a3, a3, 21 ; RV32MV-NEXT: vslide1down.vx v8, v8, a3 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index c057c656e0fb70..b0e790ed606350 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -19,29 +19,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s0, 12(a1) -; RV32I-NEXT: lhu s1, 8(a1) -; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) +; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 124 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 1003 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -54,39 +54,39 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: fold_urem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a2, 12(a1) -; RV32IM-NEXT: lhu a3, 8(a1) 
-; RV32IM-NEXT: lhu a4, 4(a1) -; RV32IM-NEXT: lhu a1, 0(a1) +; RV32IM-NEXT: lhu a2, 0(a1) +; RV32IM-NEXT: lhu a3, 4(a1) +; RV32IM-NEXT: lhu a4, 8(a1) +; RV32IM-NEXT: lhu a1, 12(a1) ; RV32IM-NEXT: lui a5, 8456 ; RV32IM-NEXT: addi a5, a5, 1058 -; RV32IM-NEXT: mulhu a5, a4, a5 +; RV32IM-NEXT: mulhu a5, a3, a5 ; RV32IM-NEXT: slli a6, a5, 7 ; RV32IM-NEXT: slli a5, a5, 2 ; RV32IM-NEXT: sub a5, a5, a6 -; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: add a3, a3, a5 ; RV32IM-NEXT: lui a5, 11038 ; RV32IM-NEXT: addi a5, a5, -1465 -; RV32IM-NEXT: mulhu a5, a1, a5 +; RV32IM-NEXT: mulhu a5, a2, a5 ; RV32IM-NEXT: li a6, 95 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sub a2, a2, a5 ; RV32IM-NEXT: lui a5, 10700 ; RV32IM-NEXT: addi a5, a5, -1003 -; RV32IM-NEXT: mulhu a5, a3, a5 +; RV32IM-NEXT: mulhu a5, a4, a5 ; RV32IM-NEXT: li a6, 98 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: lui a5, 1045 ; RV32IM-NEXT: addi a5, a5, 1801 -; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: mulhu a5, a1, a5 ; RV32IM-NEXT: li a6, 1003 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a2, a2, a5 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 0(a0) -; RV32IM-NEXT: sh a4, 2(a0) +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_1: @@ -98,29 +98,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s0, 24(a1) -; RV64I-NEXT: lhu s1, 16(a1) -; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) +; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 124 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 1003 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -133,38 +133,38 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_urem_vec_1: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lhu a2, 8(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lhu a4, 24(a1) +; RV64IM-NEXT: lui a2, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI0_0)(a2) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a4, 0(a1) ; RV64IM-NEXT: lhu a5, 16(a1) -; RV64IM-NEXT: lhu a1, 0(a1) -; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: slli a6, a3, 7 +; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: mulhu a2, a3, a2 +; RV64IM-NEXT: slli a6, a2, 7 ; RV64IM-NEXT: lui a7, %hi(.LCPI0_1) ; RV64IM-NEXT: ld a7, %lo(.LCPI0_1)(a7) -; RV64IM-NEXT: slli a3, a3, 2 -; RV64IM-NEXT: subw a3, a3, a6 -; RV64IM-NEXT: add a2, a2, a3 -; RV64IM-NEXT: mulhu a3, a1, a7 +; RV64IM-NEXT: slli a2, a2, 2 +; RV64IM-NEXT: subw a2, a2, a6 +; 
RV64IM-NEXT: add a2, a3, a2 +; RV64IM-NEXT: mulhu a3, a4, a7 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_2) ; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6) ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: subw a4, a4, a3 ; RV64IM-NEXT: mulhu a3, a5, a6 ; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) ; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) ; RV64IM-NEXT: li a7, 98 ; RV64IM-NEXT: mul a3, a3, a7 ; RV64IM-NEXT: subw a5, a5, a3 -; RV64IM-NEXT: mulhu a3, a4, a6 +; RV64IM-NEXT: mulhu a3, a1, a6 ; RV64IM-NEXT: li a6, 1003 ; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a4, 0(a0) ; RV64IM-NEXT: sh a2, 2(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, @@ -181,29 +181,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s0, 12(a1) -; RV32I-NEXT: lhu s1, 8(a1) -; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) +; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 12(a1) ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -216,29 +216,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: fold_urem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a2, 12(a1) -; RV32IM-NEXT: lhu a3, 8(a1) -; RV32IM-NEXT: lhu a4, 0(a1) -; RV32IM-NEXT: lhu a1, 4(a1) +; RV32IM-NEXT: lhu a2, 0(a1) +; RV32IM-NEXT: lhu a3, 4(a1) +; RV32IM-NEXT: lhu a4, 8(a1) +; RV32IM-NEXT: lhu a1, 12(a1) ; RV32IM-NEXT: lui a5, 11038 ; RV32IM-NEXT: addi a5, a5, -1465 -; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: mulhu a6, a2, a5 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul a6, a6, a7 -; RV32IM-NEXT: sub a4, a4, a6 -; RV32IM-NEXT: mulhu a6, a1, a5 -; RV32IM-NEXT: mul a6, a6, a7 -; RV32IM-NEXT: sub a1, a1, a6 +; RV32IM-NEXT: sub a2, a2, a6 ; RV32IM-NEXT: mulhu a6, a3, a5 ; RV32IM-NEXT: mul a6, a6, a7 ; RV32IM-NEXT: sub a3, a3, a6 -; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulhu a5, a1, a5 ; RV32IM-NEXT: mul a5, a5, a7 -; RV32IM-NEXT: sub a2, a2, a5 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_2: @@ -250,29 +250,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd 
s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s0, 24(a1) -; RV64I-NEXT: lhu s1, 16(a1) -; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) +; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 24(a1) ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -285,29 +285,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_urem_vec_2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lhu a2, 0(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lhu a4, 24(a1) +; RV64IM-NEXT: lui a2, %hi(.LCPI1_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2) +; RV64IM-NEXT: lhu a3, 0(a1) +; RV64IM-NEXT: lhu a4, 8(a1) ; RV64IM-NEXT: lhu a5, 16(a1) -; RV64IM-NEXT: lhu a1, 8(a1) -; RV64IM-NEXT: mulhu a6, a2, a3 +; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: mulhu a6, a3, a2 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mul a6, a6, a7 -; RV64IM-NEXT: subw a2, a2, a6 -; RV64IM-NEXT: mulhu a6, a1, a3 +; RV64IM-NEXT: subw a3, a3, a6 +; RV64IM-NEXT: mulhu a6, a4, a2 ; RV64IM-NEXT: mul a6, a6, a7 -; RV64IM-NEXT: subw a1, a1, a6 -; RV64IM-NEXT: mulhu a6, a5, a3 +; RV64IM-NEXT: subw a4, a4, a6 +; RV64IM-NEXT: mulhu a6, a5, a2 ; RV64IM-NEXT: mul a6, a6, a7 ; RV64IM-NEXT: subw a5, a5, a6 -; RV64IM-NEXT: mulhu a3, a4, a3 -; RV64IM-NEXT: mul a3, a3, a7 -; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: mulhu a2, a1, a2 +; RV64IM-NEXT: mul a2, a2, a7 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -388,33 +388,33 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: combine_urem_udiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a2, 0(a1) -; RV32IM-NEXT: lhu a3, 4(a1) -; RV32IM-NEXT: lhu a4, 12(a1) +; RV32IM-NEXT: lhu a2, 12(a1) +; RV32IM-NEXT: lhu a3, 0(a1) +; RV32IM-NEXT: lhu a4, 4(a1) ; RV32IM-NEXT: lhu a1, 8(a1) ; RV32IM-NEXT: lui a5, 11038 ; RV32IM-NEXT: addi a5, a5, -1465 -; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: mulhu a6, a2, a5 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul t0, a6, a7 ; RV32IM-NEXT: mulhu t1, a1, a5 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: mulhu t3, a3, a5 +; RV32IM-NEXT: mulhu t3, a4, a5 ; RV32IM-NEXT: mul t4, t3, a7 -; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: mulhu a5, a3, a5 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: sub a2, a2, a7 -; RV32IM-NEXT: add a3, a3, t3 -; RV32IM-NEXT: sub a3, a3, t4 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: sub a3, a3, a7 +; RV32IM-NEXT: add a4, a4, t3 +; RV32IM-NEXT: sub a4, a4, t4 ; RV32IM-NEXT: add a1, a1, t1 ; RV32IM-NEXT: sub a1, a1, t2 -; RV32IM-NEXT: add a4, a4, a6 -; RV32IM-NEXT: sub a4, a4, t0 -; 
RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: add a2, a2, a6 +; RV32IM-NEXT: sub a2, a2, t0 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 2(a0) +; RV32IM-NEXT: sh a3, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: @@ -533,19 +533,19 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s1, 0(a1) ; RV32I-NEXT: lhu s2, 4(a1) -; RV32I-NEXT: lhu s3, 0(a1) +; RV32I-NEXT: lhu s3, 8(a1) ; RV32I-NEXT: lhu a2, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: andi a1, s3, 63 +; RV32I-NEXT: andi a1, s1, 63 ; RV32I-NEXT: andi a2, s2, 31 -; RV32I-NEXT: andi s1, s1, 7 +; RV32I-NEXT: andi a3, s3, 7 ; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh a3, 4(s0) ; RV32I-NEXT: sh a2, 2(s0) ; RV32I-NEXT: sh a1, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -558,8 +558,8 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_urem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a2, 8(a1) -; RV32IM-NEXT: lhu a3, 4(a1) +; RV32IM-NEXT: lhu a2, 4(a1) +; RV32IM-NEXT: lhu a3, 8(a1) ; RV32IM-NEXT: lhu a4, 12(a1) ; RV32IM-NEXT: lhu a1, 0(a1) ; RV32IM-NEXT: lui a5, 11038 @@ -569,10 +569,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: mul a5, a5, a6 ; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: andi a1, a1, 63 -; RV32IM-NEXT: andi a3, a3, 31 -; RV32IM-NEXT: andi a2, a2, 7 -; RV32IM-NEXT: sh a2, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: andi a2, a2, 31 +; RV32IM-NEXT: andi a3, a3, 7 +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret @@ -585,19 +585,19 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s1, 0(a1) ; RV64I-NEXT: lhu s2, 8(a1) -; RV64I-NEXT: lhu s3, 0(a1) +; RV64I-NEXT: lhu s3, 16(a1) ; RV64I-NEXT: lhu a2, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: andi a1, s3, 63 +; RV64I-NEXT: andi a1, s1, 63 ; RV64I-NEXT: andi a2, s2, 31 -; RV64I-NEXT: andi s1, s1, 7 +; RV64I-NEXT: andi a3, s3, 7 ; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh a3, 4(s0) ; RV64I-NEXT: sh a2, 2(s0) ; RV64I-NEXT: sh a1, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -642,24 +642,24 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s0, 12(a1) -; RV32I-NEXT: lhu s1, 8(a1) ; RV32I-NEXT: lhu a2, 4(a1) +; RV32I-NEXT: lhu s0, 8(a1) +; RV32I-NEXT: lhu s1, 12(a1) ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 23 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, 
a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: sh a0, 6(s2) -; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s0, 4(s2) ; RV32I-NEXT: sh s3, 2(s2) ; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -672,32 +672,32 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_urem_one: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a2, 12(a1) -; RV32IM-NEXT: lhu a3, 4(a1) -; RV32IM-NEXT: lhu a1, 8(a1) +; RV32IM-NEXT: lhu a2, 4(a1) +; RV32IM-NEXT: lhu a3, 8(a1) +; RV32IM-NEXT: lhu a1, 12(a1) ; RV32IM-NEXT: lui a4, 1603 ; RV32IM-NEXT: addi a4, a4, 1341 -; RV32IM-NEXT: mulhu a4, a3, a4 +; RV32IM-NEXT: mulhu a4, a2, a4 ; RV32IM-NEXT: li a5, 654 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: sub a2, a2, a4 ; RV32IM-NEXT: lui a4, 45590 ; RV32IM-NEXT: addi a4, a4, 1069 -; RV32IM-NEXT: mulhu a4, a1, a4 +; RV32IM-NEXT: mulhu a4, a3, a4 ; RV32IM-NEXT: li a5, 23 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: sub a3, a3, a4 ; RV32IM-NEXT: lui a4, 193 ; RV32IM-NEXT: addi a4, a4, 1464 -; RV32IM-NEXT: mulhu a4, a2, a4 +; RV32IM-NEXT: mulhu a4, a1, a4 ; RV32IM-NEXT: lui a5, 1 ; RV32IM-NEXT: addi a5, a5, 1327 ; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: sub a1, a1, a4 ; RV32IM-NEXT: sh zero, 0(a0) -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_urem_one: @@ -708,24 +708,24 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s0, 24(a1) -; RV64I-NEXT: lhu s1, 16(a1) ; RV64I-NEXT: lhu a2, 8(a1) +; RV64I-NEXT: lhu s0, 16(a1) +; RV64I-NEXT: lhu s1, 24(a1) ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: sh a0, 6(s2) -; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s0, 4(s2) ; RV64I-NEXT: sh s3, 2(s2) ; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -738,32 +738,32 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_one: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lhu a2, 8(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) -; RV64IM-NEXT: lhu a4, 24(a1) -; RV64IM-NEXT: lhu a1, 16(a1) -; RV64IM-NEXT: mulhu a3, a2, a3 +; RV64IM-NEXT: lui a2, %hi(.LCPI4_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI4_0)(a2) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a4, 16(a1) +; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: mulhu a2, a3, a2 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) ; RV64IM-NEXT: li a6, 654 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulhu a3, a1, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, a2 +; RV64IM-NEXT: mulhu a2, a4, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) ; 
RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) ; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: subw a4, a4, a2 +; RV64IM-NEXT: mulhu a2, a1, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addi a5, a5, 1327 -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: subw a4, a4, a3 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: subw a1, a1, a2 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a3, 2(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -793,17 +793,18 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 24(a1) -; RV32I-NEXT: lw s1, 28(a1) -; RV32I-NEXT: lw s2, 16(a1) -; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s0, 16(a1) +; RV32I-NEXT: lw s1, 20(a1) +; RV32I-NEXT: lw s2, 24(a1) +; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw s4, 8(a1) ; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 ; RV32I-NEXT: mv s7, a0 @@ -816,22 +817,22 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: li a2, 23 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a2, a0, 1327 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 ; RV32I-NEXT: sw a1, 28(s6) ; RV32I-NEXT: sw a0, 24(s6) -; RV32I-NEXT: sw s3, 20(s6) -; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s1, 20(s6) +; RV32I-NEXT: sw s0, 16(s6) ; RV32I-NEXT: sw s5, 12(s6) ; RV32I-NEXT: sw s4, 8(s6) ; RV32I-NEXT: sw s8, 4(s6) @@ -862,17 +863,18 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s0, 24(a1) -; RV32IM-NEXT: lw s1, 28(a1) -; RV32IM-NEXT: lw s2, 16(a1) -; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s0, 16(a1) +; RV32IM-NEXT: lw s1, 20(a1) +; RV32IM-NEXT: lw s2, 24(a1) +; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw a3, 0(a1) +; RV32IM-NEXT: lw a4, 4(a1) ; RV32IM-NEXT: lw s4, 8(a1) ; RV32IM-NEXT: lw s5, 12(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a1, 4(a1) ; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: mv a1, a4 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 ; RV32IM-NEXT: mv s7, a0 @@ -885,22 +887,22 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s4, a0 ; RV32IM-NEXT: mv s5, a1 ; RV32IM-NEXT: li a2, 23 -; RV32IM-NEXT: mv a0, s2 -; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: mv s2, a0 -; 
RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s1, a1 ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a2, a0, 1327 -; RV32IM-NEXT: mv a0, s0 -; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: mv a0, s2 +; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 ; RV32IM-NEXT: sw a1, 28(s6) ; RV32IM-NEXT: sw a0, 24(s6) -; RV32IM-NEXT: sw s3, 20(s6) -; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s1, 20(s6) +; RV32IM-NEXT: sw s0, 16(s6) ; RV32IM-NEXT: sw s5, 12(s6) ; RV32IM-NEXT: sw s4, 8(s6) ; RV32IM-NEXT: sw s8, 4(s6) @@ -926,24 +928,24 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s0, 24(a1) -; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) +; RV64I-NEXT: ld s0, 16(a1) +; RV64I-NEXT: ld s1, 24(a1) ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: sd a0, 24(s2) -; RV64I-NEXT: sd s1, 16(s2) +; RV64I-NEXT: sd s0, 16(s2) ; RV64I-NEXT: sd s3, 8(s2) ; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -956,39 +958,39 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; ; RV64IM-LABEL: dont_fold_urem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: ld a2, 16(a1) -; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) -; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) -; RV64IM-NEXT: ld a4, 24(a1) -; RV64IM-NEXT: ld a1, 8(a1) -; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: sub a5, a2, a3 +; RV64IM-NEXT: lui a2, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a2, %lo(.LCPI6_0)(a2) +; RV64IM-NEXT: ld a3, 16(a1) +; RV64IM-NEXT: ld a4, 8(a1) +; RV64IM-NEXT: ld a1, 24(a1) +; RV64IM-NEXT: mulhu a2, a3, a2 +; RV64IM-NEXT: sub a5, a3, a2 ; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a3, a5, a3 -; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: srli a2, a2, 4 ; RV64IM-NEXT: li a5, 23 ; RV64IM-NEXT: lui a6, %hi(.LCPI6_1) ; RV64IM-NEXT: ld a6, %lo(.LCPI6_1)(a6) -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: sub a2, a2, a3 -; RV64IM-NEXT: srli a3, a1, 1 -; RV64IM-NEXT: mulhu a3, a3, a6 -; RV64IM-NEXT: srli a3, a3, 7 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a3, a3, a2 +; RV64IM-NEXT: srli a2, a4, 1 +; RV64IM-NEXT: mulhu a2, a2, a6 +; RV64IM-NEXT: srli a2, a2, 7 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) ; RV64IM-NEXT: li a6, 654 -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: sub a1, a1, a3 -; RV64IM-NEXT: mulhu a3, a4, a5 -; RV64IM-NEXT: srli a3, a3, 12 +; RV64IM-NEXT: mul a2, a2, a6 +; RV64IM-NEXT: sub a4, a4, a2 +; RV64IM-NEXT: mulhu a2, a1, a5 +; RV64IM-NEXT: srli a2, a2, 12 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mul a3, a3, a5 -; RV64IM-NEXT: sub a4, a4, a3 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a4, 24(a0) -; RV64IM-NEXT: sd a1, 8(a0) -; RV64IM-NEXT: sd a2, 16(a0) +; RV64IM-NEXT: sd a1, 24(a0) +; RV64IM-NEXT: sd a4, 8(a0) +; RV64IM-NEXT: sd a3, 16(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git 
a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index 621f54946e4cdf..d2c30c54390702 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -822,11 +822,11 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 20 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 -; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 35 -; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: lw a1, 4(a0) -; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) +; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8 +; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 35 +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) +; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a1) +; ILP32-ILP32F-FPELIM-NEXT: lw a1, 4(a1) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 ; ILP32-ILP32F-FPELIM-NEXT: ret ; @@ -846,11 +846,11 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 -; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 19 -; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: lw a1, 4(a0) -; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) +; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8 +; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 19 +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) +; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a1) +; ILP32-ILP32F-WITHFP-NEXT: lw a1, 4(a1) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: addi sp, sp, 48 @@ -869,11 +869,11 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 20 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 35 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a1, 4(a0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a1, a0, -8 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a1) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a1, 4(a1) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; @@ -888,11 +888,11 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32E-FPELIM-NEXT: addi a0, sp, 8 ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 15 -; ILP32E-FPELIM-NEXT: andi a0, a0, -8 -; ILP32E-FPELIM-NEXT: addi a1, sp, 23 -; ILP32E-FPELIM-NEXT: sw a1, 0(sp) -; ILP32E-FPELIM-NEXT: lw a1, 4(a0) -; ILP32E-FPELIM-NEXT: lw a0, 0(a0) +; ILP32E-FPELIM-NEXT: andi a1, a0, -8 +; ILP32E-FPELIM-NEXT: addi a0, sp, 23 +; ILP32E-FPELIM-NEXT: sw a0, 0(sp) +; ILP32E-FPELIM-NEXT: lw a0, 0(a1) +; ILP32E-FPELIM-NEXT: lw a1, 4(a1) ; ILP32E-FPELIM-NEXT: addi sp, sp, 28 ; ILP32E-FPELIM-NEXT: ret ; @@ -910,11 +910,11 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; ILP32E-WITHFP-NEXT: addi a0, s0, 4 ; ILP32E-WITHFP-NEXT: sw a0, -12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 -; ILP32E-WITHFP-NEXT: andi a0, a0, -8 -; ILP32E-WITHFP-NEXT: addi a1, s0, 19 -; ILP32E-WITHFP-NEXT: sw a1, -12(s0) -; ILP32E-WITHFP-NEXT: lw a1, 4(a0) -; ILP32E-WITHFP-NEXT: lw a0, 0(a0) +; ILP32E-WITHFP-NEXT: andi a1, a0, -8 +; ILP32E-WITHFP-NEXT: addi a0, s0, 19 +; ILP32E-WITHFP-NEXT: sw a0, -12(s0) +; ILP32E-WITHFP-NEXT: lw a0, 0(a1) +; ILP32E-WITHFP-NEXT: lw a1, 4(a1) ; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: addi sp, sp, 36 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll index 29fe0a7de6b3d4..3e14317a004745 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -8,8 +8,8 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -38,17 +38,17 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -72,8 +72,8 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -102,17 +102,17 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -136,8 +136,8 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 
3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -166,17 +166,17 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -207,39 +207,39 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu a5, 6(a1) +; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 6(a1) ; RV64I-NEXT: lbu a6, 7(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 1(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 @@ -272,17 +272,17 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: slli a5, a1, 3 ; RV32I-NEXT: addi a4, a5, -32 ; 
RV32I-NEXT: srl a1, a3, a5 @@ -343,39 +343,39 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu a5, 6(a1) +; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 6(a1) ; RV64I-NEXT: lbu a6, 7(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 1(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 @@ -408,17 +408,17 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: slli a5, a1, 3 ; RV32I-NEXT: addi a4, a5, -32 ; RV32I-NEXT: sll a1, a3, a5 @@ -479,39 +479,39 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu a5, 6(a1) +; 
RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 6(a1) ; RV64I-NEXT: lbu a6, 7(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 1(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 @@ -544,18 +544,18 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: or a5, a4, a5 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: lbu a5, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: slli a5, a1, 3 +; RV32I-NEXT: or a5, a1, a6 +; RV32I-NEXT: slli a5, a5, 3 ; RV32I-NEXT: addi a6, a5, -32 ; RV32I-NEXT: sra a1, a3, a5 ; RV32I-NEXT: bltz a6, .LBB5_2 @@ -616,39 +616,39 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a7, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; 
RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a4, a4, 35 ; RV64I-NEXT: or a5, a4, a1 @@ -668,17 +668,17 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a0) ; RV64I-NEXT: lbu t0, 4(a0) -; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a5 @@ -733,46 +733,46 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -785,48 +785,48 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 -; RV32I-NEXT: lw a3, 4(a0) +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; 
RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: srl a4, a3, a1 ; RV32I-NEXT: lw a5, 8(a0) -; RV32I-NEXT: andi a6, a1, 24 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 0(a0) -; RV32I-NEXT: slli t0, a5, 1 -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or t0, a4, t0 -; RV32I-NEXT: srl a7, a7, a1 -; RV32I-NEXT: slli a3, a3, 1 ; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a3, a7, a3 +; RV32I-NEXT: srl a6, a4, a1 +; RV32I-NEXT: andi a7, a1, 24 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: slli t0, a5, 1 +; RV32I-NEXT: sll t0, t0, a7 +; RV32I-NEXT: or t0, a6, t0 +; RV32I-NEXT: srl a3, a3, a1 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: sll a4, a4, a7 +; RV32I-NEXT: or a4, a3, a4 ; RV32I-NEXT: srl a5, a5, a1 ; RV32I-NEXT: slli t1, a0, 1 -; RV32I-NEXT: sll a6, t1, a6 -; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: sll a7, t1, a7 +; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: sb a5, 8(a2) ; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a4, 4(a2) +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: sb a1, 14(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 15(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: srli a0, a7, 16 ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a6, 24 +; RV32I-NEXT: srli a0, a7, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: srli a0, a7, 8 ; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a0, a4, 24 ; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: srli a0, t0, 16 ; RV32I-NEXT: sb a0, 6(a2) ; RV32I-NEXT: srli a0, t0, 24 @@ -855,39 +855,39 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a7, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or 
a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a4, a4, 37 ; RV64I-NEXT: or a5, a4, a1 @@ -907,17 +907,17 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a0) ; RV64I-NEXT: lbu t0, 4(a0) -; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a5 @@ -972,37 +972,37 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a7 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -1070,39 +1070,39 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; 
RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a7, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a4, a4, 35 ; RV64I-NEXT: or a5, a4, a1 @@ -1122,17 +1122,17 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 13(a0) ; RV64I-NEXT: lbu t0, 12(a0) -; RV64I-NEXT: lbu t1, 14(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a5 @@ -1187,46 +1187,46 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; 
RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 8(sp) @@ -1239,53 +1239,53 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: addi a3, sp, 16 ; RV32I-NEXT: sub a3, a3, a0 -; RV32I-NEXT: lw a0, 4(a3) +; RV32I-NEXT: lw a0, 0(a3) +; RV32I-NEXT: lw a4, 4(a3) ; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: lw a4, 0(a3) -; RV32I-NEXT: sll a5, a0, a1 -; RV32I-NEXT: andi a6, a1, 24 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: srli a7, a4, 1 -; RV32I-NEXT: lw t0, 12(a3) -; RV32I-NEXT: lw a3, 8(a3) -; RV32I-NEXT: srl a7, a7, a6 -; RV32I-NEXT: or a7, a5, a7 -; RV32I-NEXT: sll t0, t0, a1 -; RV32I-NEXT: srli t1, a3, 1 -; RV32I-NEXT: srl t1, t1, a6 -; RV32I-NEXT: or t1, t0, t1 +; RV32I-NEXT: lw a5, 8(a3) +; RV32I-NEXT: lw a3, 12(a3) +; RV32I-NEXT: sll a6, a4, a1 +; RV32I-NEXT: andi a7, a1, 24 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: srli t0, a0, 1 +; RV32I-NEXT: srl t0, t0, a7 +; RV32I-NEXT: or t0, a6, t0 ; RV32I-NEXT: sll a3, a3, a1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: sll a1, a4, a1 -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: srli t1, a5, 1 +; RV32I-NEXT: srl t1, t1, a7 +; RV32I-NEXT: or t1, a3, t1 +; RV32I-NEXT: sll a5, a5, a1 +; RV32I-NEXT: srli a4, a4, 1 +; RV32I-NEXT: srl a4, a4, a7 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: srli a5, a5, 24 +; RV32I-NEXT: sb a5, 11(a2) ; RV32I-NEXT: srli a3, a3, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 24 ; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: srli a5, a5, 24 -; RV32I-NEXT: sb a5, 7(a2) -; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb t1, 12(a2) -; RV32I-NEXT: sb a7, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: sb a1, 3(a2) ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: srli a0, a6, 24 +; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb t1, 12(a2) +; RV32I-NEXT: sb t0, 4(a2) +; RV32I-NEXT: srli a0, a4, 16 +; RV32I-NEXT: sb a0, 10(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a0, t1, 16 ; RV32I-NEXT: sb a0, 14(a2) ; RV32I-NEXT: srli a0, t1, 8 ; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, a7, 16 +; RV32I-NEXT: srli a0, t0, 16 ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a7, 8 +; RV32I-NEXT: srli a0, t0, 8 ; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret @@ -1309,39 +1309,39 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli 
a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a7, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a4, a4, 37 ; RV64I-NEXT: or a5, a4, a1 @@ -1361,17 +1361,17 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 13(a0) ; RV64I-NEXT: lbu t0, 12(a0) -; RV64I-NEXT: lbu t1, 14(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a5 @@ -1426,37 +1426,37 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, 
a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a7 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 8(sp) @@ -1525,39 +1525,39 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a5, a4, 32 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 6(a1) ; RV64I-NEXT: lbu t0, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a5, a5, 35 ; RV64I-NEXT: or a5, a5, a1 @@ -1579,17 +1579,17 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 ; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: lbu a6, 5(a0) ; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu t0, 5(a0) +; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a6, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: srl a0, a0, a5 @@ -1642,47 +1642,47 
@@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a7, a0, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: or a6, a0, a6 ; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t1, 1(a1) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t1 ; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a0, 24(sp) @@ -1695,48 +1695,48 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 -; RV32I-NEXT: lw a3, 4(a0) +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) ; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: srl a4, a3, a1 ; RV32I-NEXT: lw a5, 8(a0) -; RV32I-NEXT: andi a6, a1, 24 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 0(a0) -; RV32I-NEXT: slli t0, a5, 1 -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or t0, a4, t0 -; RV32I-NEXT: srl a7, a7, a1 -; RV32I-NEXT: slli a3, a3, 1 ; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a3, a7, a3 +; RV32I-NEXT: srl a6, a4, a1 +; RV32I-NEXT: andi a7, a1, 24 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: slli t0, a5, 1 +; RV32I-NEXT: sll t0, t0, a7 +; RV32I-NEXT: or t0, a6, t0 +; RV32I-NEXT: srl a3, a3, a1 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: sll a4, a4, a7 +; RV32I-NEXT: or a4, a3, a4 ; RV32I-NEXT: srl a5, a5, a1 ; RV32I-NEXT: slli t1, a0, 1 -; RV32I-NEXT: sll a6, t1, a6 -; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: sll a7, t1, a7 +; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: sb a5, 8(a2) ; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) 
-; RV32I-NEXT: sb a4, 4(a2) +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: sb a1, 14(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 15(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: srli a0, a7, 16 ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a6, 24 +; RV32I-NEXT: srli a0, a7, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: srli a0, a7, 8 ; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a0, a4, 24 ; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: srli a0, t0, 16 ; RV32I-NEXT: sb a0, 6(a2) ; RV32I-NEXT: srli a0, t0, 24 @@ -1765,39 +1765,39 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a5, a4, 32 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 6(a1) ; RV64I-NEXT: lbu t0, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a5, a5, 37 ; RV64I-NEXT: or a5, a5, a1 @@ -1819,17 +1819,17 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 ; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: lbu a6, 5(a0) ; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu t0, 5(a0) +; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a6, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: slli a0, a0, 32 ; 
RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: srl a0, a0, a5 @@ -1882,37 +1882,37 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a7, a0, t0 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a6, a0, a6 +; RV32I-NEXT: or a6, a6, a7 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 28(sp) @@ -1982,105 +1982,105 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, 
a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 56(sp) @@ -2093,31 +2093,31 @@ define void @lshr_32bytes(ptr %src.ptr, 
ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sd a3, 0(sp) ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: srl a5, a4, a1 -; RV64I-NEXT: ld a6, 16(a3) +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: srl a7, a4, a1 ; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: ld t0, 0(a3) -; RV64I-NEXT: slli a0, a6, 1 -; RV64I-NEXT: sll a0, a0, a7 -; RV64I-NEXT: or a0, a5, a0 -; RV64I-NEXT: srl t0, t0, a1 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: slli a0, a5, 1 +; RV64I-NEXT: sll a0, a0, t0 +; RV64I-NEXT: or a0, a7, a0 +; RV64I-NEXT: srl a3, a3, a1 ; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a4, a4, a7 -; RV64I-NEXT: or a4, t0, a4 -; RV64I-NEXT: srl a6, a6, a1 -; RV64I-NEXT: slli t1, a3, 1 -; RV64I-NEXT: sll a7, t1, a7 -; RV64I-NEXT: or a7, a6, a7 -; RV64I-NEXT: srl a1, a3, a1 -; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sll a4, a4, t0 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: srl a5, a5, a1 +; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: or t0, a5, t0 +; RV64I-NEXT: srl a1, a6, a1 +; RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb t0, 0(a2) -; RV64I-NEXT: sb a5, 8(a2) +; RV64I-NEXT: sb a3, 0(a2) +; RV64I-NEXT: sb a7, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 ; RV64I-NEXT: sb a3, 31(a2) ; RV64I-NEXT: srli a3, a1, 48 @@ -2132,19 +2132,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 26(a2) ; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a7, 56 +; RV64I-NEXT: srli a1, t0, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a7, 48 +; RV64I-NEXT: srli a1, t0, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a7, 40 +; RV64I-NEXT: srli a1, t0, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a7, 32 +; RV64I-NEXT: srli a1, t0, 32 ; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a7, 24 +; RV64I-NEXT: srli a1, t0, 24 ; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a7, 16 +; RV64I-NEXT: srli a1, t0, 16 ; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a1, a7, 8 +; RV64I-NEXT: srli a1, t0, 8 ; RV64I-NEXT: sb a1, 17(a2) ; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 7(a2) @@ -2192,86 +2192,86 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli 
t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 -; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 -; RV32I-NEXT: lbu t2, 1(a1) -; RV32I-NEXT: lbu t3, 0(a1) -; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu t2, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: or a0, a0, t3 +; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t2, t4, t2 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a1, a1, t2 ; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 56(sp) @@ -2291,54 +2291,54 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw a3, 0(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: mv a3, sp -; RV32I-NEXT: add a5, a3, a0 -; RV32I-NEXT: lw a3, 4(a5) -; RV32I-NEXT: slli a6, a1, 3 -; RV32I-NEXT: srl a4, a3, a6 -; RV32I-NEXT: lw a7, 8(a5) -; RV32I-NEXT: 
andi a0, a6, 24 -; RV32I-NEXT: xori t0, a0, 31 -; RV32I-NEXT: lw a1, 0(a5) -; RV32I-NEXT: slli a0, a7, 1 -; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: lw a6, 0(a3) +; RV32I-NEXT: lw a7, 4(a3) +; RV32I-NEXT: slli a5, a1, 3 +; RV32I-NEXT: lw t0, 8(a3) +; RV32I-NEXT: lw t1, 12(a3) +; RV32I-NEXT: srl a4, a7, a5 +; RV32I-NEXT: andi a0, a5, 24 +; RV32I-NEXT: xori t2, a0, 31 +; RV32I-NEXT: slli a0, t0, 1 +; RV32I-NEXT: sll a0, a0, t2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srl t1, a1, a6 -; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: lw t2, 12(a5) -; RV32I-NEXT: lw t3, 16(a5) -; RV32I-NEXT: sll a1, a3, t0 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: srl t4, t2, a6 +; RV32I-NEXT: srl a6, a6, a5 +; RV32I-NEXT: slli a7, a7, 1 +; RV32I-NEXT: sll a1, a7, t2 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: srl a7, t1, a5 +; RV32I-NEXT: lw t3, 16(a3) +; RV32I-NEXT: lw t4, 20(a3) +; RV32I-NEXT: lw t5, 24(a3) +; RV32I-NEXT: lw t6, 28(a3) ; RV32I-NEXT: slli a3, t3, 1 -; RV32I-NEXT: sll a3, a3, t0 -; RV32I-NEXT: or a3, t4, a3 -; RV32I-NEXT: srl a7, a7, a6 -; RV32I-NEXT: slli t2, t2, 1 -; RV32I-NEXT: lw t5, 20(a5) -; RV32I-NEXT: lw t6, 24(a5) -; RV32I-NEXT: sll t2, t2, t0 -; RV32I-NEXT: or t2, a7, t2 -; RV32I-NEXT: srl s0, t5, a6 -; RV32I-NEXT: slli s1, t6, 1 -; RV32I-NEXT: sll s1, s1, t0 +; RV32I-NEXT: sll a3, a3, t2 +; RV32I-NEXT: or a3, a7, a3 +; RV32I-NEXT: srl t0, t0, a5 +; RV32I-NEXT: slli t1, t1, 1 +; RV32I-NEXT: sll t1, t1, t2 +; RV32I-NEXT: or t1, t0, t1 +; RV32I-NEXT: srl s0, t4, a5 +; RV32I-NEXT: slli s1, t5, 1 +; RV32I-NEXT: sll s1, s1, t2 ; RV32I-NEXT: or s1, s0, s1 -; RV32I-NEXT: srl t3, t3, a6 -; RV32I-NEXT: slli t5, t5, 1 -; RV32I-NEXT: lw a5, 28(a5) -; RV32I-NEXT: sll t5, t5, t0 -; RV32I-NEXT: or t5, t3, t5 -; RV32I-NEXT: srl t6, t6, a6 -; RV32I-NEXT: slli s2, a5, 1 -; RV32I-NEXT: sll t0, s2, t0 -; RV32I-NEXT: or t0, t6, t0 -; RV32I-NEXT: srl a5, a5, a6 -; RV32I-NEXT: sb t6, 24(a2) +; RV32I-NEXT: srl t3, t3, a5 +; RV32I-NEXT: slli t4, t4, 1 +; RV32I-NEXT: sll t4, t4, t2 +; RV32I-NEXT: or t4, t3, t4 +; RV32I-NEXT: srl t5, t5, a5 +; RV32I-NEXT: slli s2, t6, 1 +; RV32I-NEXT: sll t2, s2, t2 +; RV32I-NEXT: or t2, t5, t2 +; RV32I-NEXT: srl a5, t6, a5 +; RV32I-NEXT: sb t5, 24(a2) ; RV32I-NEXT: sb a5, 28(a2) ; RV32I-NEXT: sb t3, 16(a2) ; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb t4, 12(a2) -; RV32I-NEXT: sb t1, 0(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a7, 12(a2) +; RV32I-NEXT: sb a6, 0(a2) ; RV32I-NEXT: sb a4, 4(a2) ; RV32I-NEXT: srli a4, a5, 24 ; RV32I-NEXT: sb a4, 31(a2) @@ -2346,17 +2346,17 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a4, 30(a2) ; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a4, t2, 24 ; RV32I-NEXT: sb a4, 27(a2) -; RV32I-NEXT: srli a4, t0, 16 +; RV32I-NEXT: srli a4, t2, 16 ; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t0, 8 +; RV32I-NEXT: srli a4, t2, 8 ; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t5, 24 +; RV32I-NEXT: srli a4, t4, 24 ; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t5, 16 +; RV32I-NEXT: srli a4, t4, 16 ; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t5, 8 +; RV32I-NEXT: srli a4, t4, 8 ; RV32I-NEXT: sb a4, 17(a2) ; RV32I-NEXT: srli a4, s1, 24 ; RV32I-NEXT: sb a4, 23(a2) @@ -2364,11 +2364,11 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a4, 22(a2) ; RV32I-NEXT: srli s1, s1, 8 ; RV32I-NEXT: sb s1, 21(a2) -; 
RV32I-NEXT: srli a4, t2, 24 +; RV32I-NEXT: srli a4, t1, 24 ; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, t2, 16 +; RV32I-NEXT: srli a4, t1, 16 ; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, t2, 8 +; RV32I-NEXT: srli a4, t1, 8 ; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a4, 15(a2) @@ -2414,105 +2414,105 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, 
a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 56(sp) @@ -2526,70 +2526,70 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a0, a1, 2 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) -; RV64I-NEXT: slli a5, a1, 5 -; RV64I-NEXT: srl a1, a4, a5 -; RV64I-NEXT: ld a6, 16(a3) -; RV64I-NEXT: andi a0, a5, 32 -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: ld t0, 0(a3) -; RV64I-NEXT: slli a0, a6, 1 -; RV64I-NEXT: sll a0, a0, a7 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srl t0, t0, a5 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) +; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: srl a7, a4, a1 +; RV64I-NEXT: andi a0, a1, 32 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: slli a0, a5, 1 +; RV64I-NEXT: sll a0, a0, t0 +; RV64I-NEXT: or a0, a7, a0 +; RV64I-NEXT: srl a3, a3, a1 ; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a4, a4, a7 -; RV64I-NEXT: or a4, t0, a4 -; RV64I-NEXT: srl a6, a6, a5 -; RV64I-NEXT: slli t1, a3, 1 -; RV64I-NEXT: sll a7, t1, a7 -; RV64I-NEXT: or a7, a6, a7 -; RV64I-NEXT: srl a3, a3, a5 -; RV64I-NEXT: sb a6, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb t0, 0(a2) -; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: srli a5, a6, 24 -; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a6, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a5, a6, 8 +; RV64I-NEXT: sll a4, a4, t0 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: srl a5, a5, a1 +; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: or t0, a5, t0 
+; RV64I-NEXT: srl a1, a6, a1 +; RV64I-NEXT: sb a5, 16(a2) +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a3, 0(a2) +; RV64I-NEXT: sb a7, 8(a2) +; RV64I-NEXT: srli a6, a5, 24 +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: srli a6, a5, 16 +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: srli a5, a5, 8 ; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a5, a3, 56 +; RV64I-NEXT: srli a5, a1, 56 ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a5, a1, 48 ; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a3, 40 +; RV64I-NEXT: srli a5, a1, 40 ; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a3, 32 +; RV64I-NEXT: srli a5, a1, 32 ; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a3, 24 +; RV64I-NEXT: srli a5, a1, 24 ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a5, a1, 16 ; RV64I-NEXT: sb a5, 26(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 25(a2) +; RV64I-NEXT: srli a1, a3, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a1, a3, 16 +; RV64I-NEXT: sb a1, 2(a2) ; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: srli a3, t0, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, t0, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a3, t0, 8 ; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: srli a1, a7, 24 +; RV64I-NEXT: sb a1, 11(a2) +; RV64I-NEXT: srli a1, a7, 16 +; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a1, a7, 8 ; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: srli a1, a7, 56 +; RV64I-NEXT: srli a1, t0, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a7, 48 +; RV64I-NEXT: srli a1, t0, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a7, 40 +; RV64I-NEXT: srli a1, t0, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a7, 32 +; RV64I-NEXT: srli a1, t0, 32 ; RV64I-NEXT: sb a1, 20(a2) ; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 7(a2) @@ -2622,77 +2622,77 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli 
t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 56(sp) @@ -2713,64 +2713,64 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; 
RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) @@ -2801,83 +2801,83 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; 
RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a1, 0(a1) @@ -2972,77 +2972,77 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) 
; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 56(sp) @@ -3063,64 +3063,64 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) 
-; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) @@ -3151,105 +3151,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: 
or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, 
t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 24(sp) @@ -3263,30 +3263,30 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: addi a3, sp, 32 ; RV64I-NEXT: sub a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) +; RV64I-NEXT: ld a4, 0(a3) +; RV64I-NEXT: ld a5, 8(a3) ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: ld a5, 0(a3) -; RV64I-NEXT: sll a6, a4, a1 +; RV64I-NEXT: ld a6, 16(a3) +; RV64I-NEXT: ld a3, 24(a3) +; RV64I-NEXT: sll a7, a5, a1 ; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: srli a0, a5, 1 -; RV64I-NEXT: ld t0, 24(a3) -; RV64I-NEXT: ld a3, 16(a3) -; RV64I-NEXT: srl a0, a0, a7 -; RV64I-NEXT: or a0, a6, a0 -; RV64I-NEXT: sll t0, t0, a1 -; RV64I-NEXT: srli t1, a3, 1 -; RV64I-NEXT: srl t1, t1, a7 -; RV64I-NEXT: or t1, t0, t1 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: srli a0, a4, 1 +; RV64I-NEXT: srl a0, a0, t0 +; RV64I-NEXT: or a0, a7, a0 ; RV64I-NEXT: sll a3, a3, a1 -; RV64I-NEXT: srli a4, a4, 1 -; RV64I-NEXT: srl a4, a4, a7 -; RV64I-NEXT: or a4, a3, a4 -; RV64I-NEXT: sll a1, a5, a1 +; RV64I-NEXT: srli t1, a6, 1 +; RV64I-NEXT: srl t1, t1, t0 +; RV64I-NEXT: or t1, a3, t1 +; RV64I-NEXT: sll a6, a6, a1 +; RV64I-NEXT: srli a5, a5, 1 +; RV64I-NEXT: srl a5, a5, t0 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: sll a1, a4, a1 ; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: srli a4, a6, 56 +; RV64I-NEXT: sb a4, 23(a2) ; RV64I-NEXT: srli a3, a3, 56 -; RV64I-NEXT: sb a3, 23(a2) -; RV64I-NEXT: srli a3, t0, 56 ; RV64I-NEXT: sb a3, 31(a2) ; RV64I-NEXT: srli a3, a1, 56 ; RV64I-NEXT: sb a3, 7(a2) @@ -3302,23 +3302,23 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: srli a1, a6, 56 +; RV64I-NEXT: srli a1, a7, 56 ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb t1, 24(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a4, 48 +; RV64I-NEXT: srli a1, a5, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a4, 40 +; RV64I-NEXT: srli a1, a5, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a4, 32 +; RV64I-NEXT: srli a1, a5, 32 ; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 24 +; RV64I-NEXT: srli a1, a5, 24 ; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a4, 16 +; RV64I-NEXT: srli a1, a5, 16 ; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: sb a5, 17(a2) ; RV64I-NEXT: srli a1, t1, 48 ; RV64I-NEXT: sb a1, 30(a2) ; RV64I-NEXT: srli a1, t1, 40 @@ -3361,86 +3361,86 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; 
RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 -; 
RV32I-NEXT: lbu t2, 1(a1) -; RV32I-NEXT: lbu t3, 0(a1) -; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu t2, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: or a0, a0, t3 +; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t2, t4, t2 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a1, a1, t2 ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -3460,91 +3460,91 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw a3, 32(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: addi a3, sp, 32 -; RV32I-NEXT: sub a6, a3, a0 -; RV32I-NEXT: lw a3, 4(a6) +; RV32I-NEXT: sub a5, a3, a0 +; RV32I-NEXT: lw a6, 0(a5) +; RV32I-NEXT: lw a3, 4(a5) ; RV32I-NEXT: slli a7, a1, 3 -; RV32I-NEXT: lw t0, 0(a6) +; RV32I-NEXT: lw t0, 8(a5) +; RV32I-NEXT: lw t1, 12(a5) ; RV32I-NEXT: sll a4, a3, a7 ; RV32I-NEXT: andi a0, a7, 24 -; RV32I-NEXT: xori t1, a0, 31 -; RV32I-NEXT: srli a0, t0, 1 -; RV32I-NEXT: lw t2, 12(a6) -; RV32I-NEXT: lw a5, 8(a6) -; RV32I-NEXT: srl a0, a0, t1 +; RV32I-NEXT: xori t2, a0, 31 +; RV32I-NEXT: srli a0, a6, 1 +; RV32I-NEXT: srl a0, a0, t2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: sll t3, t2, a7 -; RV32I-NEXT: srli a1, a5, 1 -; RV32I-NEXT: srl a1, a1, t1 +; RV32I-NEXT: sll t3, t1, a7 +; RV32I-NEXT: srli a1, t0, 1 +; RV32I-NEXT: srl a1, a1, t2 ; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: sll t4, a5, a7 +; RV32I-NEXT: sll t0, t0, a7 ; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: lw t5, 20(a6) -; RV32I-NEXT: lw t6, 16(a6) -; RV32I-NEXT: srl a3, a3, t1 -; RV32I-NEXT: or a3, t4, a3 +; RV32I-NEXT: srl a3, a3, t2 +; RV32I-NEXT: lw t4, 16(a5) +; RV32I-NEXT: lw t5, 20(a5) +; RV32I-NEXT: or a3, t0, a3 +; RV32I-NEXT: lw t6, 24(a5) +; RV32I-NEXT: lw a5, 28(a5) ; RV32I-NEXT: sll s0, t5, a7 -; RV32I-NEXT: srli a5, t6, 1 -; RV32I-NEXT: srl a5, a5, t1 -; RV32I-NEXT: or a5, s0, a5 +; RV32I-NEXT: srli s1, t4, 1 +; RV32I-NEXT: srl s1, s1, t2 +; RV32I-NEXT: or s1, s0, s1 +; RV32I-NEXT: sll t4, t4, a7 +; RV32I-NEXT: srli t1, t1, 1 +; RV32I-NEXT: srl t1, t1, t2 +; RV32I-NEXT: or t1, t4, t1 +; RV32I-NEXT: sll a5, a5, a7 +; RV32I-NEXT: srli s2, t6, 1 +; RV32I-NEXT: srl s2, s2, t2 +; RV32I-NEXT: or s2, a5, s2 ; RV32I-NEXT: sll t6, t6, a7 -; RV32I-NEXT: srli t2, t2, 1 -; RV32I-NEXT: lw s1, 28(a6) -; RV32I-NEXT: lw a6, 24(a6) -; RV32I-NEXT: srl t2, t2, t1 +; RV32I-NEXT: srli t5, t5, 1 +; RV32I-NEXT: srl t2, t5, t2 ; RV32I-NEXT: or t2, t6, t2 -; RV32I-NEXT: sll s1, s1, a7 -; RV32I-NEXT: srli s2, a6, 1 -; RV32I-NEXT: srl s2, s2, t1 -; RV32I-NEXT: or s2, s1, s2 ; RV32I-NEXT: sll a6, a6, a7 -; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t1, t5, t1 -; RV32I-NEXT: or t1, a6, t1 -; RV32I-NEXT: sll a7, t0, a7 -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: srli a6, a6, 24 -; RV32I-NEXT: sb a6, 27(a2) -; RV32I-NEXT: srli s1, s1, 24 -; RV32I-NEXT: sb s1, 31(a2) -; RV32I-NEXT: srli a6, t6, 24 -; RV32I-NEXT: sb a6, 19(a2) +; RV32I-NEXT: sb a6, 0(a2) +; RV32I-NEXT: srli a7, t6, 24 +; RV32I-NEXT: sb a7, 27(a2) +; RV32I-NEXT: srli a5, a5, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, t4, 24 +; RV32I-NEXT: sb a5, 19(a2) ; RV32I-NEXT: srli s0, s0, 24 ; RV32I-NEXT: sb s0, 23(a2) -; RV32I-NEXT: srli a6, t4, 24 -; RV32I-NEXT: sb a6, 11(a2) -; RV32I-NEXT: srli a6, t3, 24 -; RV32I-NEXT: sb a6, 15(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 3(a2) -; 
RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 2(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 1(a2) +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a5, t3, 24 +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 3(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 2(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 1(a2) ; RV32I-NEXT: srli a4, a4, 24 ; RV32I-NEXT: sb a4, 7(a2) -; RV32I-NEXT: sb t1, 24(a2) +; RV32I-NEXT: sb t2, 24(a2) ; RV32I-NEXT: sb s2, 28(a2) -; RV32I-NEXT: sb t2, 16(a2) -; RV32I-NEXT: sb a5, 20(a2) +; RV32I-NEXT: sb t1, 16(a2) +; RV32I-NEXT: sb s1, 20(a2) ; RV32I-NEXT: sb a3, 8(a2) ; RV32I-NEXT: sb a1, 12(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a4, t1, 16 +; RV32I-NEXT: srli a4, t2, 16 ; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t1, 8 +; RV32I-NEXT: srli a4, t2, 8 ; RV32I-NEXT: sb a4, 25(a2) ; RV32I-NEXT: srli a4, s2, 16 ; RV32I-NEXT: sb a4, 30(a2) ; RV32I-NEXT: srli a4, s2, 8 ; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: srli a4, t2, 16 +; RV32I-NEXT: srli a4, t1, 16 ; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t2, 8 +; RV32I-NEXT: srli a4, t1, 8 ; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, a5, 16 +; RV32I-NEXT: srli a4, s1, 16 ; RV32I-NEXT: sb a4, 22(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 21(a2) +; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s1, 21(a2) ; RV32I-NEXT: srli a4, a3, 16 ; RV32I-NEXT: sb a4, 10(a2) ; RV32I-NEXT: srli a3, a3, 8 @@ -3583,105 +3583,105 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: 
or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 24(sp) @@ -3695,75 +3695,75 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli a0, a1, 2 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: addi a3, sp, 32 -; RV64I-NEXT: sub a0, a3, a0 -; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: slli a5, a1, 5 -; RV64I-NEXT: ld a6, 0(a0) -; RV64I-NEXT: sll a3, a4, a5 -; RV64I-NEXT: andi a1, a5, 32 -; RV64I-NEXT: xori a7, a1, 63 -; 
RV64I-NEXT: srli a1, a6, 1 -; RV64I-NEXT: ld t0, 24(a0) -; RV64I-NEXT: ld t1, 16(a0) -; RV64I-NEXT: srl a0, a1, a7 +; RV64I-NEXT: sub a3, a3, a0 +; RV64I-NEXT: ld a4, 0(a3) +; RV64I-NEXT: ld a5, 8(a3) +; RV64I-NEXT: slli a6, a1, 5 +; RV64I-NEXT: ld a7, 16(a3) +; RV64I-NEXT: ld a1, 24(a3) +; RV64I-NEXT: sll a3, a5, a6 +; RV64I-NEXT: andi a0, a6, 32 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: srli a0, a4, 1 +; RV64I-NEXT: srl a0, a0, t0 ; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: sll t0, t0, a5 -; RV64I-NEXT: srli a1, t1, 1 -; RV64I-NEXT: srl a1, a1, a7 -; RV64I-NEXT: or a1, t0, a1 -; RV64I-NEXT: sll t1, t1, a5 -; RV64I-NEXT: srli a4, a4, 1 -; RV64I-NEXT: srl a4, a4, a7 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: sll a5, a6, a5 -; RV64I-NEXT: sb a5, 0(a2) -; RV64I-NEXT: srli a6, t1, 56 +; RV64I-NEXT: sll t1, a1, a6 +; RV64I-NEXT: srli a1, a7, 1 +; RV64I-NEXT: srl a1, a1, t0 +; RV64I-NEXT: or a1, t1, a1 +; RV64I-NEXT: sll a7, a7, a6 +; RV64I-NEXT: srli a5, a5, 1 +; RV64I-NEXT: srl a5, a5, t0 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: sll a4, a4, a6 +; RV64I-NEXT: sb a4, 0(a2) +; RV64I-NEXT: srli a6, a7, 56 ; RV64I-NEXT: sb a6, 23(a2) -; RV64I-NEXT: srli a6, t1, 48 +; RV64I-NEXT: srli a6, a7, 48 ; RV64I-NEXT: sb a6, 22(a2) -; RV64I-NEXT: srli a6, t1, 40 +; RV64I-NEXT: srli a6, a7, 40 ; RV64I-NEXT: sb a6, 21(a2) -; RV64I-NEXT: srli a6, t1, 32 +; RV64I-NEXT: srli a6, a7, 32 ; RV64I-NEXT: sb a6, 20(a2) -; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: srli a6, t1, 56 ; RV64I-NEXT: sb a6, 31(a2) -; RV64I-NEXT: srli a6, t0, 48 +; RV64I-NEXT: srli a6, t1, 48 ; RV64I-NEXT: sb a6, 30(a2) -; RV64I-NEXT: srli a6, t0, 40 +; RV64I-NEXT: srli a6, t1, 40 ; RV64I-NEXT: sb a6, 29(a2) -; RV64I-NEXT: srli a6, t0, 32 +; RV64I-NEXT: srli a6, t1, 32 ; RV64I-NEXT: sb a6, 28(a2) -; RV64I-NEXT: srli a6, a5, 56 +; RV64I-NEXT: srli a6, a4, 56 ; RV64I-NEXT: sb a6, 7(a2) -; RV64I-NEXT: srli a6, a5, 48 +; RV64I-NEXT: srli a6, a4, 48 ; RV64I-NEXT: sb a6, 6(a2) -; RV64I-NEXT: srli a6, a5, 40 +; RV64I-NEXT: srli a6, a4, 40 ; RV64I-NEXT: sb a6, 5(a2) -; RV64I-NEXT: srli a6, a5, 32 +; RV64I-NEXT: srli a6, a4, 32 ; RV64I-NEXT: sb a6, 4(a2) -; RV64I-NEXT: srli a6, a5, 24 +; RV64I-NEXT: srli a6, a4, 24 ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: srli a6, a5, 16 +; RV64I-NEXT: srli a6, a4, 16 ; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 1(a2) -; RV64I-NEXT: srli a5, a3, 56 -; RV64I-NEXT: sb a5, 15(a2) -; RV64I-NEXT: srli a5, a3, 48 -; RV64I-NEXT: sb a5, 14(a2) -; RV64I-NEXT: srli a5, a3, 40 -; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a3, 48 +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: srli a4, a3, 40 +; RV64I-NEXT: sb a4, 13(a2) ; RV64I-NEXT: srli a3, a3, 32 ; RV64I-NEXT: sb a3, 12(a2) -; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb a1, 24(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a3, a4, 24 +; RV64I-NEXT: srli a3, a5, 24 ; RV64I-NEXT: sb a3, 19(a2) -; RV64I-NEXT: srli a3, a4, 16 +; RV64I-NEXT: srli a3, a5, 16 ; RV64I-NEXT: sb a3, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: sb a5, 17(a2) ; RV64I-NEXT: srli a3, a1, 24 ; RV64I-NEXT: sb a3, 27(a2) ; RV64I-NEXT: srli a3, a1, 16 @@ -3791,77 +3791,77 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; 
RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: or a0, a0, t3 ; 
RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -3882,64 +3882,64 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: addi a0, sp, 32 -; RV32I-NEXT: sub a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) @@ -3970,83 +3970,83 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr 
%dwordOff.ptr, ptr %dst) nou ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; 
RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a1, 0(a1) @@ -4141,77 +4141,77 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, 
t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -4232,64 +4232,64 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: addi a0, sp, 32 -; RV32I-NEXT: sub a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sub a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; 
RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) @@ -4320,105 +4320,105 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; 
RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a7, a0, 32 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 1(a1) ; RV64I-NEXT: lbu t0, 0(a1) -; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 2(a1) ; RV64I-NEXT: lbu t2, 3(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 5(a1) +; RV64I-NEXT: or a7, t2, a7 ; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) +; RV64I-NEXT: lbu t2, 5(a1) +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: lbu t0, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or t0, t0, t1 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: sraiw a0, a0, 31 @@ -4432,31 +4432,31 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sd a3, 0(sp) ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: srl a5, a4, a1 -; RV64I-NEXT: ld a6, 16(a3) +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: srl a7, a4, a1 ; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: ld t0, 0(a3) -; RV64I-NEXT: slli a0, a6, 1 -; RV64I-NEXT: sll a0, a0, a7 -; RV64I-NEXT: or a0, a5, a0 -; RV64I-NEXT: srl t0, t0, a1 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: slli a0, a5, 1 +; RV64I-NEXT: sll a0, a0, t0 +; RV64I-NEXT: or a0, a7, a0 +; RV64I-NEXT: srl a3, a3, a1 ; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a4, a4, a7 -; RV64I-NEXT: or a4, t0, a4 -; RV64I-NEXT: srl a6, a6, a1 -; RV64I-NEXT: slli t1, a3, 1 -; RV64I-NEXT: sll a7, t1, a7 -; RV64I-NEXT: or a7, a6, a7 -; RV64I-NEXT: sra a1, a3, a1 -; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sll a4, a4, t0 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: srl a5, a5, a1 +; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: or t0, a5, t0 +; RV64I-NEXT: sra a1, a6, a1 +; 
RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb t0, 0(a2) -; RV64I-NEXT: sb a5, 8(a2) +; RV64I-NEXT: sb a3, 0(a2) +; RV64I-NEXT: sb a7, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 ; RV64I-NEXT: sb a3, 31(a2) ; RV64I-NEXT: srli a3, a1, 48 @@ -4471,19 +4471,19 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 26(a2) ; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a7, 56 +; RV64I-NEXT: srli a1, t0, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a7, 48 +; RV64I-NEXT: srli a1, t0, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a7, 40 +; RV64I-NEXT: srli a1, t0, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a7, 32 +; RV64I-NEXT: srli a1, t0, 32 ; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a7, 24 +; RV64I-NEXT: srli a1, t0, 24 ; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a7, 16 +; RV64I-NEXT: srli a1, t0, 16 ; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a1, a7, 8 +; RV64I-NEXT: srli a1, t0, 8 ; RV64I-NEXT: sb a1, 17(a2) ; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 7(a2) @@ -4531,87 +4531,87 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 
23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or t3, a0, t4 -; RV32I-NEXT: or t2, t3, t2 -; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: or t2, a0, t2 ; RV32I-NEXT: lbu t4, 0(a1) -; RV32I-NEXT: lbu t5, 2(a1) +; RV32I-NEXT: lbu t5, 1(a1) +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t3, t3, t4 -; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t5 ; RV32I-NEXT: or a1, a1, t3 +; RV32I-NEXT: or a1, a1, t4 ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a0, 56(sp) @@ -4631,54 +4631,54 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw a3, 0(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: mv a3, sp -; RV32I-NEXT: add a5, a3, a0 -; RV32I-NEXT: lw a3, 4(a5) -; RV32I-NEXT: slli a6, a1, 3 -; RV32I-NEXT: srl a4, a3, a6 -; RV32I-NEXT: lw a7, 8(a5) -; RV32I-NEXT: andi a0, a6, 24 -; RV32I-NEXT: xori t0, a0, 31 -; RV32I-NEXT: lw a1, 0(a5) -; RV32I-NEXT: slli a0, a7, 1 -; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: lw a6, 0(a3) +; RV32I-NEXT: lw a7, 4(a3) +; RV32I-NEXT: slli a5, a1, 3 +; RV32I-NEXT: lw t0, 8(a3) +; RV32I-NEXT: lw t1, 12(a3) +; RV32I-NEXT: srl a4, a7, a5 +; RV32I-NEXT: andi a0, a5, 24 +; RV32I-NEXT: xori t2, a0, 31 +; RV32I-NEXT: slli a0, t0, 1 +; RV32I-NEXT: sll a0, a0, t2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srl t1, a1, a6 -; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: lw t2, 12(a5) -; RV32I-NEXT: lw t3, 16(a5) -; RV32I-NEXT: sll a1, a3, t0 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: srl t4, t2, a6 +; RV32I-NEXT: srl a6, a6, a5 +; RV32I-NEXT: slli a7, a7, 1 +; RV32I-NEXT: sll a1, a7, t2 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: srl a7, t1, a5 +; RV32I-NEXT: lw t3, 16(a3) +; RV32I-NEXT: lw t4, 20(a3) +; RV32I-NEXT: lw t5, 24(a3) +; RV32I-NEXT: lw t6, 28(a3) ; RV32I-NEXT: slli a3, t3, 1 -; RV32I-NEXT: sll a3, a3, t0 -; RV32I-NEXT: or a3, t4, a3 -; RV32I-NEXT: srl a7, a7, a6 -; RV32I-NEXT: slli t2, t2, 1 -; RV32I-NEXT: lw t5, 20(a5) -; RV32I-NEXT: lw t6, 24(a5) -; RV32I-NEXT: sll t2, t2, t0 -; RV32I-NEXT: or t2, a7, t2 -; RV32I-NEXT: srl s0, t5, a6 -; RV32I-NEXT: slli s1, t6, 1 -; RV32I-NEXT: sll s1, s1, t0 
+; RV32I-NEXT: sll a3, a3, t2 +; RV32I-NEXT: or a3, a7, a3 +; RV32I-NEXT: srl t0, t0, a5 +; RV32I-NEXT: slli t1, t1, 1 +; RV32I-NEXT: sll t1, t1, t2 +; RV32I-NEXT: or t1, t0, t1 +; RV32I-NEXT: srl s0, t4, a5 +; RV32I-NEXT: slli s1, t5, 1 +; RV32I-NEXT: sll s1, s1, t2 ; RV32I-NEXT: or s1, s0, s1 -; RV32I-NEXT: srl t3, t3, a6 -; RV32I-NEXT: slli t5, t5, 1 -; RV32I-NEXT: lw a5, 28(a5) -; RV32I-NEXT: sll t5, t5, t0 -; RV32I-NEXT: or t5, t3, t5 -; RV32I-NEXT: srl t6, t6, a6 -; RV32I-NEXT: slli s2, a5, 1 -; RV32I-NEXT: sll t0, s2, t0 -; RV32I-NEXT: or t0, t6, t0 -; RV32I-NEXT: sra a5, a5, a6 -; RV32I-NEXT: sb t6, 24(a2) +; RV32I-NEXT: srl t3, t3, a5 +; RV32I-NEXT: slli t4, t4, 1 +; RV32I-NEXT: sll t4, t4, t2 +; RV32I-NEXT: or t4, t3, t4 +; RV32I-NEXT: srl t5, t5, a5 +; RV32I-NEXT: slli s2, t6, 1 +; RV32I-NEXT: sll t2, s2, t2 +; RV32I-NEXT: or t2, t5, t2 +; RV32I-NEXT: sra a5, t6, a5 +; RV32I-NEXT: sb t5, 24(a2) ; RV32I-NEXT: sb a5, 28(a2) ; RV32I-NEXT: sb t3, 16(a2) ; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb t4, 12(a2) -; RV32I-NEXT: sb t1, 0(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a7, 12(a2) +; RV32I-NEXT: sb a6, 0(a2) ; RV32I-NEXT: sb a4, 4(a2) ; RV32I-NEXT: srli a4, a5, 24 ; RV32I-NEXT: sb a4, 31(a2) @@ -4686,17 +4686,17 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a4, 30(a2) ; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a4, t2, 24 ; RV32I-NEXT: sb a4, 27(a2) -; RV32I-NEXT: srli a4, t0, 16 +; RV32I-NEXT: srli a4, t2, 16 ; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t0, 8 +; RV32I-NEXT: srli a4, t2, 8 ; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t5, 24 +; RV32I-NEXT: srli a4, t4, 24 ; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t5, 16 +; RV32I-NEXT: srli a4, t4, 16 ; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t5, 8 +; RV32I-NEXT: srli a4, t4, 8 ; RV32I-NEXT: sb a4, 17(a2) ; RV32I-NEXT: srli a4, s1, 24 ; RV32I-NEXT: sb a4, 23(a2) @@ -4704,11 +4704,11 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a4, 22(a2) ; RV32I-NEXT: srli s1, s1, 8 ; RV32I-NEXT: sb s1, 21(a2) -; RV32I-NEXT: srli a4, t2, 24 +; RV32I-NEXT: srli a4, t1, 24 ; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, t2, 16 +; RV32I-NEXT: srli a4, t1, 16 ; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, t2, 8 +; RV32I-NEXT: srli a4, t1, 8 ; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a4, 15(a2) @@ -4754,105 +4754,105 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: 
lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a7, a0, 32 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 1(a1) ; RV64I-NEXT: lbu t0, 0(a1) -; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 2(a1) ; RV64I-NEXT: lbu t2, 3(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu 
t0, 5(a1) +; RV64I-NEXT: or a7, t2, a7 ; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) +; RV64I-NEXT: lbu t2, 5(a1) +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: lbu t0, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or t0, t0, t1 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: sraiw a0, a0, 31 @@ -4867,70 +4867,70 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a0, a1, 2 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) -; RV64I-NEXT: slli a5, a1, 5 -; RV64I-NEXT: srl a1, a4, a5 -; RV64I-NEXT: ld a6, 16(a3) -; RV64I-NEXT: andi a0, a5, 32 -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: ld t0, 0(a3) -; RV64I-NEXT: slli a0, a6, 1 -; RV64I-NEXT: sll a0, a0, a7 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srl t0, t0, a5 -; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a4, a4, a7 -; RV64I-NEXT: or a4, t0, a4 -; RV64I-NEXT: srl a6, a6, a5 -; RV64I-NEXT: slli t1, a3, 1 -; RV64I-NEXT: sll a7, t1, a7 -; RV64I-NEXT: or a7, a6, a7 -; RV64I-NEXT: sra a3, a3, a5 -; RV64I-NEXT: sb a6, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb t0, 0(a2) -; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: srli a5, a6, 24 -; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a6, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a5, a6, 8 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) +; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: srl a7, a4, a1 +; RV64I-NEXT: andi a0, a1, 32 +; RV64I-NEXT: xori t0, a0, 63 +; RV64I-NEXT: slli a0, a5, 1 +; RV64I-NEXT: sll a0, a0, t0 +; RV64I-NEXT: or a0, a7, a0 +; RV64I-NEXT: srl a3, a3, a1 +; RV64I-NEXT: slli a4, a4, 1 +; RV64I-NEXT: sll a4, a4, t0 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: srl a5, a5, a1 +; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: or t0, a5, t0 +; RV64I-NEXT: sra a1, a6, a1 +; RV64I-NEXT: sb a5, 16(a2) +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a3, 0(a2) +; RV64I-NEXT: sb a7, 8(a2) +; RV64I-NEXT: srli a6, a5, 24 +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: srli a6, a5, 16 +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: srli a5, a5, 8 ; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a5, a3, 56 +; RV64I-NEXT: srli a5, a1, 56 ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a5, a1, 48 ; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a3, 40 +; RV64I-NEXT: srli a5, a1, 40 ; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a3, 32 +; RV64I-NEXT: srli a5, a1, 32 ; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a3, 24 +; RV64I-NEXT: srli a5, a1, 24 ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a5, a1, 16 ; RV64I-NEXT: sb a5, 26(a2) +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: sb a1, 25(a2) +; RV64I-NEXT: srli a1, a3, 24 +; RV64I-NEXT: sb a1, 3(a2) +; RV64I-NEXT: srli a1, a3, 16 +; RV64I-NEXT: sb a1, 2(a2) ; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: srli a3, t0, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, t0, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a3, t0, 8 ; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: 
srli a3, a1, 24 -; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: srli a1, a7, 24 +; RV64I-NEXT: sb a1, 11(a2) +; RV64I-NEXT: srli a1, a7, 16 +; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a1, a7, 8 ; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: srli a1, a7, 56 +; RV64I-NEXT: srli a1, t0, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a7, 48 +; RV64I-NEXT: srli a1, t0, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a7, 40 +; RV64I-NEXT: srli a1, t0, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a7, 32 +; RV64I-NEXT: srli a1, t0, 32 ; RV64I-NEXT: sb a1, 20(a2) ; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 7(a2) @@ -4963,77 +4963,77 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; 
RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or t3, a0, t4 -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: or t2, a0, t2 +; RV32I-NEXT: or t2, t2, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 60(sp) @@ -5055,64 +5055,64 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb 
a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) @@ -5143,83 +5143,83 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, 
t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a7, a0, 32 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: lbu a1, 0(a1) @@ -5315,77 +5315,77 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or 
t0, t2, t1 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lbu t0, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t0, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: or t0, t3, t0 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: or t1, t4, t1 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: lbu t2, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or t3, a0, t4 -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: or t2, a0, t2 +; RV32I-NEXT: or t2, t2, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 60(sp) @@ -5407,64 +5407,64 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a3, a0, a1 -; RV32I-NEXT: lw a0, 4(a3) -; RV32I-NEXT: lw a1, 0(a3) -; RV32I-NEXT: lw a4, 12(a3) -; RV32I-NEXT: lw a5, 8(a3) -; RV32I-NEXT: lw a6, 24(a3) -; RV32I-NEXT: lw a7, 28(a3) -; RV32I-NEXT: lw t0, 16(a3) -; RV32I-NEXT: lw a3, 20(a3) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a7, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a4, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: lw a3, 16(a1) +; RV32I-NEXT: lw a4, 20(a1) +; RV32I-NEXT: lw a5, 24(a1) +; RV32I-NEXT: lw a6, 28(a1) +; RV32I-NEXT: lw a7, 0(a1) +; RV32I-NEXT: lw a0, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb a3, 16(a2) +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a7, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t1, a5, 24 ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: srli t1, a5, 16 ; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 25(a2) -; RV32I-NEXT: srli a6, a7, 24 -; RV32I-NEXT: sb a6, 31(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 30(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: srli a6, t0, 24 -; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 17(a2) -; RV32I-NEXT: srli a6, a3, 24 -; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a3, 16 -; RV32I-NEXT: sb a6, 22(a2) -; 
RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 30(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: sb a3, 23(a2) ; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a3, 22(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a3, t0, 24 +; RV32I-NEXT: sb a3, 11(a2) +; RV32I-NEXT: srli a3, t0, 16 +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a3, 14(a2) ; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: sb a1, 3(a2) +; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: srli a1, a7, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 7(a2) diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll index 7e879b137b4f0d..190d67a5d8c118 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -8,8 +8,8 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -37,17 +37,17 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 @@ -69,8 +69,8 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -98,17 +98,17 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: 
lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 @@ -130,8 +130,8 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: lb a0, 3(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) @@ -159,17 +159,17 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: lbu a3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 @@ -198,39 +198,39 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 1(a1) ; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 2(a1) ; RV64I-NEXT: lbu a6, 3(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; 
RV64I-NEXT: srl a0, a0, a1 @@ -262,17 +262,17 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a5, a1, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a5, a1, a5 ; RV32I-NEXT: addi a4, a5, -32 ; RV32I-NEXT: srl a1, a3, a5 ; RV32I-NEXT: bltz a4, .LBB3_2 @@ -331,39 +331,39 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 1(a1) ; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 2(a1) ; RV64I-NEXT: lbu a6, 3(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: sll a0, a0, a1 @@ -395,17 +395,17 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a5, a1, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a5, a1, a5 ; RV32I-NEXT: addi a4, a5, -32 ; RV32I-NEXT: sll a1, a3, a5 ; RV32I-NEXT: bltz a4, .LBB4_2 @@ 
-464,39 +464,39 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: lbu a3, 1(a1) ; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a5, 2(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lbu a3, 2(a1) ; RV64I-NEXT: lbu a6, 3(a1) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: lbu a4, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: sra a0, a0, a1 @@ -528,17 +528,17 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: or a5, a4, a5 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: lbu a5, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a5, a1, a5 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a5, a1, a6 ; RV32I-NEXT: addi a6, a5, -32 ; RV32I-NEXT: sra a1, a3, a5 ; RV32I-NEXT: bltz a6, .LBB5_2 @@ -598,39 +598,39 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) -; 
RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 2(a1) ; RV64I-NEXT: lbu a7, 3(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a5, a1, a4 ; RV64I-NEXT: addi a4, a5, -64 @@ -649,17 +649,17 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a0) ; RV64I-NEXT: lbu t0, 4(a0) -; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a5 @@ -714,46 +714,46 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: lbu a6, 1(a1) -; 
RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -768,23 +768,23 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 ; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: srl a4, a3, a1 +; RV32I-NEXT: lw a4, 0(a0) ; RV32I-NEXT: lw a5, 8(a0) -; RV32I-NEXT: andi a6, a1, 31 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 0(a0) +; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: srl a6, a3, a1 +; RV32I-NEXT: andi a7, a1, 31 +; RV32I-NEXT: xori a7, a7, 31 ; RV32I-NEXT: slli t0, a5, 1 -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or a4, a4, t0 -; RV32I-NEXT: srl a7, a7, a1 +; RV32I-NEXT: sll t0, t0, a7 +; RV32I-NEXT: or a6, a6, t0 +; RV32I-NEXT: srl a4, a4, a1 ; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a3, a7, a3 -; RV32I-NEXT: srl a5, a5, a1 -; RV32I-NEXT: slli a7, a0, 1 -; RV32I-NEXT: sll a6, a7, a6 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: sll a3, a3, a7 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: srl a4, a5, a1 +; RV32I-NEXT: slli a5, a0, 1 +; RV32I-NEXT: sll a5, a5, a7 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: sb a0, 12(a2) ; RV32I-NEXT: srli a1, a0, 16 @@ -793,27 +793,27 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a1, 15(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a4, 4(a2) -; RV32I-NEXT: srli a0, a5, 16 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a5, 24 +; RV32I-NEXT: srli a0, a4, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a0, a3, 16 ; RV32I-NEXT: sb a0, 2(a2) ; RV32I-NEXT: srli a0, a3, 24 ; RV32I-NEXT: sb a0, 3(a2) ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: srli a0, a4, 16 +; RV32I-NEXT: srli a0, a6, 16 ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a4, 24 +; RV32I-NEXT: srli a0, a6, 24 ; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -834,39 +834,39 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: 
or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 2(a1) ; RV64I-NEXT: lbu a7, 3(a1) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a5, a1, a4 ; RV64I-NEXT: addi a4, a5, -64 @@ -885,17 +885,17 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 ; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 13(a0) ; RV64I-NEXT: lbu t0, 12(a0) -; RV64I-NEXT: lbu t1, 14(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a5 @@ -950,46 +950,46 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, 
t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 8(sp) @@ -1005,51 +1005,51 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sub a3, a3, a0 ; RV32I-NEXT: lw a0, 4(a3) ; RV32I-NEXT: lw a4, 0(a3) -; RV32I-NEXT: sll a5, a0, a1 -; RV32I-NEXT: andi a6, a1, 31 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: srli a7, a4, 1 -; RV32I-NEXT: lw t0, 12(a3) -; RV32I-NEXT: lw a3, 8(a3) -; RV32I-NEXT: srl a7, a7, a6 -; RV32I-NEXT: or a5, a5, a7 -; RV32I-NEXT: sll a7, t0, a1 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: srl t0, t0, a6 -; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: lw a5, 8(a3) +; RV32I-NEXT: lw a3, 12(a3) +; RV32I-NEXT: sll a6, a0, a1 +; RV32I-NEXT: andi a7, a1, 31 +; RV32I-NEXT: xori a7, a7, 31 +; RV32I-NEXT: srli t0, a4, 1 +; RV32I-NEXT: srl t0, t0, a7 +; RV32I-NEXT: or a6, a6, t0 ; RV32I-NEXT: sll a3, a3, a1 +; RV32I-NEXT: srli t0, a5, 1 +; RV32I-NEXT: srl t0, t0, a7 +; RV32I-NEXT: or a3, a3, t0 +; RV32I-NEXT: sll a5, a5, a1 ; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: srl a0, a0, a7 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: sll a1, a4, a1 ; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a7, 12(a2) -; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: sb a1, 10(a2) ; RV32I-NEXT: srli a1, a0, 24 ; RV32I-NEXT: sb a1, 11(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: srli a0, a7, 16 +; RV32I-NEXT: srli a0, a3, 16 ; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a7, 24 +; RV32I-NEXT: srli a0, a3, 24 ; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a7, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, a5, 16 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: srli a0, a6, 16 ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a5, 24 +; RV32I-NEXT: srli a0, a6, 24 ; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 5(a2) +; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1070,39 +1070,39 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: or a3, a4, a3 +; 
RV64I-NEXT: lbu a4, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a5, a4, 32 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 1(a1) ; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: lbu a5, 2(a1) ; RV64I-NEXT: lbu t0, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 4(a1) -; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu t0, 5(a1) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a5, a1, a5 ; RV64I-NEXT: addi a6, a5, -64 @@ -1123,17 +1123,17 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 ; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: lbu a6, 5(a0) ; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu t0, 5(a0) +; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a6, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: srl a0, a0, a5 @@ -1186,47 +1186,47 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; 
RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a7, a0, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: or a6, a0, a6 ; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t1, 1(a1) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t1 ; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a0, 24(sp) @@ -1241,23 +1241,23 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 ; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: srl a4, a3, a1 +; RV32I-NEXT: lw a4, 0(a0) ; RV32I-NEXT: lw a5, 8(a0) -; RV32I-NEXT: andi a6, a1, 31 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: lw a7, 0(a0) +; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: srl a6, a3, a1 +; RV32I-NEXT: andi a7, a1, 31 +; RV32I-NEXT: xori a7, a7, 31 ; RV32I-NEXT: slli t0, a5, 1 -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or a4, a4, t0 -; RV32I-NEXT: srl a7, a7, a1 +; RV32I-NEXT: sll t0, t0, a7 +; RV32I-NEXT: or a6, a6, t0 +; RV32I-NEXT: srl a4, a4, a1 ; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: sll a3, a3, a6 -; RV32I-NEXT: or a3, a7, a3 -; RV32I-NEXT: srl a5, a5, a1 -; RV32I-NEXT: slli a7, a0, 1 -; RV32I-NEXT: sll a6, a7, a6 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: sll a3, a3, a7 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: srl a4, a5, a1 +; RV32I-NEXT: slli a5, a0, 1 +; RV32I-NEXT: sll a5, a5, a7 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: sb a0, 12(a2) ; RV32I-NEXT: srli a1, a0, 16 @@ -1266,27 +1266,27 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a1, 15(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a4, 4(a2) -; RV32I-NEXT: srli a0, a5, 16 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a5, 24 +; RV32I-NEXT: srli a0, a4, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a0, a3, 16 ; RV32I-NEXT: sb a0, 2(a2) ; RV32I-NEXT: srli a0, a3, 24 ; RV32I-NEXT: sb a0, 3(a2) ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: srli a0, a4, 16 +; RV32I-NEXT: srli a0, a6, 16 ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a4, 24 +; RV32I-NEXT: srli a0, a6, 24 ; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: srli a0, a6, 8 +; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1309,105 +1309,105 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; 
RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; 
RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 56(sp) @@ -1421,72 +1421,72 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srli a0, a1, 3 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) -; RV64I-NEXT: srl a0, a4, a1 -; RV64I-NEXT: ld a5, 16(a3) -; RV64I-NEXT: andi a6, a1, 63 -; RV64I-NEXT: xori a6, a6, 63 -; RV64I-NEXT: ld a7, 0(a3) +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a4, 0(a0) +; RV64I-NEXT: ld a5, 16(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: srl a0, a3, a1 +; RV64I-NEXT: andi a7, a1, 63 +; RV64I-NEXT: xori a7, a7, 63 ; RV64I-NEXT: slli t0, a5, 1 -; RV64I-NEXT: sll t0, t0, a6 +; RV64I-NEXT: sll t0, t0, a7 ; RV64I-NEXT: or a0, a0, t0 -; RV64I-NEXT: srl a7, a7, a1 -; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a4, a4, a6 -; RV64I-NEXT: or a4, a7, a4 -; RV64I-NEXT: srl a5, a5, a1 -; RV64I-NEXT: slli a7, a3, 1 -; RV64I-NEXT: sll a6, a7, a6 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: srl a1, a3, a1 +; RV64I-NEXT: srl a4, a4, a1 +; RV64I-NEXT: slli a3, a3, 1 +; RV64I-NEXT: sll a3, a3, a7 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: srl a4, a5, a1 +; RV64I-NEXT: slli a5, a6, 1 +; RV64I-NEXT: sll a5, a5, a7 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: srl a1, a6, a1 ; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 30(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 28(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) +; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: sb a5, 31(a2) +; RV64I-NEXT: srli a5, a1, 48 +; RV64I-NEXT: sb a5, 30(a2) +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: sb a5, 29(a2) +; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: sb a5, 28(a2) +; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: sb a5, 27(a2) +; RV64I-NEXT: srli a5, a1, 16 +; RV64I-NEXT: sb a5, 26(a2) ; 
RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a4, 0(a2) +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a3, 0(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a5, 56 +; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a5, 48 +; RV64I-NEXT: srli a1, a4, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a5, 40 +; RV64I-NEXT: srli a1, a4, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a5, 32 +; RV64I-NEXT: srli a1, a4, 32 ; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a5, 24 +; RV64I-NEXT: srli a1, a4, 24 ; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a5, 16 +; RV64I-NEXT: srli a1, a4, 16 ; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a1, a4, 56 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: srli a1, a3, 56 ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 +; RV64I-NEXT: srli a1, a3, 48 ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 +; RV64I-NEXT: srli a1, a3, 40 ; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 +; RV64I-NEXT: srli a1, a3, 32 ; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 +; RV64I-NEXT: srli a1, a3, 24 ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 +; RV64I-NEXT: srli a1, a3, 16 ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a3, 1(a2) ; RV64I-NEXT: srli a1, a0, 56 ; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: srli a1, a0, 48 @@ -1516,87 +1516,87 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: or a7, a6, a7 +; RV32I-NEXT: lbu a6, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; 
RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or t0, t0, a7 -; RV32I-NEXT: lbu a7, 21(a0) +; RV32I-NEXT: or a6, t2, a6 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or t0, a6, t0 +; RV32I-NEXT: lbu a6, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t1, t1, a7 -; RV32I-NEXT: lbu a7, 25(a0) +; RV32I-NEXT: or a6, t3, a6 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t1, a6, t1 +; RV32I-NEXT: lbu a6, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t2, t2, a7 -; RV32I-NEXT: lbu a7, 29(a0) +; RV32I-NEXT: or a6, t4, a6 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t2, a6, t2 +; RV32I-NEXT: lbu a6, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 -; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t3, 0(a1) -; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: or a0, a0, t3 +; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a6, t4, a6 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t4 -; RV32I-NEXT: or a7, a1, a7 +; RV32I-NEXT: or a1, a1, t3 +; RV32I-NEXT: or a6, a1, a6 ; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 56(sp) ; RV32I-NEXT: sw zero, 52(sp) @@ -1609,91 +1609,91 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw t2, 24(sp) ; RV32I-NEXT: sw t1, 20(sp) ; RV32I-NEXT: sw t0, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a7, 12(sp) ; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a4, 4(sp) ; RV32I-NEXT: sw a3, 0(sp) -; RV32I-NEXT: srli a0, a7, 3 +; RV32I-NEXT: srli a0, a6, 3 ; RV32I-NEXT: andi a0, a0, 28 ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: add a4, a1, a0 -; RV32I-NEXT: lw a1, 4(a4) -; RV32I-NEXT: srl a0, a1, a7 -; RV32I-NEXT: lw a5, 8(a4) -; RV32I-NEXT: andi a3, a7, 31 -; RV32I-NEXT: xori a6, a3, 31 -; RV32I-NEXT: lw a3, 0(a4) -; RV32I-NEXT: slli t0, a5, 1 -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or a0, a0, t0 -; RV32I-NEXT: srl a3, a3, a7 +; RV32I-NEXT: add a3, a1, a0 +; RV32I-NEXT: lw a1, 4(a3) +; RV32I-NEXT: lw a4, 0(a3) +; RV32I-NEXT: lw a5, 8(a3) +; RV32I-NEXT: lw a7, 12(a3) +; RV32I-NEXT: srl a0, a1, a6 +; RV32I-NEXT: andi t0, a6, 31 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: slli t1, a5, 1 +; RV32I-NEXT: sll t1, t1, t0 +; RV32I-NEXT: or a0, a0, t1 +; RV32I-NEXT: srl a4, a4, a6 ; RV32I-NEXT: 
slli a1, a1, 1 -; RV32I-NEXT: lw t0, 12(a4) -; RV32I-NEXT: lw t1, 16(a4) -; RV32I-NEXT: sll a1, a1, a6 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: srl a3, t0, a7 -; RV32I-NEXT: slli t2, t1, 1 -; RV32I-NEXT: sll t2, t2, a6 -; RV32I-NEXT: or a3, a3, t2 -; RV32I-NEXT: srl a5, a5, a7 -; RV32I-NEXT: slli t0, t0, 1 -; RV32I-NEXT: lw t2, 20(a4) -; RV32I-NEXT: lw t3, 24(a4) -; RV32I-NEXT: sll t0, t0, a6 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: srl t0, t2, a7 -; RV32I-NEXT: slli t4, t3, 1 -; RV32I-NEXT: sll t4, t4, a6 -; RV32I-NEXT: or t0, t0, t4 -; RV32I-NEXT: srl t1, t1, a7 +; RV32I-NEXT: sll a1, a1, t0 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srl a4, a7, a6 +; RV32I-NEXT: lw t1, 16(a3) +; RV32I-NEXT: lw t2, 20(a3) +; RV32I-NEXT: lw t3, 24(a3) +; RV32I-NEXT: lw t4, 28(a3) +; RV32I-NEXT: slli a3, t1, 1 +; RV32I-NEXT: sll a3, a3, t0 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: srl a4, a5, a6 +; RV32I-NEXT: slli a7, a7, 1 +; RV32I-NEXT: sll a5, a7, t0 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: srl a5, t2, a6 +; RV32I-NEXT: slli a7, t3, 1 +; RV32I-NEXT: sll a7, a7, t0 +; RV32I-NEXT: or a5, a5, a7 +; RV32I-NEXT: srl a7, t1, a6 ; RV32I-NEXT: slli t2, t2, 1 -; RV32I-NEXT: lw a4, 28(a4) -; RV32I-NEXT: sll t2, t2, a6 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: srl t2, t3, a7 -; RV32I-NEXT: slli t3, a4, 1 -; RV32I-NEXT: sll a6, t3, a6 -; RV32I-NEXT: or a6, t2, a6 -; RV32I-NEXT: srl a4, a4, a7 -; RV32I-NEXT: sb a4, 28(a2) -; RV32I-NEXT: srli a7, a4, 24 -; RV32I-NEXT: sb a7, 31(a2) -; RV32I-NEXT: srli a7, a4, 16 -; RV32I-NEXT: sb a7, 30(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb t1, 16(a2) -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sll t1, t2, t0 +; RV32I-NEXT: or a7, a7, t1 +; RV32I-NEXT: srl t1, t3, a6 +; RV32I-NEXT: slli t2, t4, 1 +; RV32I-NEXT: sll t0, t2, t0 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: srl a6, t4, a6 +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: sb t1, 31(a2) +; RV32I-NEXT: srli t1, a6, 16 +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: sb a6, 29(a2) +; RV32I-NEXT: sb t0, 24(a2) +; RV32I-NEXT: sb a7, 16(a2) +; RV32I-NEXT: sb a5, 20(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a3, 12(a2) ; RV32I-NEXT: sb a1, 0(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a4, a6, 24 -; RV32I-NEXT: sb a4, 27(a2) -; RV32I-NEXT: srli a4, a6, 16 -; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, a6, 8 -; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t1, 24 -; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t1, 16 -; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t1, 8 -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, t0, 24 -; RV32I-NEXT: sb a4, 23(a2) -; RV32I-NEXT: srli a4, t0, 16 -; RV32I-NEXT: sb a4, 22(a2) -; RV32I-NEXT: srli a4, t0, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a4, a5, 24 -; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, a5, 16 -; RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: srli a6, t0, 24 +; RV32I-NEXT: sb a6, 27(a2) +; RV32I-NEXT: srli a6, t0, 16 +; RV32I-NEXT: sb a6, 26(a2) +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb a6, 25(a2) +; RV32I-NEXT: srli a6, a7, 24 +; RV32I-NEXT: sb a6, 19(a2) +; RV32I-NEXT: srli a6, a7, 16 +; RV32I-NEXT: sb a6, 18(a2) +; RV32I-NEXT: srli a6, a7, 8 +; RV32I-NEXT: sb a6, 17(a2) +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: sb a6, 23(a2) +; RV32I-NEXT: srli a6, a5, 16 +; RV32I-NEXT: sb a6, 22(a2) ; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a5, 
21(a2) +; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a4, a3, 16 @@ -1733,105 +1733,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli 
t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a6 -; RV64I-NEXT: lbu a6, 1(a1) ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t0, 1(a1) +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 2(a1) ; RV64I-NEXT: lbu t1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 4(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sd zero, 24(sp) @@ -1848,69 +1848,69 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sub a3, a3, a0 ; RV64I-NEXT: ld a4, 8(a3) ; RV64I-NEXT: ld a5, 0(a3) +; RV64I-NEXT: ld a6, 16(a3) +; RV64I-NEXT: ld a3, 24(a3) ; RV64I-NEXT: sll a0, a4, a1 -; RV64I-NEXT: andi a6, a1, 63 -; RV64I-NEXT: xori a6, a6, 63 -; RV64I-NEXT: srli a7, a5, 1 -; RV64I-NEXT: ld t0, 24(a3) -; RV64I-NEXT: ld a3, 16(a3) -; RV64I-NEXT: srl a7, a7, a6 -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: sll a7, t0, a1 -; RV64I-NEXT: srli t0, a3, 1 -; RV64I-NEXT: srl t0, t0, a6 -; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: andi a7, a1, 63 +; RV64I-NEXT: xori a7, a7, 63 +; RV64I-NEXT: srli t0, a5, 1 +; RV64I-NEXT: srl t0, t0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: sll a3, a3, a1 +; RV64I-NEXT: srli t0, a6, 1 +; RV64I-NEXT: srl t0, t0, a7 +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: sll a6, a6, a1 ; RV64I-NEXT: srli a4, a4, 1 -; RV64I-NEXT: srl a4, a4, a6 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: srl a4, a4, a7 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: sll a1, a5, a1 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a4, a1, 56 -; RV64I-NEXT: sb a4, 7(a2) -; RV64I-NEXT: srli a4, a1, 48 -; RV64I-NEXT: sb a4, 6(a2) -; RV64I-NEXT: srli a4, a1, 40 -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: srli a4, a1, 32 -; RV64I-NEXT: sb a4, 4(a2) -; RV64I-NEXT: srli a4, a1, 24 -; RV64I-NEXT: sb a4, 3(a2) -; RV64I-NEXT: srli a4, a1, 16 -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: sb a5, 7(a2) +; RV64I-NEXT: srli a5, a1, 48 +; RV64I-NEXT: sb a5, 6(a2) +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: sb a5, 4(a2) 
+; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: sb a5, 3(a2) +; RV64I-NEXT: srli a5, a1, 16 +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: sb a3, 16(a2) -; RV64I-NEXT: sb a7, 24(a2) +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a3, 24(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a3, 56 +; RV64I-NEXT: srli a1, a4, 56 ; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a3, 48 +; RV64I-NEXT: srli a1, a4, 48 ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a3, 40 +; RV64I-NEXT: srli a1, a4, 40 ; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a3, 32 +; RV64I-NEXT: srli a1, a4, 32 ; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a3, 24 +; RV64I-NEXT: srli a1, a4, 24 ; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a3, 16 +; RV64I-NEXT: srli a1, a4, 16 ; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: srli a1, a7, 56 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: srli a1, a3, 56 ; RV64I-NEXT: sb a1, 31(a2) -; RV64I-NEXT: srli a1, a7, 48 +; RV64I-NEXT: srli a1, a3, 48 ; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, a7, 40 +; RV64I-NEXT: srli a1, a3, 40 ; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, a7, 32 +; RV64I-NEXT: srli a1, a3, 32 ; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, a7, 24 +; RV64I-NEXT: srli a1, a3, 24 ; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, a7, 16 +; RV64I-NEXT: srli a1, a3, 16 ; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a1, a7, 8 -; RV64I-NEXT: sb a1, 25(a2) +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a3, 25(a2) ; RV64I-NEXT: srli a1, a0, 56 ; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: srli a1, a0, 48 @@ -1940,86 +1940,86 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) -; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a6, 5(a0) +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 6(a0) ; RV32I-NEXT: lbu a7, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a7, a7, 24 -; RV32I-NEXT: or a5, a7, a6 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: or a4, a7, a4 ; RV32I-NEXT: lbu a6, 8(a0) -; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu a7, 9(a0) +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: lbu a5, 10(a0) ; RV32I-NEXT: lbu t0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: or a5, t0, a5 ; RV32I-NEXT: lbu a7, 12(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lbu a6, 14(a0) ; RV32I-NEXT: lbu t1, 15(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t1, t1, 24 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t1, 
17(a0) +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu t2, 19(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t2, t2, 24 -; RV32I-NEXT: or t0, t2, t1 -; RV32I-NEXT: or t0, t0, a7 -; RV32I-NEXT: lbu a7, 21(a0) +; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: lbu t1, 20(a0) -; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t2, 21(a0) +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: lbu a7, 22(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t1 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t1, t1, a7 -; RV32I-NEXT: lbu a7, 25(a0) +; RV32I-NEXT: or a7, t3, a7 ; RV32I-NEXT: lbu t2, 24(a0) -; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: or t1, a7, t1 +; RV32I-NEXT: lbu a7, 26(a0) ; RV32I-NEXT: lbu t4, 27(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or t2, t2, a7 -; RV32I-NEXT: lbu a7, 29(a0) +; RV32I-NEXT: or a7, t4, a7 ; RV32I-NEXT: lbu t3, 28(a0) -; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu t4, 29(a0) +; RV32I-NEXT: or t2, a7, t2 +; RV32I-NEXT: lbu a7, 30(a0) ; RV32I-NEXT: lbu a0, 31(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, t4 ; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t3, 0(a1) -; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu a7, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: or a0, a0, t3 +; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, t3 -; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a7, t4, a7 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a7, a1, a7 ; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 24(sp) @@ -2043,68 +2043,68 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sub a4, a1, a0 ; RV32I-NEXT: lw a3, 4(a4) ; RV32I-NEXT: lw a5, 0(a4) +; RV32I-NEXT: lw a6, 8(a4) +; RV32I-NEXT: lw t0, 12(a4) ; RV32I-NEXT: sll a0, a3, a7 ; RV32I-NEXT: andi a1, a7, 31 -; RV32I-NEXT: xori a6, a1, 31 +; RV32I-NEXT: xori t1, a1, 31 ; RV32I-NEXT: srli a1, a5, 1 -; RV32I-NEXT: lw t0, 12(a4) -; RV32I-NEXT: lw t1, 8(a4) -; RV32I-NEXT: srl a1, a1, a6 +; RV32I-NEXT: srl a1, a1, t1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: sll a1, t0, a7 -; RV32I-NEXT: srli t2, t1, 1 -; RV32I-NEXT: srl t2, t2, a6 +; RV32I-NEXT: srli t2, a6, 1 +; RV32I-NEXT: srl t2, t2, t1 ; RV32I-NEXT: or a1, a1, t2 -; RV32I-NEXT: sll t1, t1, a7 +; RV32I-NEXT: sll a6, a6, a7 ; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: lw t2, 20(a4) -; RV32I-NEXT: lw t3, 16(a4) -; RV32I-NEXT: srl a3, a3, a6 -; RV32I-NEXT: or a3, t1, a3 -; RV32I-NEXT: sll t1, t2, a7 -; RV32I-NEXT: srli t4, t3, 1 -; RV32I-NEXT: srl t4, t4, a6 -; RV32I-NEXT: or t1, t1, t4 -; RV32I-NEXT: sll t3, t3, a7 +; RV32I-NEXT: srl a3, 
a3, t1 +; RV32I-NEXT: lw t2, 16(a4) +; RV32I-NEXT: lw t3, 20(a4) +; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lw a6, 24(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: sll t4, t3, a7 +; RV32I-NEXT: srli t5, t2, 1 +; RV32I-NEXT: srl t5, t5, t1 +; RV32I-NEXT: or t4, t4, t5 +; RV32I-NEXT: sll t2, t2, a7 ; RV32I-NEXT: srli t0, t0, 1 -; RV32I-NEXT: lw t4, 28(a4) -; RV32I-NEXT: lw a4, 24(a4) -; RV32I-NEXT: srl t0, t0, a6 -; RV32I-NEXT: or t0, t3, t0 -; RV32I-NEXT: sll t3, t4, a7 -; RV32I-NEXT: srli t4, a4, 1 -; RV32I-NEXT: srl t4, t4, a6 -; RV32I-NEXT: or t3, t3, t4 +; RV32I-NEXT: srl t0, t0, t1 +; RV32I-NEXT: or t0, t2, t0 ; RV32I-NEXT: sll a4, a4, a7 -; RV32I-NEXT: srli t2, t2, 1 -; RV32I-NEXT: srl a6, t2, a6 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: srli t2, a6, 1 +; RV32I-NEXT: srl t2, t2, t1 +; RV32I-NEXT: or a4, a4, t2 +; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: srli t2, t3, 1 +; RV32I-NEXT: srl t1, t2, t1 +; RV32I-NEXT: or a6, a6, t1 ; RV32I-NEXT: sll a5, a5, a7 ; RV32I-NEXT: sb a5, 0(a2) -; RV32I-NEXT: srli a6, a5, 24 -; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: srli a6, a5, 16 -; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: srli a7, a5, 24 +; RV32I-NEXT: sb a7, 3(a2) +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: sb a7, 2(a2) ; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a5, 1(a2) -; RV32I-NEXT: sb a4, 24(a2) -; RV32I-NEXT: sb t3, 28(a2) +; RV32I-NEXT: sb a6, 24(a2) +; RV32I-NEXT: sb a4, 28(a2) ; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb t1, 20(a2) +; RV32I-NEXT: sb t4, 20(a2) ; RV32I-NEXT: sb a3, 8(a2) ; RV32I-NEXT: sb a1, 12(a2) ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a5, a6, 24 ; RV32I-NEXT: sb a5, 27(a2) -; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: srli a5, a6, 16 ; RV32I-NEXT: sb a5, 26(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 25(a2) +; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: sb a5, 30(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t3, 24 -; RV32I-NEXT: sb a4, 31(a2) -; RV32I-NEXT: srli a4, t3, 16 -; RV32I-NEXT: sb a4, 30(a2) -; RV32I-NEXT: srli a4, t3, 8 ; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: srli a4, t0, 24 ; RV32I-NEXT: sb a4, 19(a2) @@ -2112,11 +2112,11 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb a4, 18(a2) ; RV32I-NEXT: srli a4, t0, 8 ; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, t1, 24 +; RV32I-NEXT: srli a4, t4, 24 ; RV32I-NEXT: sb a4, 23(a2) -; RV32I-NEXT: srli a4, t1, 16 +; RV32I-NEXT: srli a4, t4, 16 ; RV32I-NEXT: sb a4, 22(a2) -; RV32I-NEXT: srli a4, t1, 8 +; RV32I-NEXT: srli a4, t4, 8 ; RV32I-NEXT: sb a4, 21(a2) ; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a4, 11(a2) @@ -2157,105 +2157,105 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 6(a0) ; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 
-; RV64I-NEXT: lbu a4, 9(a0) ; RV64I-NEXT: lbu a5, 8(a0) -; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 10(a0) ; RV64I-NEXT: lbu a7, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: lbu a6, 12(a0) -; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: lbu a5, 14(a0) ; RV64I-NEXT: lbu t0, 15(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 17(a0) ; RV64I-NEXT: lbu a6, 16(a0) -; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu a7, 17(a0) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 18(a0) ; RV64I-NEXT: lbu t0, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: lbu a7, 20(a0) -; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu t1, 23(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a6, a6, a7 ; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 25(a0) ; RV64I-NEXT: lbu a7, 24(a0) -; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t0, 25(a0) +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 26(a0) ; RV64I-NEXT: lbu t1, 27(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a7, t1, t0 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: lbu t0, 28(a0) -; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu t1, 29(a0) +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 30(a0) ; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a7, a0, 32 -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 1(a1) ; RV64I-NEXT: lbu t0, 0(a1) -; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 2(a1) ; RV64I-NEXT: lbu t2, 3(a1) -; RV64I-NEXT: slli a7, a7, 8 -; RV64I-NEXT: or a7, a7, t0 -; RV64I-NEXT: slli t1, t1, 16 +; 
RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
 ; RV64I-NEXT: slli t2, t2, 24
-; RV64I-NEXT: or t0, t2, t1
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: lbu t0, 5(a1)
+; RV64I-NEXT: or a7, t2, a7
 ; RV64I-NEXT: lbu t1, 4(a1)
-; RV64I-NEXT: lbu t2, 6(a1)
+; RV64I-NEXT: lbu t2, 5(a1)
+; RV64I-NEXT: or a7, a7, t0
+; RV64I-NEXT: lbu t0, 6(a1)
 ; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli t0, t0, 8
-; RV64I-NEXT: or t0, t0, t1
-; RV64I-NEXT: slli t2, t2, 16
+; RV64I-NEXT: slli t2, t2, 8
+; RV64I-NEXT: or t1, t2, t1
+; RV64I-NEXT: slli t0, t0, 16
 ; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t2
 ; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: or a1, a1, t1
 ; RV64I-NEXT: slli a1, a1, 32
 ; RV64I-NEXT: or a1, a1, a7
 ; RV64I-NEXT: sraiw a0, a0, 31
@@ -2270,72 +2270,72 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT: srli a0, a1, 3
 ; RV64I-NEXT: andi a0, a0, 24
 ; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: srl a0, a4, a1
-; RV64I-NEXT: ld a5, 16(a3)
-; RV64I-NEXT: andi a6, a1, 63
-; RV64I-NEXT: xori a6, a6, 63
-; RV64I-NEXT: ld a7, 0(a3)
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 8(a0)
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a0, a3, a1
+; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: xori a7, a7, 63
 ; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sll t0, t0, a6
+; RV64I-NEXT: sll t0, t0, a7
 ; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: srl a7, a7, a1
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a6
-; RV64I-NEXT: or a4, a7, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: sll a6, a7, a6
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: sra a1, a3, a1
+; RV64I-NEXT: srl a4, a4, a1
+; RV64I-NEXT: slli a3, a3, 1
+; RV64I-NEXT: sll a3, a3, a7
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: srl a4, a5, a1
+; RV64I-NEXT: slli a5, a6, 1
+; RV64I-NEXT: sll a5, a5, a7
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: sra a1, a6, a1
 ; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 30(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 29(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 28(a2)
-; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
+; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb a5, 31(a2)
+; RV64I-NEXT: srli a5, a1, 48
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: sb a5, 29(a2)
+; RV64I-NEXT: srli a5, a1, 32
+; RV64I-NEXT: sb a5, 28(a2)
+; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: sb a5, 27(a2)
+; RV64I-NEXT: srli a5, a1, 16
+; RV64I-NEXT: sb a5, 26(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a3, 0(a2)
 ; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a5, 56
+; RV64I-NEXT: srli a1, a4, 56
 ; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a5, 48
+; RV64I-NEXT: srli a1, a4, 48
 ; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a5, 40
+; RV64I-NEXT: srli a1, a4, 40
 ; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a5, 32
+; RV64I-NEXT: srli a1, a4, 32
 ; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a5, 24
+; RV64I-NEXT: srli a1, a4, 24
 ; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a5, 16
+; RV64I-NEXT: srli a1, a4, 16
 ; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a1, a4, 56
+; RV64I-NEXT: srli a4, a4, 8
+; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a1, a3, 56
 ; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
+; RV64I-NEXT: srli a1, a3, 48
 ; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
+; RV64I-NEXT: srli a1, a3, 40
 ; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
+; RV64I-NEXT: srli a1, a3, 32
 ; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
+; RV64I-NEXT: srli a1, a3, 24
 ; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
+; RV64I-NEXT: srli a1, a3, 16
 ; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a3, a3, 8
+; RV64I-NEXT: sb a3, 1(a2)
 ; RV64I-NEXT: srli a1, a0, 56
 ; RV64I-NEXT: sb a1, 15(a2)
 ; RV64I-NEXT: srli a1, a0, 48
@@ -2365,87 +2365,87 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: slli a5, a5, 16
 ; RV32I-NEXT: slli a6, a6, 24
 ; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
 ; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
 ; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
 ; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
 ; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
 ; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
 ; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
 ; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
 ; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
 ; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a7, a6, a7
+; RV32I-NEXT: lbu a6, 18(a0)
 ; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t0, t0, a7
-; RV32I-NEXT: lbu a7, 21(a0)
+; RV32I-NEXT: or a6, t2, a6
 ; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or t0, a6, t0
+; RV32I-NEXT: lbu a6, 22(a0)
 ; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t1, t1, a7
-; RV32I-NEXT: lbu a7, 25(a0)
+; RV32I-NEXT: or a6, t3, a6
 ; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t1, a6, t1
+; RV32I-NEXT: lbu a6, 26(a0)
 ; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t2, t2, a7
-; RV32I-NEXT: lbu a7, 29(a0)
+; RV32I-NEXT: or a6, t4, a6
 ; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t2, a6, t2
+; RV32I-NEXT: lbu a6, 30(a0)
 ; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t3, a0, t4
-; RV32I-NEXT: or t3, t3, a7
-; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a6, a0, a6
 ; RV32I-NEXT: lbu t4, 0(a1)
-; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu t5, 1(a1)
+; RV32I-NEXT: or t3, a6, t3
+; RV32I-NEXT: lbu a6, 2(a1)
 ; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t4
-; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli t5, t5, 8
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t5
-; RV32I-NEXT: or a7, a1, a7
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a6, a1, t4
 ; RV32I-NEXT: srai a0, a0, 31
 ; RV32I-NEXT: sw a0, 60(sp)
 ; RV32I-NEXT: sw a0, 56(sp)
@@ -2459,91 +2459,91 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: sw t2, 24(sp)
 ; RV32I-NEXT: sw t1, 20(sp)
 ; RV32I-NEXT: sw t0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
+; RV32I-NEXT: sw a7, 12(sp)
 ; RV32I-NEXT: sw a5, 8(sp)
 ; RV32I-NEXT: sw a4, 4(sp)
 ; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: srli a0, a7, 3
+; RV32I-NEXT: srli a0, a6, 3
 ; RV32I-NEXT: andi a0, a0, 28
 ; RV32I-NEXT: mv a1, sp
-; RV32I-NEXT: add a4, a1, a0
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: srl a0, a1, a7
-; RV32I-NEXT: lw a5, 8(a4)
-; RV32I-NEXT: andi a3, a7, 31
-; RV32I-NEXT: xori a6, a3, 31
-; RV32I-NEXT: lw a3, 0(a4)
-; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a0, a0, t0
-; RV32I-NEXT: srl a3, a3, a7
+; RV32I-NEXT: add a3, a1, a0
+; RV32I-NEXT: lw a1, 4(a3)
+; RV32I-NEXT: lw a4, 0(a3)
+; RV32I-NEXT: lw a5, 8(a3)
+; RV32I-NEXT: lw a7, 12(a3)
+; RV32I-NEXT: srl a0, a1, a6
+; RV32I-NEXT: andi t0, a6, 31
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: slli t1, a5, 1
+; RV32I-NEXT: sll t1, t1, t0
+; RV32I-NEXT: or a0, a0, t1
+; RV32I-NEXT: srl a4, a4, a6
 ; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: lw t0, 12(a4)
-; RV32I-NEXT: lw t1, 16(a4)
-; RV32I-NEXT: sll a1, a1, a6
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srl a3, t0, a7
-; RV32I-NEXT: slli t2, t1, 1
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or a3, a3, t2
-; RV32I-NEXT: srl a5, a5, a7
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: lw t2, 20(a4)
-; RV32I-NEXT: lw t3, 24(a4)
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a5, a5, t0
-; RV32I-NEXT: srl t0, t2, a7
-; RV32I-NEXT: slli t4, t3, 1
-; RV32I-NEXT: sll t4, t4, a6
-; RV32I-NEXT: or t0, t0, t4
-; RV32I-NEXT: srl t1, t1, a7
+; RV32I-NEXT: sll a1, a1, t0
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srl a4, a7, a6
+; RV32I-NEXT: lw t1, 16(a3)
+; RV32I-NEXT: lw t2, 20(a3)
+; RV32I-NEXT: lw t3, 24(a3)
+; RV32I-NEXT: lw t4, 28(a3)
+; RV32I-NEXT: slli a3, t1, 1
+; RV32I-NEXT: sll a3, a3, t0
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: srl a4, a5, a6
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: sll a5, a7, t0
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: srl a5, t2, a6
+; RV32I-NEXT: slli a7, t3, 1
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: or a5, a5, a7
+; RV32I-NEXT: srl a7, t1, a6
 ; RV32I-NEXT: slli t2, t2, 1
-; RV32I-NEXT: lw a4, 28(a4)
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: srl t2, t3, a7
-; RV32I-NEXT: slli t3, a4, 1
-; RV32I-NEXT: sll a6, t3, a6
-; RV32I-NEXT: or a6, t2, a6
-; RV32I-NEXT: sra a4, a4, a7
-; RV32I-NEXT: sb a4, 28(a2)
-; RV32I-NEXT: srli a7, a4, 24
-; RV32I-NEXT: sb a7, 31(a2)
-; RV32I-NEXT: srli a7, a4, 16
-; RV32I-NEXT: sb a7, 30(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb t1, 16(a2)
-; RV32I-NEXT: sb t0, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sll t1, t2, t0
+; RV32I-NEXT: or a7, a7, t1
+; RV32I-NEXT: srl t1, t3, a6
+; RV32I-NEXT: slli t2, t4, 1
+; RV32I-NEXT: sll t0, t2, t0
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: sra a6, t4, a6
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: sb t1, 31(a2)
+; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: srli a6, a6, 8
+; RV32I-NEXT: sb a6, 29(a2)
+; RV32I-NEXT: sb t0, 24(a2)
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb a4, 8(a2)
 ; RV32I-NEXT: sb a3, 12(a2)
 ; RV32I-NEXT: sb a1, 0(a2)
 ; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a4, a6, 24
-; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, a6, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, a6, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t1, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, t0, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, t0, 16
-; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a4, t0, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a4, a5, 24
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, a5, 16
-; RV32I-NEXT: sb a4, 10(a2)
+; RV32I-NEXT: srli a6, t0, 24
+; RV32I-NEXT: sb a6, 27(a2)
+; RV32I-NEXT: srli a6, t0, 16
+; RV32I-NEXT: sb a6, 26(a2)
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb a6, 25(a2)
+; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: sb a6, 19(a2)
+; RV32I-NEXT: srli a6, a7, 16
+; RV32I-NEXT: sb a6, 18(a2)
+; RV32I-NEXT: srli a6, a7, 8
+; RV32I-NEXT: sb a6, 17(a2)
+; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: sb a6, 23(a2)
+; RV32I-NEXT: srli a6, a5, 16
+; RV32I-NEXT: sb a6, 22(a2)
 ; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 21(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a5, a4, 16
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
 ; RV32I-NEXT: srli a4, a3, 24
 ; RV32I-NEXT: sb a4, 15(a2)
 ; RV32I-NEXT: srli a4, a3, 16
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
index 333fd4c0472427..3a74bb66d9ec25 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmempair.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -57,14 +57,14 @@ define i64 @lwud(ptr %a) {
 define i64 @ldd(ptr %a) {
 ; RV32XTHEADMEMPAIR-LABEL: ldd:
 ; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: lw a1, 32(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a2, 36(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a3, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a1, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a2, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a3, 36(a0)
 ; RV32XTHEADMEMPAIR-NEXT: lw a0, 40(a0)
-; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a3
-; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a0
-; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT: add a0, a2, a0
+; RV32XTHEADMEMPAIR-NEXT: sltu a2, a0, a2
+; RV32XTHEADMEMPAIR-NEXT: add a1, a1, a2
 ; RV32XTHEADMEMPAIR-NEXT: ret
 ;
 ; RV64XTHEADMEMPAIR-LABEL: ldd: