From 2967e5f8007d873a3e9d97870d2461d0827a3976 Mon Sep 17 00:00:00 2001
From: Alex Bradbury
Date: Fri, 11 Oct 2024 20:25:53 +0100
Subject: [PATCH] [RISCV] Enable store clustering by default (#73796)

Builds on #73789, enabling store clustering by default using the same heuristic.
---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 10 +-
 llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll | 42 +-
 llvm/test/CodeGen/RISCV/abds-neg.ll | 16 +-
 llvm/test/CodeGen/RISCV/abds.ll | 148 +-
 llvm/test/CodeGen/RISCV/abdu-neg.ll | 8 +-
 llvm/test/CodeGen/RISCV/add-before-shl.ll | 4 +-
 llvm/test/CodeGen/RISCV/alloca.ll | 16 +-
 llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 40 +-
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 112 +-
 .../RISCV/calling-conv-ilp32-ilp32f-common.ll | 72 +-
 ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 176 +-
 .../test/CodeGen/RISCV/calling-conv-ilp32d.ll | 4 +-
 .../test/CodeGen/RISCV/calling-conv-ilp32e.ll | 512 +--
 .../calling-conv-lp64-lp64f-lp64d-common.ll | 80 +-
 llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll | 20 +-
 .../CodeGen/RISCV/calling-conv-rv32f-ilp32.ll | 9 +-
 .../RISCV/calling-conv-rv32f-ilp32e.ll | 21 +-
 .../RISCV/calling-conv-vector-float.ll | 12 +-
 llvm/test/CodeGen/RISCV/calls.ll | 24 +-
 .../test/CodeGen/RISCV/double-calling-conv.ll | 28 +-
 .../CodeGen/RISCV/double-convert-strict.ll | 4 +-
 llvm/test/CodeGen/RISCV/double-convert.ll | 4 +-
 llvm/test/CodeGen/RISCV/fastcc-bf16.ll | 18 +-
 llvm/test/CodeGen/RISCV/fastcc-float.ll | 18 +-
 llvm/test/CodeGen/RISCV/fastcc-half.ll | 18 +-
 llvm/test/CodeGen/RISCV/fastcc-int.ll | 12 +-
 .../CodeGen/RISCV/fastcc-without-f-reg.ll | 808 ++--
 .../test/CodeGen/RISCV/fold-addi-loadstore.ll | 4 +-
 llvm/test/CodeGen/RISCV/forced-atomics.ll | 58 +-
 llvm/test/CodeGen/RISCV/fp128.ll | 68 +-
 llvm/test/CodeGen/RISCV/frame.ll | 12 +-
 .../CodeGen/RISCV/get-setcc-result-type.ll | 6 +-
 .../CodeGen/RISCV/hoist-global-addr-base.ll | 4 +-
 .../RISCV/intrinsic-cttz-elts-vscale.ll | 2 +-
 llvm/test/CodeGen/RISCV/legalize-fneg.ll | 12 +-
 llvm/test/CodeGen/RISCV/llvm.exp10.ll | 54 +-
 llvm/test/CodeGen/RISCV/llvm.frexp.ll | 468 +--
 llvm/test/CodeGen/RISCV/memcpy.ll | 174 +-
 llvm/test/CodeGen/RISCV/memset-inline.ll | 1034 ++---
 .../CodeGen/RISCV/misched-load-clustering.ll | 4 +-
 llvm/test/CodeGen/RISCV/neg-abs.ll | 10 +-
 llvm/test/CodeGen/RISCV/nontemporal.ll | 2960 +++++++-------
 .../test/CodeGen/RISCV/overflow-intrinsics.ll | 4 +-
 llvm/test/CodeGen/RISCV/pr63816.ll | 12 +-
 llvm/test/CodeGen/RISCV/push-pop-popret.ll | 54 +-
 .../CodeGen/RISCV/riscv-codegenprepare-asm.ll | 2 +-
 .../CodeGen/RISCV/riscv-shifted-extend.ll | 8 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/binop-splats.ll | 12 +-
 .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 20 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 22 +-
 llvm/test/CodeGen/RISCV/rvv/combine-store.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-binop-splats.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 44 +-
 .../RISCV/rvv/fixed-vectors-bitreverse.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-bswap-vp.ll | 20 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 4 +-
 .../rvv/fixed-vectors-calling-conv-fastcc.ll | 6 +-
 .../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 96 +-
 .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 139 +-
 .../RISCV/rvv/fixed-vectors-cttz-vp.ll | 96 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-expandload-int.ll | 8 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 352 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 48 +- .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 12 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 66 +- .../RISCV/rvv/fixed-vectors-int-splat.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 140 +- .../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 128 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 520 +-- .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 132 +- .../rvv/fixed-vectors-reduction-int-vp.ll | 32 +- .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 36 +- .../rvv/fixed-vectors-shuffle-vslide1down.ll | 4 +- .../rvv/fixed-vectors-shuffle-vslide1up.ll | 4 +- .../rvv/fixed-vectors-shufflevector-vnsrl.ll | 32 +- .../fixed-vectors-strided-load-store-asm.ll | 12 +- .../RISCV/rvv/fixed-vectors-unaligned.ll | 48 +- .../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 4 +- .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vand-vp.ll | 20 +- .../RISCV/rvv/fixed-vectors-vdiv-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vdivu-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vmacc-vp.ll | 18 +- .../RISCV/rvv/fixed-vectors-vmax-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vmaxu-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vmin-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vminu-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vmul-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vnmsac-vp.ll | 18 +- .../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vp-splat.ll | 8 +- .../RISCV/rvv/fixed-vectors-vpmerge.ll | 16 +- .../RISCV/rvv/fixed-vectors-vrem-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vremu-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vrsub-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll | 8 +- .../RISCV/rvv/fixed-vectors-vsaddu-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll | 8 +- .../RISCV/rvv/fixed-vectors-vssub-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-vssub.ll | 8 +- .../RISCV/rvv/fixed-vectors-vssubu-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll | 8 +- .../RISCV/rvv/fixed-vectors-vsub-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 14 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 8 +- .../RISCV/rvv/fixed-vectors-vwmulsu.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 14 +- .../RISCV/rvv/fixed-vectors-vxor-vp.ll | 16 +- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 360 +- llvm/test/CodeGen/RISCV/rvv/masked-tama.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/memset-inline.ll | 88 +- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/pr104480.ll | 4 +- .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 72 +- llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll | 40 +- .../rvv/splat-vector-split-i64-vl-sdnode.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/stepvector.ll | 34 +- llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/vaadd.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vaaddu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vadc.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vadd.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vand-vp.ll | 16 +- 
llvm/test/CodeGen/RISCV/rvv/vand.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vandn.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vasub.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vasubu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vclmul.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vclmulh.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vdiv.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vdivu.ll | 16 +- .../CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vmacc.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmadc.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vmadd.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmax.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmaxu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmin.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vminu.ll | 16 +- .../test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmsbc.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmseq.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsge.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/vmsgt.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsle.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsleu.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmslt.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsltu.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsne.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmul.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmulh.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmulhu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vnmsac.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vnmsub.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vor-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vor.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vp-splat.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 8 +- .../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 60 +- llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vrem.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vremu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vrsub.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 16 +- 
llvm/test/CodeGen/RISCV/rvv/vsadd.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsaddu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsbc.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vselect-int.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vsmul.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vssub.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vssubu.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsub.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vxor.ll | 16 +- .../CodeGen/RISCV/select-optimize-multiple.ll | 14 +- llvm/test/CodeGen/RISCV/shifts.ll | 72 +- llvm/test/CodeGen/RISCV/split-offsets.ll | 12 +- llvm/test/CodeGen/RISCV/split-store.ll | 4 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 96 +- llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 230 +- llvm/test/CodeGen/RISCV/stack-slot-size.ll | 32 +- llvm/test/CodeGen/RISCV/stack-store-check.ll | 484 +-- llvm/test/CodeGen/RISCV/tail-calls.ll | 32 +- .../CodeGen/RISCV/unaligned-load-store.ll | 140 +- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 36 +- llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 216 +- llvm/test/CodeGen/RISCV/vararg-ilp32e.ll | 12 +- llvm/test/CodeGen/RISCV/vararg.ll | 1052 ++--- ...lar-shift-by-byte-multiple-legalization.ll | 3522 +++++++++-------- .../RISCV/wide-scalar-shift-legalization.ll | 1296 +++--- llvm/test/CodeGen/RISCV/xtheadmempair.ll | 4 +- 250 files changed, 9579 insertions(+), 9592 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index d819131dae8cb82..c48470ab707f107 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -94,9 +94,9 @@ static cl::opt cl::desc("Enable the loop data prefetch pass"), cl::init(true)); -static cl::opt EnableMISchedLoadClustering( - "riscv-misched-load-clustering", cl::Hidden, - cl::desc("Enable load clustering in the machine scheduler"), +static cl::opt EnableMISchedLoadStoreClustering( + "riscv-misched-load-store-clustering", cl::Hidden, + cl::desc("Enable load and store clustering in the machine scheduler"), cl::init(true)); static cl::opt EnableVSETVLIAfterRVVRegAlloc( @@ -352,10 +352,12 @@ class RISCVPassConfig : public TargetPassConfig { ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMILive *DAG = nullptr; - if (EnableMISchedLoadClustering) { + if (EnableMISchedLoadStoreClustering) { DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + DAG->addMutation(createStoreClusterDAGMutation( + DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); } return DAG; } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index 3df4aca40ec9428..6c0e322a252c72d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -69,12 +69,12 @@ define i32 @va1(ptr %fmt, ...) 
{ ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: addi a1, sp, 24 ; RV64-NEXT: sd a1, 8(sp) ; RV64-NEXT: lw a0, 4(a0) ; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) ; RV64-NEXT: slli a0, a0, 32 @@ -129,12 +129,12 @@ define i32 @va1(ptr %fmt, ...) { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: addi a0, s0, -24 ; RV64-WITHFP-NEXT: addi a1, s0, 8 ; RV64-WITHFP-NEXT: sd a1, -24(s0) ; RV64-WITHFP-NEXT: lw a0, 4(a0) ; RV64-WITHFP-NEXT: lwu a1, -24(s0) +; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) ; RV64-WITHFP-NEXT: slli a0, a0, 32 @@ -844,11 +844,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-LABEL: va3: ; ILP32: # %bb.0: ; ILP32-NEXT: addi sp, sp, -32 -; ILP32-NEXT: sw a3, 12(sp) -; ILP32-NEXT: sw a4, 16(sp) ; ILP32-NEXT: addi a0, sp, 12 ; ILP32-NEXT: sw a0, 4(sp) ; ILP32-NEXT: lw a0, 4(sp) +; ILP32-NEXT: sw a3, 12(sp) +; ILP32-NEXT: sw a4, 16(sp) ; ILP32-NEXT: sw a5, 20(sp) ; ILP32-NEXT: sw a6, 24(sp) ; ILP32-NEXT: sw a7, 28(sp) @@ -868,11 +868,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-LABEL: va3: ; RV32D-ILP32: # %bb.0: ; RV32D-ILP32-NEXT: addi sp, sp, -48 -; RV32D-ILP32-NEXT: sw a3, 28(sp) -; RV32D-ILP32-NEXT: sw a4, 32(sp) ; RV32D-ILP32-NEXT: addi a0, sp, 28 ; RV32D-ILP32-NEXT: sw a0, 20(sp) ; RV32D-ILP32-NEXT: lw a0, 20(sp) +; RV32D-ILP32-NEXT: sw a3, 28(sp) +; RV32D-ILP32-NEXT: sw a4, 32(sp) ; RV32D-ILP32-NEXT: sw a5, 36(sp) ; RV32D-ILP32-NEXT: sw a6, 40(sp) ; RV32D-ILP32-NEXT: sw a7, 44(sp) @@ -894,11 +894,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32F-LABEL: va3: ; RV32D-ILP32F: # %bb.0: ; RV32D-ILP32F-NEXT: addi sp, sp, -48 -; RV32D-ILP32F-NEXT: sw a3, 28(sp) -; RV32D-ILP32F-NEXT: sw a4, 32(sp) ; RV32D-ILP32F-NEXT: addi a0, sp, 28 ; RV32D-ILP32F-NEXT: sw a0, 20(sp) ; RV32D-ILP32F-NEXT: lw a0, 20(sp) +; RV32D-ILP32F-NEXT: sw a3, 28(sp) +; RV32D-ILP32F-NEXT: sw a4, 32(sp) ; RV32D-ILP32F-NEXT: sw a5, 36(sp) ; RV32D-ILP32F-NEXT: sw a6, 40(sp) ; RV32D-ILP32F-NEXT: sw a7, 44(sp) @@ -920,11 +920,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32D-LABEL: va3: ; RV32D-ILP32D: # %bb.0: ; RV32D-ILP32D-NEXT: addi sp, sp, -48 -; RV32D-ILP32D-NEXT: sw a3, 28(sp) -; RV32D-ILP32D-NEXT: sw a4, 32(sp) ; RV32D-ILP32D-NEXT: addi a0, sp, 28 ; RV32D-ILP32D-NEXT: sw a0, 20(sp) ; RV32D-ILP32D-NEXT: lw a0, 20(sp) +; RV32D-ILP32D-NEXT: sw a3, 28(sp) +; RV32D-ILP32D-NEXT: sw a4, 32(sp) ; RV32D-ILP32D-NEXT: sw a5, 36(sp) ; RV32D-ILP32D-NEXT: sw a6, 40(sp) ; RV32D-ILP32D-NEXT: sw a7, 44(sp) @@ -946,12 +946,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV64-LABEL: va3: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd a2, 16(sp) -; RV64-NEXT: sd a3, 24(sp) -; RV64-NEXT: sd a4, 32(sp) ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: sd a0, 8(sp) ; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: sd a2, 16(sp) +; RV64-NEXT: sd a3, 24(sp) +; RV64-NEXT: sd a4, 32(sp) ; RV64-NEXT: sd a5, 40(sp) ; RV64-NEXT: sd a6, 48(sp) ; RV64-NEXT: sd a7, 56(sp) @@ -970,11 +970,11 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; RV32-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: addi s0, sp, 24 -; RV32-WITHFP-NEXT: sw a3, 4(s0) -; RV32-WITHFP-NEXT: sw a4, 8(s0) ; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -12(s0) ; RV32-WITHFP-NEXT: lw a0, -12(s0) +; RV32-WITHFP-NEXT: sw a3, 4(s0) +; RV32-WITHFP-NEXT: sw a4, 8(s0) ; RV32-WITHFP-NEXT: sw a5, 12(s0) ; RV32-WITHFP-NEXT: sw a6, 16(s0) ; RV32-WITHFP-NEXT: sw a7, 20(s0) @@ -999,12 +999,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: addi s0, sp, 32 -; RV64-WITHFP-NEXT: sd a2, 0(s0) -; RV64-WITHFP-NEXT: sd a3, 8(s0) -; RV64-WITHFP-NEXT: sd a4, 16(s0) ; RV64-WITHFP-NEXT: mv a0, s0 ; RV64-WITHFP-NEXT: sd a0, -24(s0) ; RV64-WITHFP-NEXT: ld a0, -24(s0) +; RV64-WITHFP-NEXT: sd a2, 0(s0) +; RV64-WITHFP-NEXT: sd a3, 8(s0) +; RV64-WITHFP-NEXT: sd a4, 16(s0) ; RV64-WITHFP-NEXT: sd a5, 24(s0) ; RV64-WITHFP-NEXT: sd a6, 32(s0) ; RV64-WITHFP-NEXT: sd a7, 40(s0) @@ -1622,9 +1622,6 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV64-NEXT: lui a0, 24414 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: sd a4, 304(a0) -; RV64-NEXT: lui a0, 24414 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a5, 312(a0) ; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: lui a1, 24414 ; RV64-NEXT: addiw a1, a1, 280 @@ -1634,6 +1631,9 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV64-NEXT: lwu a1, 8(sp) ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: sd a5, 312(a2) +; RV64-NEXT: lui a2, 24414 +; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: sd a6, 320(a2) ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index b6064198935a61d..e7fd87bd7838765 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -705,8 +705,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a4, a4, a3 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -824,8 +824,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a4, a4, a3 ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -952,8 +952,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a4, a4, a3 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -1071,8 +1071,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a4, a4, a3 ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1918,9 +1918,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a1, t2 ; RV32I-NEXT: sub a2, 
a2, a3 ; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a6, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a6, 8(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; @@ -2005,9 +2005,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a1, t2 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a6, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a6, 8(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 91b044902a5201b..e639d4b30d4c941 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -599,9 +599,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a4, t5 ; RV32I-NEXT: sub a4, a6, t6 ; RV32I-NEXT: .LBB11_13: -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -695,9 +695,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a4, t5 ; RV32ZBB-NEXT: sub a4, a6, t6 ; RV32ZBB-NEXT: .LBB11_13: -; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -799,9 +799,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a4, t5 ; RV32I-NEXT: sub a4, a6, t6 ; RV32I-NEXT: .LBB12_13: -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -895,9 +895,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a4, t5 ; RV32ZBB-NEXT: sub a4, a6, t6 ; RV32ZBB-NEXT: .LBB12_13: -; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1188,9 +1188,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a4, t5 ; RV32I-NEXT: sub a4, a6, t6 ; RV32I-NEXT: .LBB17_13: -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -1284,9 +1284,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a4, t5 ; RV32ZBB-NEXT: sub a4, a6, t6 ; RV32ZBB-NEXT: .LBB17_13: -; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1579,9 +1579,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a4, t5 ; RV32I-NEXT: sub a4, a6, t6 ; RV32I-NEXT: .LBB22_13: -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a3, 0(a0) 
+; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -1675,9 +1675,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a4, t5 ; RV32ZBB-NEXT: sub a4, a6, t6 ; RV32ZBB-NEXT: .LBB22_13: -; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -2043,8 +2043,8 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) ; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a2, 8(a1) @@ -2053,24 +2053,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a7, 4(a1) ; RV32I-NEXT: sltu a1, a2, a6 ; RV32I-NEXT: sub t1, t1, t0 -; RV32I-NEXT: sltu t0, a5, a3 +; RV32I-NEXT: sltu t0, a5, a4 ; RV32I-NEXT: sub a1, t1, a1 ; RV32I-NEXT: mv t1, t0 -; RV32I-NEXT: beq a7, a4, .LBB31_2 +; RV32I-NEXT: beq a7, a3, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t1, a7, a3 ; RV32I-NEXT: .LBB31_2: ; RV32I-NEXT: sub a2, a2, a6 ; RV32I-NEXT: sltu a6, a2, t1 ; RV32I-NEXT: sub a1, a1, a6 ; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a4, a4, t0 -; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a3, a7, a3 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a4, a5, a4 ; RV32I-NEXT: bgez a1, .LBB31_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: snez a6, a3 +; RV32I-NEXT: snez a5, a3 +; RV32I-NEXT: snez a6, a4 ; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: neg a7, a2 ; RV32I-NEXT: sltu t0, a7, a5 @@ -2079,12 +2079,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sub a1, a1, t0 ; RV32I-NEXT: sub a2, a7, a5 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: add a4, a4, a6 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: add a3, a3, a6 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB31_4: -; RV32I-NEXT: sw a4, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret @@ -2106,8 +2106,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw a3, 4(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a2, 8(a1) @@ -2116,24 +2116,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a7, 4(a1) ; RV32ZBB-NEXT: sltu a1, a2, a6 ; RV32ZBB-NEXT: sub t1, t1, t0 -; RV32ZBB-NEXT: sltu t0, a5, a3 +; RV32ZBB-NEXT: sltu t0, a5, a4 ; RV32ZBB-NEXT: sub a1, t1, a1 ; RV32ZBB-NEXT: mv t1, t0 -; RV32ZBB-NEXT: beq a7, a4, .LBB31_2 +; RV32ZBB-NEXT: beq a7, a3, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t1, a7, a3 ; RV32ZBB-NEXT: .LBB31_2: ; RV32ZBB-NEXT: sub a2, a2, a6 ; RV32ZBB-NEXT: sltu a6, a2, t1 ; RV32ZBB-NEXT: sub a1, a1, a6 ; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a4, a4, t0 -; RV32ZBB-NEXT: sub a3, a5, a3 +; 
RV32ZBB-NEXT: sub a3, a7, a3 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a4, a5, a4 ; RV32ZBB-NEXT: bgez a1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: snez a6, a3 +; RV32ZBB-NEXT: snez a5, a3 +; RV32ZBB-NEXT: snez a6, a4 ; RV32ZBB-NEXT: or a5, a6, a5 ; RV32ZBB-NEXT: neg a7, a2 ; RV32ZBB-NEXT: sltu t0, a7, a5 @@ -2142,12 +2142,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sub a1, a1, t0 ; RV32ZBB-NEXT: sub a2, a7, a5 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: add a4, a4, a6 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: add a3, a3, a6 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB31_4: -; RV32ZBB-NEXT: sw a4, 4(a0) -; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret @@ -2174,8 +2174,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a3, 4(a2) ; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a2, 8(a1) @@ -2184,24 +2184,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a7, 4(a1) ; RV32I-NEXT: sltu a1, a2, a6 ; RV32I-NEXT: sub t1, t1, t0 -; RV32I-NEXT: sltu t0, a5, a3 +; RV32I-NEXT: sltu t0, a5, a4 ; RV32I-NEXT: sub a1, t1, a1 ; RV32I-NEXT: mv t1, t0 -; RV32I-NEXT: beq a7, a4, .LBB32_2 +; RV32I-NEXT: beq a7, a3, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t1, a7, a3 ; RV32I-NEXT: .LBB32_2: ; RV32I-NEXT: sub a2, a2, a6 ; RV32I-NEXT: sltu a6, a2, t1 ; RV32I-NEXT: sub a1, a1, a6 ; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a4, a4, t0 -; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a3, a7, a3 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a4, a5, a4 ; RV32I-NEXT: bgez a1, .LBB32_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: snez a6, a3 +; RV32I-NEXT: snez a5, a3 +; RV32I-NEXT: snez a6, a4 ; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: neg a7, a2 ; RV32I-NEXT: sltu t0, a7, a5 @@ -2210,12 +2210,12 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sub a1, a1, t0 ; RV32I-NEXT: sub a2, a7, a5 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: add a4, a4, a6 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: add a3, a3, a6 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB32_4: -; RV32I-NEXT: sw a4, 4(a0) -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret @@ -2237,8 +2237,8 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw a3, 4(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a2, 8(a1) @@ -2247,24 +2247,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a7, 4(a1) ; RV32ZBB-NEXT: sltu a1, a2, a6 ; RV32ZBB-NEXT: sub t1, t1, t0 -; RV32ZBB-NEXT: sltu t0, a5, a3 +; RV32ZBB-NEXT: sltu t0, a5, a4 ; RV32ZBB-NEXT: sub a1, t1, a1 ; RV32ZBB-NEXT: mv t1, t0 -; RV32ZBB-NEXT: beq a7, a4, .LBB32_2 +; RV32ZBB-NEXT: beq a7, a3, 
.LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t1, a7, a3 ; RV32ZBB-NEXT: .LBB32_2: ; RV32ZBB-NEXT: sub a2, a2, a6 ; RV32ZBB-NEXT: sltu a6, a2, t1 ; RV32ZBB-NEXT: sub a1, a1, a6 ; RV32ZBB-NEXT: sub a2, a2, t1 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a4, a4, t0 -; RV32ZBB-NEXT: sub a3, a5, a3 +; RV32ZBB-NEXT: sub a3, a7, a3 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a4, a5, a4 ; RV32ZBB-NEXT: bgez a1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: snez a6, a3 +; RV32ZBB-NEXT: snez a5, a3 +; RV32ZBB-NEXT: snez a6, a4 ; RV32ZBB-NEXT: or a5, a6, a5 ; RV32ZBB-NEXT: neg a7, a2 ; RV32ZBB-NEXT: sltu t0, a7, a5 @@ -2273,12 +2273,12 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sub a1, a1, t0 ; RV32ZBB-NEXT: sub a2, a7, a5 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: add a4, a4, a6 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: add a3, a3, a6 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB32_4: -; RV32ZBB-NEXT: sw a4, 4(a0) -; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret @@ -2711,9 +2711,9 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a4, t5 ; RV32ZBB-NEXT: sub a4, a6, t6 ; RV32ZBB-NEXT: .LBB38_13: -; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 54075f416943926..b39285c3d343f57 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -1855,9 +1855,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a1, t2 ; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a6, 8(a0) -; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a6, 8(a0) ; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; @@ -1942,9 +1942,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a1, t2 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a6, 8(a0) -; RV32ZBB-NEXT: sw a1, 4(a0) ; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a6, 8(a0) ; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index 823918f1c42e7ac..db7498340d3951b 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -182,8 +182,8 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; RV32I-NEXT: lui a4, 128 ; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: jalr zero, 0(ra) ; @@ -217,8 +217,8 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; RV32C-NEXT: c.or a1, a3 ; RV32C-NEXT: c.slli a6, 3 ; RV32C-NEXT: sw a6, 0(a0) -; RV32C-NEXT: c.sw a1, 8(a0) ; RV32C-NEXT: c.sw a4, 4(a0) +; RV32C-NEXT: c.sw a1, 8(a0) ; RV32C-NEXT: c.sw a2, 12(a0) ; RV32C-NEXT: c.jr ra ; diff --git a/llvm/test/CodeGen/RISCV/alloca.ll b/llvm/test/CodeGen/RISCV/alloca.ll index 
bcb0592c18f59f1..975fc93c830af21 100644 --- a/llvm/test/CodeGen/RISCV/alloca.ll +++ b/llvm/test/CodeGen/RISCV/alloca.ll @@ -76,13 +76,10 @@ define void @alloca_callframe(i32 %n) nounwind { ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: mv sp, a0 ; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: li a1, 12 -; RV32I-NEXT: sw a1, 12(sp) -; RV32I-NEXT: li a1, 11 -; RV32I-NEXT: sw a1, 8(sp) -; RV32I-NEXT: li a1, 10 -; RV32I-NEXT: sw a1, 4(sp) -; RV32I-NEXT: li t0, 9 +; RV32I-NEXT: li t0, 12 +; RV32I-NEXT: li t1, 11 +; RV32I-NEXT: li t2, 10 +; RV32I-NEXT: li t3, 9 ; RV32I-NEXT: li a1, 2 ; RV32I-NEXT: li a2, 3 ; RV32I-NEXT: li a3, 4 @@ -90,7 +87,10 @@ define void @alloca_callframe(i32 %n) nounwind { ; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: li a6, 7 ; RV32I-NEXT: li a7, 8 -; RV32I-NEXT: sw t0, 0(sp) +; RV32I-NEXT: sw t3, 0(sp) +; RV32I-NEXT: sw t2, 4(sp) +; RV32I-NEXT: sw t1, 8(sp) +; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: call func ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: addi sp, s0, -16 diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index 9908503adb9c300..a87b49e61a8dbcd 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -4882,8 +4882,8 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: mv a3, a4 @@ -4898,8 +4898,8 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: mv a2, a3 ; RV32IA-NEXT: mv a3, a4 @@ -4964,8 +4964,8 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a2, a3 @@ -4981,8 +4981,8 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: mv a2, a3 @@ -5057,8 +5057,8 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a6, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 @@ -5074,8 +5074,8 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 @@ -5150,8 +5150,8 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, -16 ; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a2, a3 @@ -5167,8 +5167,8 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a2, a3 @@ -5243,8 +5243,8 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a6, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: li a5, 2 @@ -5260,8 +5260,8 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: li a5, 2 @@ -5336,8 +5336,8 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: mv a2, a3 @@ -5353,8 +5353,8 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: mv a2, a3 @@ -5429,8 +5429,8 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a6, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 @@ -5446,8 +5446,8 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 @@ -5522,8 +5522,8 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a2, a3 @@ -5539,8 +5539,8 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: mv a2, a3 @@ -5615,8 +5615,8 @@ define void 
@cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a6, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 2 @@ -5632,8 +5632,8 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 2 @@ -5708,8 +5708,8 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv a6, a4 -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 @@ -5725,8 +5725,8 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv a6, a4 -; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) ; RV32IA-NEXT: mv a1, sp ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll index 6d2263f74062df8..f26425c297ab077 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -2767,14 +2767,14 @@ define void @varargs(...) { ; RV32I-NEXT: .cfi_def_cfa_offset 48 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: .cfi_offset ra, -36 -; RV32I-NEXT: sw a7, 44(sp) -; RV32I-NEXT: sw a6, 40(sp) -; RV32I-NEXT: sw a5, 36(sp) ; RV32I-NEXT: sw a4, 32(sp) -; RV32I-NEXT: sw a3, 28(sp) -; RV32I-NEXT: sw a2, 24(sp) -; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a7, 44(sp) ; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: sw a3, 28(sp) ; RV32I-NEXT: call callee ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 @@ -2786,12 +2786,12 @@ define void @varargs(...) { ; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28 ; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill ; RV32I-ILP32E-NEXT: .cfi_offset ra, -28 -; RV32I-ILP32E-NEXT: sw a5, 24(sp) ; RV32I-ILP32E-NEXT: sw a4, 20(sp) -; RV32I-ILP32E-NEXT: sw a3, 16(sp) -; RV32I-ILP32E-NEXT: sw a2, 12(sp) -; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a5, 24(sp) ; RV32I-ILP32E-NEXT: sw a0, 4(sp) +; RV32I-ILP32E-NEXT: sw a1, 8(sp) +; RV32I-ILP32E-NEXT: sw a2, 12(sp) +; RV32I-ILP32E-NEXT: sw a3, 16(sp) ; RV32I-ILP32E-NEXT: call callee ; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: addi sp, sp, 28 @@ -2807,14 +2807,14 @@ define void @varargs(...) 
{ ; RV32I-WITH-FP-NEXT: .cfi_offset s0, -40 ; RV32I-WITH-FP-NEXT: addi s0, sp, 16 ; RV32I-WITH-FP-NEXT: .cfi_def_cfa s0, 32 -; RV32I-WITH-FP-NEXT: sw a7, 28(s0) -; RV32I-WITH-FP-NEXT: sw a6, 24(s0) -; RV32I-WITH-FP-NEXT: sw a5, 20(s0) ; RV32I-WITH-FP-NEXT: sw a4, 16(s0) -; RV32I-WITH-FP-NEXT: sw a3, 12(s0) -; RV32I-WITH-FP-NEXT: sw a2, 8(s0) -; RV32I-WITH-FP-NEXT: sw a1, 4(s0) +; RV32I-WITH-FP-NEXT: sw a5, 20(s0) +; RV32I-WITH-FP-NEXT: sw a6, 24(s0) +; RV32I-WITH-FP-NEXT: sw a7, 28(s0) ; RV32I-WITH-FP-NEXT: sw a0, 0(s0) +; RV32I-WITH-FP-NEXT: sw a1, 4(s0) +; RV32I-WITH-FP-NEXT: sw a2, 8(s0) +; RV32I-WITH-FP-NEXT: sw a3, 12(s0) ; RV32I-WITH-FP-NEXT: call callee ; RV32I-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2827,14 +2827,14 @@ define void @varargs(...) { ; RV32IZCMP-NEXT: .cfi_def_cfa_offset 48 ; RV32IZCMP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: .cfi_offset ra, -36 -; RV32IZCMP-NEXT: sw a7, 44(sp) -; RV32IZCMP-NEXT: sw a6, 40(sp) -; RV32IZCMP-NEXT: sw a5, 36(sp) ; RV32IZCMP-NEXT: sw a4, 32(sp) -; RV32IZCMP-NEXT: sw a3, 28(sp) -; RV32IZCMP-NEXT: sw a2, 24(sp) -; RV32IZCMP-NEXT: sw a1, 20(sp) +; RV32IZCMP-NEXT: sw a5, 36(sp) +; RV32IZCMP-NEXT: sw a6, 40(sp) +; RV32IZCMP-NEXT: sw a7, 44(sp) ; RV32IZCMP-NEXT: sw a0, 16(sp) +; RV32IZCMP-NEXT: sw a1, 20(sp) +; RV32IZCMP-NEXT: sw a2, 24(sp) +; RV32IZCMP-NEXT: sw a3, 28(sp) ; RV32IZCMP-NEXT: call callee ; RV32IZCMP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: addi sp, sp, 48 @@ -2850,14 +2850,14 @@ define void @varargs(...) { ; RV32IZCMP-WITH-FP-NEXT: .cfi_offset s0, -40 ; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 16 ; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 32 -; RV32IZCMP-WITH-FP-NEXT: sw a7, 28(s0) -; RV32IZCMP-WITH-FP-NEXT: sw a6, 24(s0) -; RV32IZCMP-WITH-FP-NEXT: sw a5, 20(s0) ; RV32IZCMP-WITH-FP-NEXT: sw a4, 16(s0) -; RV32IZCMP-WITH-FP-NEXT: sw a3, 12(s0) -; RV32IZCMP-WITH-FP-NEXT: sw a2, 8(s0) -; RV32IZCMP-WITH-FP-NEXT: sw a1, 4(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a5, 20(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a6, 24(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a7, 28(s0) ; RV32IZCMP-WITH-FP-NEXT: sw a0, 0(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a1, 4(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a2, 8(s0) +; RV32IZCMP-WITH-FP-NEXT: sw a3, 12(s0) ; RV32IZCMP-WITH-FP-NEXT: call callee ; RV32IZCMP-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2870,14 +2870,14 @@ define void @varargs(...) { ; RV64I-NEXT: .cfi_def_cfa_offset 80 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: .cfi_offset ra, -72 -; RV64I-NEXT: sd a7, 72(sp) -; RV64I-NEXT: sd a6, 64(sp) -; RV64I-NEXT: sd a5, 56(sp) ; RV64I-NEXT: sd a4, 48(sp) -; RV64I-NEXT: sd a3, 40(sp) -; RV64I-NEXT: sd a2, 32(sp) -; RV64I-NEXT: sd a1, 24(sp) +; RV64I-NEXT: sd a5, 56(sp) +; RV64I-NEXT: sd a6, 64(sp) +; RV64I-NEXT: sd a7, 72(sp) ; RV64I-NEXT: sd a0, 16(sp) +; RV64I-NEXT: sd a1, 24(sp) +; RV64I-NEXT: sd a2, 32(sp) +; RV64I-NEXT: sd a3, 40(sp) ; RV64I-NEXT: call callee ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 @@ -2889,12 +2889,12 @@ define void @varargs(...) 
{ ; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 56 ; RV64I-LP64E-NEXT: sd ra, 0(sp) # 8-byte Folded Spill ; RV64I-LP64E-NEXT: .cfi_offset ra, -56 -; RV64I-LP64E-NEXT: sd a5, 48(sp) ; RV64I-LP64E-NEXT: sd a4, 40(sp) -; RV64I-LP64E-NEXT: sd a3, 32(sp) -; RV64I-LP64E-NEXT: sd a2, 24(sp) -; RV64I-LP64E-NEXT: sd a1, 16(sp) +; RV64I-LP64E-NEXT: sd a5, 48(sp) ; RV64I-LP64E-NEXT: sd a0, 8(sp) +; RV64I-LP64E-NEXT: sd a1, 16(sp) +; RV64I-LP64E-NEXT: sd a2, 24(sp) +; RV64I-LP64E-NEXT: sd a3, 32(sp) ; RV64I-LP64E-NEXT: call callee ; RV64I-LP64E-NEXT: ld ra, 0(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: addi sp, sp, 56 @@ -2910,14 +2910,14 @@ define void @varargs(...) { ; RV64I-WITH-FP-NEXT: .cfi_offset s0, -80 ; RV64I-WITH-FP-NEXT: addi s0, sp, 16 ; RV64I-WITH-FP-NEXT: .cfi_def_cfa s0, 64 -; RV64I-WITH-FP-NEXT: sd a7, 56(s0) -; RV64I-WITH-FP-NEXT: sd a6, 48(s0) -; RV64I-WITH-FP-NEXT: sd a5, 40(s0) ; RV64I-WITH-FP-NEXT: sd a4, 32(s0) -; RV64I-WITH-FP-NEXT: sd a3, 24(s0) -; RV64I-WITH-FP-NEXT: sd a2, 16(s0) -; RV64I-WITH-FP-NEXT: sd a1, 8(s0) +; RV64I-WITH-FP-NEXT: sd a5, 40(s0) +; RV64I-WITH-FP-NEXT: sd a6, 48(s0) +; RV64I-WITH-FP-NEXT: sd a7, 56(s0) ; RV64I-WITH-FP-NEXT: sd a0, 0(s0) +; RV64I-WITH-FP-NEXT: sd a1, 8(s0) +; RV64I-WITH-FP-NEXT: sd a2, 16(s0) +; RV64I-WITH-FP-NEXT: sd a3, 24(s0) ; RV64I-WITH-FP-NEXT: call callee ; RV64I-WITH-FP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -2930,14 +2930,14 @@ define void @varargs(...) { ; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80 ; RV64IZCMP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: .cfi_offset ra, -72 -; RV64IZCMP-NEXT: sd a7, 72(sp) -; RV64IZCMP-NEXT: sd a6, 64(sp) -; RV64IZCMP-NEXT: sd a5, 56(sp) ; RV64IZCMP-NEXT: sd a4, 48(sp) -; RV64IZCMP-NEXT: sd a3, 40(sp) -; RV64IZCMP-NEXT: sd a2, 32(sp) -; RV64IZCMP-NEXT: sd a1, 24(sp) +; RV64IZCMP-NEXT: sd a5, 56(sp) +; RV64IZCMP-NEXT: sd a6, 64(sp) +; RV64IZCMP-NEXT: sd a7, 72(sp) ; RV64IZCMP-NEXT: sd a0, 16(sp) +; RV64IZCMP-NEXT: sd a1, 24(sp) +; RV64IZCMP-NEXT: sd a2, 32(sp) +; RV64IZCMP-NEXT: sd a3, 40(sp) ; RV64IZCMP-NEXT: call callee ; RV64IZCMP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: addi sp, sp, 80 @@ -2953,14 +2953,14 @@ define void @varargs(...) 
{ ; RV64IZCMP-WITH-FP-NEXT: .cfi_offset s0, -80 ; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 16 ; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 64 -; RV64IZCMP-WITH-FP-NEXT: sd a7, 56(s0) -; RV64IZCMP-WITH-FP-NEXT: sd a6, 48(s0) -; RV64IZCMP-WITH-FP-NEXT: sd a5, 40(s0) ; RV64IZCMP-WITH-FP-NEXT: sd a4, 32(s0) -; RV64IZCMP-WITH-FP-NEXT: sd a3, 24(s0) -; RV64IZCMP-WITH-FP-NEXT: sd a2, 16(s0) -; RV64IZCMP-WITH-FP-NEXT: sd a1, 8(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a5, 40(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a6, 48(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a7, 56(s0) ; RV64IZCMP-WITH-FP-NEXT: sd a0, 0(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a1, 8(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a2, 16(s0) +; RV64IZCMP-WITH-FP-NEXT: sd a3, 24(s0) ; RV64IZCMP-WITH-FP-NEXT: call callee ; RV64IZCMP-WITH-FP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-WITH-FP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll index 278187f62cd75ee..e97a3bff32fac70 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -146,30 +146,27 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: li a0, 18 +; RV32I-FPELIM-NEXT: li a1, 17 +; RV32I-FPELIM-NEXT: sw a1, 20(sp) ; RV32I-FPELIM-NEXT: sw a0, 24(sp) -; RV32I-FPELIM-NEXT: li a0, 17 -; RV32I-FPELIM-NEXT: sw a0, 20(sp) ; RV32I-FPELIM-NEXT: li a0, 16 +; RV32I-FPELIM-NEXT: lui a1, 262236 +; RV32I-FPELIM-NEXT: addi a1, a1, 655 +; RV32I-FPELIM-NEXT: lui a2, 377487 +; RV32I-FPELIM-NEXT: addi a2, a2, 1475 +; RV32I-FPELIM-NEXT: li a3, 15 +; RV32I-FPELIM-NEXT: sw a3, 0(sp) +; RV32I-FPELIM-NEXT: sw a2, 8(sp) +; RV32I-FPELIM-NEXT: sw a1, 12(sp) ; RV32I-FPELIM-NEXT: sw a0, 16(sp) -; RV32I-FPELIM-NEXT: lui a0, 262236 -; RV32I-FPELIM-NEXT: addi a0, a0, 655 -; RV32I-FPELIM-NEXT: sw a0, 12(sp) -; RV32I-FPELIM-NEXT: lui a0, 377487 -; RV32I-FPELIM-NEXT: addi a0, a0, 1475 -; RV32I-FPELIM-NEXT: sw a0, 8(sp) -; RV32I-FPELIM-NEXT: li a0, 15 -; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 262153 -; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: addi t0, a0, 491 ; RV32I-FPELIM-NEXT: lui a0, 545260 -; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: addi t1, a0, -1967 ; RV32I-FPELIM-NEXT: lui a0, 964690 -; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: addi t2, a0, -328 ; RV32I-FPELIM-NEXT: lui a0, 335544 -; RV32I-FPELIM-NEXT: addi t0, a0, 1311 +; RV32I-FPELIM-NEXT: addi t3, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: li a0, 1 @@ -179,7 +176,10 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM-NEXT: li a4, 13 ; RV32I-FPELIM-NEXT: li a6, 4 ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t3, 32(sp) +; RV32I-FPELIM-NEXT: sw t2, 36(sp) +; RV32I-FPELIM-NEXT: sw t1, 40(sp) +; RV32I-FPELIM-NEXT: sw t0, 44(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -192,30 +192,27 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 ; RV32I-WITHFP-NEXT: li a0, 18 +; 
RV32I-WITHFP-NEXT: li a1, 17 +; RV32I-WITHFP-NEXT: sw a1, 20(sp) ; RV32I-WITHFP-NEXT: sw a0, 24(sp) -; RV32I-WITHFP-NEXT: li a0, 17 -; RV32I-WITHFP-NEXT: sw a0, 20(sp) ; RV32I-WITHFP-NEXT: li a0, 16 +; RV32I-WITHFP-NEXT: lui a1, 262236 +; RV32I-WITHFP-NEXT: addi a1, a1, 655 +; RV32I-WITHFP-NEXT: lui a2, 377487 +; RV32I-WITHFP-NEXT: addi a2, a2, 1475 +; RV32I-WITHFP-NEXT: li a3, 15 +; RV32I-WITHFP-NEXT: sw a3, 0(sp) +; RV32I-WITHFP-NEXT: sw a2, 8(sp) +; RV32I-WITHFP-NEXT: sw a1, 12(sp) ; RV32I-WITHFP-NEXT: sw a0, 16(sp) -; RV32I-WITHFP-NEXT: lui a0, 262236 -; RV32I-WITHFP-NEXT: addi a0, a0, 655 -; RV32I-WITHFP-NEXT: sw a0, 12(sp) -; RV32I-WITHFP-NEXT: lui a0, 377487 -; RV32I-WITHFP-NEXT: addi a0, a0, 1475 -; RV32I-WITHFP-NEXT: sw a0, 8(sp) -; RV32I-WITHFP-NEXT: li a0, 15 -; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 -; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: addi t0, a0, 491 ; RV32I-WITHFP-NEXT: lui a0, 545260 -; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: addi t1, a0, -1967 ; RV32I-WITHFP-NEXT: lui a0, 964690 -; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: addi t2, a0, -328 ; RV32I-WITHFP-NEXT: lui a0, 335544 -; RV32I-WITHFP-NEXT: addi t0, a0, 1311 +; RV32I-WITHFP-NEXT: addi t3, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: li a0, 1 @@ -225,7 +222,10 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: li a4, 13 ; RV32I-WITHFP-NEXT: li a6, 4 ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t3, -32(s0) +; RV32I-WITHFP-NEXT: sw t2, -28(s0) +; RV32I-WITHFP-NEXT: sw t1, -24(s0) +; RV32I-WITHFP-NEXT: sw t0, -20(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index bb082b0314d5998..9e4c8a6e3320c2b 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -142,8 +142,7 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -16 ; RV32I-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: li a0, 8 -; RV32I-FPELIM-NEXT: sw a0, 4(sp) +; RV32I-FPELIM-NEXT: li a4, 8 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 2 ; RV32I-FPELIM-NEXT: li a2, 3 @@ -152,6 +151,7 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-FPELIM-NEXT: li a6, 6 ; RV32I-FPELIM-NEXT: li a7, 7 ; RV32I-FPELIM-NEXT: sw zero, 0(sp) +; RV32I-FPELIM-NEXT: sw a4, 4(sp) ; RV32I-FPELIM-NEXT: li a4, 0 ; RV32I-FPELIM-NEXT: call callee_many_scalars ; RV32I-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -164,8 +164,7 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: li a0, 8 -; RV32I-WITHFP-NEXT: sw a0, 4(sp) +; RV32I-WITHFP-NEXT: li a4, 8 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 2 ; RV32I-WITHFP-NEXT: li a2, 3 @@ -174,6 +173,7 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-WITHFP-NEXT: li a6, 6 ; 
RV32I-WITHFP-NEXT: li a7, 7 ; RV32I-WITHFP-NEXT: sw zero, 0(sp) +; RV32I-WITHFP-NEXT: sw a4, 4(sp) ; RV32I-WITHFP-NEXT: li a4, 0 ; RV32I-WITHFP-NEXT: call callee_many_scalars ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -246,17 +246,17 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 12(sp) -; RV32I-FPELIM-NEXT: sw zero, 8(sp) -; RV32I-FPELIM-NEXT: sw zero, 4(sp) ; RV32I-FPELIM-NEXT: sw zero, 0(sp) -; RV32I-FPELIM-NEXT: sw zero, 36(sp) -; RV32I-FPELIM-NEXT: sw zero, 32(sp) -; RV32I-FPELIM-NEXT: sw zero, 28(sp) +; RV32I-FPELIM-NEXT: sw zero, 4(sp) +; RV32I-FPELIM-NEXT: sw zero, 8(sp) +; RV32I-FPELIM-NEXT: sw a0, 12(sp) ; RV32I-FPELIM-NEXT: li a2, 1 ; RV32I-FPELIM-NEXT: addi a0, sp, 24 ; RV32I-FPELIM-NEXT: mv a1, sp ; RV32I-FPELIM-NEXT: sw a2, 24(sp) +; RV32I-FPELIM-NEXT: sw zero, 28(sp) +; RV32I-FPELIM-NEXT: sw zero, 32(sp) +; RV32I-FPELIM-NEXT: sw zero, 36(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars ; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 48 @@ -269,17 +269,17 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) -; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) ; RV32I-WITHFP-NEXT: sw zero, -48(s0) -; RV32I-WITHFP-NEXT: sw zero, -12(s0) -; RV32I-WITHFP-NEXT: sw zero, -16(s0) -; RV32I-WITHFP-NEXT: sw zero, -20(s0) +; RV32I-WITHFP-NEXT: sw zero, -44(s0) +; RV32I-WITHFP-NEXT: sw zero, -40(s0) +; RV32I-WITHFP-NEXT: sw a0, -36(s0) ; RV32I-WITHFP-NEXT: li a2, 1 ; RV32I-WITHFP-NEXT: addi a0, s0, -24 ; RV32I-WITHFP-NEXT: addi a1, s0, -48 ; RV32I-WITHFP-NEXT: sw a2, -24(s0) +; RV32I-WITHFP-NEXT: sw zero, -20(s0) +; RV32I-WITHFP-NEXT: sw zero, -16(s0) +; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars ; RV32I-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -354,18 +354,15 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: addi a0, sp, 16 +; RV32I-FPELIM-NEXT: li a1, 9 +; RV32I-FPELIM-NEXT: sw a1, 0(sp) ; RV32I-FPELIM-NEXT: sw a0, 4(sp) -; RV32I-FPELIM-NEXT: li a0, 9 -; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 28(sp) -; RV32I-FPELIM-NEXT: sw zero, 24(sp) -; RV32I-FPELIM-NEXT: sw zero, 20(sp) ; RV32I-FPELIM-NEXT: sw zero, 16(sp) -; RV32I-FPELIM-NEXT: sw zero, 52(sp) -; RV32I-FPELIM-NEXT: sw zero, 48(sp) -; RV32I-FPELIM-NEXT: li a0, 8 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: sw zero, 20(sp) +; RV32I-FPELIM-NEXT: sw zero, 24(sp) +; RV32I-FPELIM-NEXT: sw a0, 28(sp) +; RV32I-FPELIM-NEXT: li t0, 8 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 2 ; RV32I-FPELIM-NEXT: li a2, 3 @@ -374,7 +371,10 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM-NEXT: li a5, 6 ; RV32I-FPELIM-NEXT: li a6, 7 ; RV32I-FPELIM-NEXT: addi a7, sp, 40 +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: sw zero, 44(sp) +; RV32I-FPELIM-NEXT: sw zero, 48(sp) +; RV32I-FPELIM-NEXT: sw zero, 52(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-FPELIM-NEXT: lw 
ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -387,18 +387,15 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 ; RV32I-WITHFP-NEXT: addi a0, s0, -48 +; RV32I-WITHFP-NEXT: li a1, 9 +; RV32I-WITHFP-NEXT: sw a1, 0(sp) ; RV32I-WITHFP-NEXT: sw a0, 4(sp) -; RV32I-WITHFP-NEXT: li a0, 9 -; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) -; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) ; RV32I-WITHFP-NEXT: sw zero, -48(s0) -; RV32I-WITHFP-NEXT: sw zero, -12(s0) -; RV32I-WITHFP-NEXT: sw zero, -16(s0) -; RV32I-WITHFP-NEXT: li a0, 8 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw zero, -44(s0) +; RV32I-WITHFP-NEXT: sw zero, -40(s0) +; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: li t0, 8 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 2 ; RV32I-WITHFP-NEXT: li a2, 3 @@ -407,7 +404,10 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-WITHFP-NEXT: li a5, 6 ; RV32I-WITHFP-NEXT: li a6, 7 ; RV32I-WITHFP-NEXT: addi a7, s0, -24 +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) +; RV32I-WITHFP-NEXT: sw zero, -16(s0) +; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -553,12 +553,12 @@ define i32 @caller_large_struct() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: li a0, 1 -; RV32I-FPELIM-NEXT: sw a0, 24(sp) ; RV32I-FPELIM-NEXT: li a1, 2 -; RV32I-FPELIM-NEXT: sw a1, 28(sp) ; RV32I-FPELIM-NEXT: li a2, 3 -; RV32I-FPELIM-NEXT: sw a2, 32(sp) ; RV32I-FPELIM-NEXT: li a3, 4 +; RV32I-FPELIM-NEXT: sw a0, 24(sp) +; RV32I-FPELIM-NEXT: sw a1, 28(sp) +; RV32I-FPELIM-NEXT: sw a2, 32(sp) ; RV32I-FPELIM-NEXT: sw a3, 36(sp) ; RV32I-FPELIM-NEXT: sw a0, 8(sp) ; RV32I-FPELIM-NEXT: sw a1, 12(sp) @@ -577,12 +577,12 @@ define i32 @caller_large_struct() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 ; RV32I-WITHFP-NEXT: li a0, 1 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) ; RV32I-WITHFP-NEXT: li a1, 2 -; RV32I-WITHFP-NEXT: sw a1, -20(s0) ; RV32I-WITHFP-NEXT: li a2, 3 -; RV32I-WITHFP-NEXT: sw a2, -16(s0) ; RV32I-WITHFP-NEXT: li a3, 4 +; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw a1, -20(s0) +; RV32I-WITHFP-NEXT: sw a2, -16(s0) ; RV32I-WITHFP-NEXT: sw a3, -12(s0) ; RV32I-WITHFP-NEXT: sw a0, -40(s0) ; RV32I-WITHFP-NEXT: sw a1, -36(s0) @@ -665,27 +665,24 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: li a0, 19 +; RV32I-FPELIM-NEXT: li a1, 18 +; RV32I-FPELIM-NEXT: sw a1, 20(sp) ; RV32I-FPELIM-NEXT: sw a0, 24(sp) -; RV32I-FPELIM-NEXT: li a0, 18 -; RV32I-FPELIM-NEXT: sw a0, 20(sp) ; RV32I-FPELIM-NEXT: li a0, 17 -; RV32I-FPELIM-NEXT: sw a0, 16(sp) +; RV32I-FPELIM-NEXT: li a1, 16 +; RV32I-FPELIM-NEXT: li a2, 15 +; RV32I-FPELIM-NEXT: sw a2, 0(sp) +; RV32I-FPELIM-NEXT: sw a1, 8(sp) ; RV32I-FPELIM-NEXT: sw zero, 12(sp) -; RV32I-FPELIM-NEXT: li a0, 16 -; RV32I-FPELIM-NEXT: sw a0, 8(sp) -; RV32I-FPELIM-NEXT: li a0, 15 -; RV32I-FPELIM-NEXT: sw a0, 0(sp) +; RV32I-FPELIM-NEXT: sw a0, 16(sp) ; RV32I-FPELIM-NEXT: lui a0, 
262153 -; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: addi t0, a0, 491 ; RV32I-FPELIM-NEXT: lui a0, 545260 -; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: addi t1, a0, -1967 ; RV32I-FPELIM-NEXT: lui a0, 964690 -; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: addi t2, a0, -328 ; RV32I-FPELIM-NEXT: lui a0, 335544 -; RV32I-FPELIM-NEXT: addi t0, a0, 1311 +; RV32I-FPELIM-NEXT: addi t3, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: li a0, 1 @@ -695,7 +692,10 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM-NEXT: li a4, 13 ; RV32I-FPELIM-NEXT: li a6, 4 ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t3, 32(sp) +; RV32I-FPELIM-NEXT: sw t2, 36(sp) +; RV32I-FPELIM-NEXT: sw t1, 40(sp) +; RV32I-FPELIM-NEXT: sw t0, 44(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -708,27 +708,24 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 ; RV32I-WITHFP-NEXT: li a0, 19 +; RV32I-WITHFP-NEXT: li a1, 18 +; RV32I-WITHFP-NEXT: sw a1, 20(sp) ; RV32I-WITHFP-NEXT: sw a0, 24(sp) -; RV32I-WITHFP-NEXT: li a0, 18 -; RV32I-WITHFP-NEXT: sw a0, 20(sp) ; RV32I-WITHFP-NEXT: li a0, 17 -; RV32I-WITHFP-NEXT: sw a0, 16(sp) +; RV32I-WITHFP-NEXT: li a1, 16 +; RV32I-WITHFP-NEXT: li a2, 15 +; RV32I-WITHFP-NEXT: sw a2, 0(sp) +; RV32I-WITHFP-NEXT: sw a1, 8(sp) ; RV32I-WITHFP-NEXT: sw zero, 12(sp) -; RV32I-WITHFP-NEXT: li a0, 16 -; RV32I-WITHFP-NEXT: sw a0, 8(sp) -; RV32I-WITHFP-NEXT: li a0, 15 -; RV32I-WITHFP-NEXT: sw a0, 0(sp) +; RV32I-WITHFP-NEXT: sw a0, 16(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 -; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: addi t0, a0, 491 ; RV32I-WITHFP-NEXT: lui a0, 545260 -; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: addi t1, a0, -1967 ; RV32I-WITHFP-NEXT: lui a0, 964690 -; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: addi t2, a0, -328 ; RV32I-WITHFP-NEXT: lui a0, 335544 -; RV32I-WITHFP-NEXT: addi t0, a0, 1311 +; RV32I-WITHFP-NEXT: addi t3, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: li a0, 1 @@ -738,7 +735,10 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: li a4, 13 ; RV32I-WITHFP-NEXT: li a6, 4 ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t3, -32(s0) +; RV32I-WITHFP-NEXT: sw t2, -28(s0) +; RV32I-WITHFP-NEXT: sw t1, -24(s0) +; RV32I-WITHFP-NEXT: sw t0, -20(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -881,10 +881,10 @@ define fp128 @callee_large_scalar_ret() nounwind { ; RV32I-FPELIM-LABEL: callee_large_scalar_ret: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lui a1, 524272 -; RV32I-FPELIM-NEXT: sw a1, 12(a0) -; RV32I-FPELIM-NEXT: sw zero, 8(a0) -; RV32I-FPELIM-NEXT: sw zero, 4(a0) ; RV32I-FPELIM-NEXT: sw zero, 0(a0) +; RV32I-FPELIM-NEXT: sw zero, 4(a0) +; RV32I-FPELIM-NEXT: sw zero, 8(a0) +; RV32I-FPELIM-NEXT: sw a1, 12(a0) ; RV32I-FPELIM-NEXT: ret 
; ; RV32I-WITHFP-LABEL: callee_large_scalar_ret: @@ -894,10 +894,10 @@ define fp128 @callee_large_scalar_ret() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lui a1, 524272 -; RV32I-WITHFP-NEXT: sw a1, 12(a0) -; RV32I-WITHFP-NEXT: sw zero, 8(a0) -; RV32I-WITHFP-NEXT: sw zero, 4(a0) ; RV32I-WITHFP-NEXT: sw zero, 0(a0) +; RV32I-WITHFP-NEXT: sw zero, 4(a0) +; RV32I-WITHFP-NEXT: sw zero, 8(a0) +; RV32I-WITHFP-NEXT: sw a1, 12(a0) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -938,13 +938,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; RV32I-FPELIM-LABEL: callee_large_struct_ret: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: li a1, 1 +; RV32I-FPELIM-NEXT: li a2, 2 +; RV32I-FPELIM-NEXT: li a3, 3 +; RV32I-FPELIM-NEXT: li a4, 4 ; RV32I-FPELIM-NEXT: sw a1, 0(a0) -; RV32I-FPELIM-NEXT: li a1, 2 -; RV32I-FPELIM-NEXT: sw a1, 4(a0) -; RV32I-FPELIM-NEXT: li a1, 3 -; RV32I-FPELIM-NEXT: sw a1, 8(a0) -; RV32I-FPELIM-NEXT: li a1, 4 -; RV32I-FPELIM-NEXT: sw a1, 12(a0) +; RV32I-FPELIM-NEXT: sw a2, 4(a0) +; RV32I-FPELIM-NEXT: sw a3, 8(a0) +; RV32I-FPELIM-NEXT: sw a4, 12(a0) ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_large_struct_ret: @@ -954,13 +954,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: li a1, 1 +; RV32I-WITHFP-NEXT: li a2, 2 +; RV32I-WITHFP-NEXT: li a3, 3 +; RV32I-WITHFP-NEXT: li a4, 4 ; RV32I-WITHFP-NEXT: sw a1, 0(a0) -; RV32I-WITHFP-NEXT: li a1, 2 -; RV32I-WITHFP-NEXT: sw a1, 4(a0) -; RV32I-WITHFP-NEXT: li a1, 3 -; RV32I-WITHFP-NEXT: sw a1, 8(a0) -; RV32I-WITHFP-NEXT: li a1, 4 -; RV32I-WITHFP-NEXT: sw a1, 12(a0) +; RV32I-WITHFP-NEXT: sw a2, 4(a0) +; RV32I-WITHFP-NEXT: sw a3, 8(a0) +; RV32I-WITHFP-NEXT: sw a4, 12(a0) ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll index bcceea7ac35b3e2..1321413fbc57e8f 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll @@ -203,8 +203,7 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: lui a0, 262816 -; RV32-ILP32D-NEXT: sw a0, 4(sp) +; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_0) ; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI9_0)(a0) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_1) @@ -226,6 +225,7 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32D-NEXT: li a4, 5 ; RV32-ILP32D-NEXT: li a6, 7 ; RV32-ILP32D-NEXT: sw zero, 0(sp) +; RV32-ILP32D-NEXT: sw a1, 4(sp) ; RV32-ILP32D-NEXT: li a1, 0 ; RV32-ILP32D-NEXT: li a3, 0 ; RV32-ILP32D-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index 708cb00d1c45c64..948645948f64d23 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -199,14 +199,14 @@ define i32 @caller_float_on_stack() { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 16 ; 
ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 -; ILP32E-FPELIM-NEXT: lui a0, 264704 -; ILP32E-FPELIM-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-NEXT: sw zero, 4(sp) -; ILP32E-FPELIM-NEXT: li a1, 4 +; ILP32E-FPELIM-NEXT: lui a1, 264704 +; ILP32E-FPELIM-NEXT: li a3, 4 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a2, 2 ; ILP32E-FPELIM-NEXT: li a4, 3 -; ILP32E-FPELIM-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-NEXT: li a1, 0 ; ILP32E-FPELIM-NEXT: li a3, 0 ; ILP32E-FPELIM-NEXT: li a5, 0 @@ -225,14 +225,14 @@ define i32 @caller_float_on_stack() { ; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-NEXT: addi s0, sp, 20 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-NEXT: lui a0, 264704 -; ILP32E-WITHFP-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-NEXT: sw zero, 4(sp) -; ILP32E-WITHFP-NEXT: li a1, 4 +; ILP32E-WITHFP-NEXT: lui a1, 264704 +; ILP32E-WITHFP-NEXT: li a3, 4 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a2, 2 ; ILP32E-WITHFP-NEXT: li a4, 3 -; ILP32E-WITHFP-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-NEXT: li a1, 0 ; ILP32E-WITHFP-NEXT: li a3, 0 ; ILP32E-WITHFP-NEXT: li a5, 0 @@ -248,14 +248,14 @@ define i32 @caller_float_on_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -12 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 264704 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 264704 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0 @@ -272,14 +272,14 @@ define i32 @caller_float_on_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 20 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 264704 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 264704 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0 @@ -630,34 +630,31 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-NEXT: li a0, 18 +; ILP32E-FPELIM-NEXT: li a1, 17 +; ILP32E-FPELIM-NEXT: li a2, 
16 +; ILP32E-FPELIM-NEXT: lui a3, 262236 +; ILP32E-FPELIM-NEXT: addi a3, a3, 655 +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-NEXT: sw a2, 20(sp) +; ILP32E-FPELIM-NEXT: sw a1, 24(sp) ; ILP32E-FPELIM-NEXT: sw a0, 28(sp) -; ILP32E-FPELIM-NEXT: li a0, 17 -; ILP32E-FPELIM-NEXT: sw a0, 24(sp) -; ILP32E-FPELIM-NEXT: li a0, 16 -; ILP32E-FPELIM-NEXT: sw a0, 20(sp) -; ILP32E-FPELIM-NEXT: lui a0, 262236 -; ILP32E-FPELIM-NEXT: addi a0, a0, 655 -; ILP32E-FPELIM-NEXT: sw a0, 16(sp) ; ILP32E-FPELIM-NEXT: lui a0, 377487 ; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-NEXT: li a1, 15 +; ILP32E-FPELIM-NEXT: li a2, 14 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-NEXT: li a0, 15 -; ILP32E-FPELIM-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-NEXT: li a0, 14 -; ILP32E-FPELIM-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-NEXT: li a0, 4 -; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: lui a0, 262153 -; ILP32E-FPELIM-NEXT: addi a0, a0, 491 -; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: addi a6, a0, 491 ; ILP32E-FPELIM-NEXT: lui a0, 545260 -; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 -; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: addi a7, a0, -1967 ; ILP32E-FPELIM-NEXT: lui a0, 964690 -; ILP32E-FPELIM-NEXT: addi a0, a0, -328 -; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: addi t0, a0, -328 ; ILP32E-FPELIM-NEXT: lui a0, 335544 -; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: addi t1, a0, 1311 ; ILP32E-FPELIM-NEXT: lui a0, 688509 ; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 ; ILP32E-FPELIM-NEXT: li a0, 1 @@ -665,7 +662,10 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-NEXT: addi a2, sp, 32 ; ILP32E-FPELIM-NEXT: li a3, 12 ; ILP32E-FPELIM-NEXT: li a4, 13 -; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: sw t1, 32(sp) +; ILP32E-FPELIM-NEXT: sw t0, 36(sp) +; ILP32E-FPELIM-NEXT: sw a7, 40(sp) +; ILP32E-FPELIM-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-NEXT: call callee_aligned_stack ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -685,34 +685,31 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-NEXT: li a0, 18 +; ILP32E-WITHFP-NEXT: li a1, 17 +; ILP32E-WITHFP-NEXT: li a2, 16 +; ILP32E-WITHFP-NEXT: lui a3, 262236 +; ILP32E-WITHFP-NEXT: addi a3, a3, 655 +; ILP32E-WITHFP-NEXT: sw a3, 16(sp) +; ILP32E-WITHFP-NEXT: sw a2, 20(sp) +; ILP32E-WITHFP-NEXT: sw a1, 24(sp) ; ILP32E-WITHFP-NEXT: sw a0, 28(sp) -; ILP32E-WITHFP-NEXT: li a0, 17 -; ILP32E-WITHFP-NEXT: sw a0, 24(sp) -; ILP32E-WITHFP-NEXT: li a0, 16 -; ILP32E-WITHFP-NEXT: sw a0, 20(sp) -; ILP32E-WITHFP-NEXT: lui a0, 262236 -; ILP32E-WITHFP-NEXT: addi a0, a0, 655 -; ILP32E-WITHFP-NEXT: sw a0, 16(sp) ; ILP32E-WITHFP-NEXT: lui a0, 377487 ; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-NEXT: li a1, 15 +; ILP32E-WITHFP-NEXT: li a2, 14 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-NEXT: li a0, 15 -; ILP32E-WITHFP-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-NEXT: li a0, 14 -; ILP32E-WITHFP-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-NEXT: li a0, 4 -; ILP32E-WITHFP-NEXT: sw a0, 0(sp) ; ILP32E-WITHFP-NEXT: lui a0, 262153 -; ILP32E-WITHFP-NEXT: addi a0, a0, 491 -; ILP32E-WITHFP-NEXT: sw a0, 44(sp) 
+; ILP32E-WITHFP-NEXT: addi a6, a0, 491 ; ILP32E-WITHFP-NEXT: lui a0, 545260 -; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 -; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: addi a7, a0, -1967 ; ILP32E-WITHFP-NEXT: lui a0, 964690 -; ILP32E-WITHFP-NEXT: addi a0, a0, -328 -; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: addi t0, a0, -328 ; ILP32E-WITHFP-NEXT: lui a0, 335544 -; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: addi t1, a0, 1311 ; ILP32E-WITHFP-NEXT: lui a0, 688509 ; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 ; ILP32E-WITHFP-NEXT: li a0, 1 @@ -720,7 +717,10 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-NEXT: addi a2, sp, 32 ; ILP32E-WITHFP-NEXT: li a3, 12 ; ILP32E-WITHFP-NEXT: li a4, 13 -; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: sw t1, 32(sp) +; ILP32E-WITHFP-NEXT: sw t0, 36(sp) +; ILP32E-WITHFP-NEXT: sw a7, 40(sp) +; ILP32E-WITHFP-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-NEXT: call callee_aligned_stack ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -739,34 +739,31 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 17 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a3, 262236 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a3, a3, 655 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 17 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 20(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262236 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 655 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 377487 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 1475 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 15 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 14 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 15 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 14 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 491 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 491 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -1967 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, a0, -1967 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -328 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, a0, -328 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t1, a0, 1311 ; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 688509 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a0, -2048 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 @@ -774,7 +771,10 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56 @@ -791,34 +791,31 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 17 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a3, 262236 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a3, a3, 655 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 24(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 17 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 20(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262236 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 655 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 16(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 377487 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 1475 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 15 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 14 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 15 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 14 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 491 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 491 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -1967 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, a0, -1967 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -328 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, a0, -328 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t1, a0, 1311 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 688509 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a0, -2048 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 @@ -826,7 +823,10 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li 
a4, 13 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56 @@ -1163,18 +1163,18 @@ define i32 @caller_many_scalars() { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 20 ; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 -; ILP32E-FPELIM-NEXT: li a0, 8 -; ILP32E-FPELIM-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-NEXT: li a0, 7 -; ILP32E-FPELIM-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-NEXT: li a4, 6 +; ILP32E-FPELIM-NEXT: li a4, 8 +; ILP32E-FPELIM-NEXT: li a6, 7 +; ILP32E-FPELIM-NEXT: li a7, 6 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 2 ; ILP32E-FPELIM-NEXT: li a2, 3 ; ILP32E-FPELIM-NEXT: li a3, 4 ; ILP32E-FPELIM-NEXT: li a5, 5 -; ILP32E-FPELIM-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-NEXT: sw a7, 0(sp) +; ILP32E-FPELIM-NEXT: sw a6, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-NEXT: li a4, 0 ; ILP32E-FPELIM-NEXT: call callee_many_scalars ; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload @@ -1191,18 +1191,18 @@ define i32 @caller_many_scalars() { ; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-NEXT: addi s0, sp, 24 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-NEXT: li a0, 8 -; ILP32E-WITHFP-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-NEXT: li a0, 7 -; ILP32E-WITHFP-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-NEXT: li a4, 6 +; ILP32E-WITHFP-NEXT: li a4, 8 +; ILP32E-WITHFP-NEXT: li a6, 7 +; ILP32E-WITHFP-NEXT: li a7, 6 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 2 ; ILP32E-WITHFP-NEXT: li a2, 3 ; ILP32E-WITHFP-NEXT: li a3, 4 ; ILP32E-WITHFP-NEXT: li a5, 5 -; ILP32E-WITHFP-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-NEXT: sw a7, 0(sp) +; ILP32E-WITHFP-NEXT: sw a6, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-NEXT: li a4, 0 ; ILP32E-WITHFP-NEXT: call callee_many_scalars ; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload @@ -1216,18 +1216,18 @@ define i32 @caller_many_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 6 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: 
call callee_many_scalars ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 @@ -1242,18 +1242,18 @@ define i32 @caller_many_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 6 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 @@ -1380,17 +1380,17 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-NEXT: lui a0, 524272 -; ILP32E-FPELIM-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-NEXT: sw zero, 0(sp) -; ILP32E-FPELIM-NEXT: sw zero, 36(sp) -; ILP32E-FPELIM-NEXT: sw zero, 32(sp) -; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) ; ILP32E-FPELIM-NEXT: li a2, 1 ; ILP32E-FPELIM-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-NEXT: mv a1, sp ; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-NEXT: sw zero, 36(sp) ; ILP32E-FPELIM-NEXT: call callee_large_scalars ; ILP32E-FPELIM-NEXT: addi sp, s0, -48 ; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1410,17 +1410,17 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-NEXT: lui a0, 524272 -; ILP32E-WITHFP-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-NEXT: sw zero, 0(sp) -; ILP32E-WITHFP-NEXT: sw zero, 36(sp) -; ILP32E-WITHFP-NEXT: sw zero, 32(sp) -; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) ; ILP32E-WITHFP-NEXT: li a2, 1 ; ILP32E-WITHFP-NEXT: addi a0, sp, 24 ; ILP32E-WITHFP-NEXT: mv a1, sp ; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-NEXT: sw zero, 36(sp) ; ILP32E-WITHFP-NEXT: call callee_large_scalars ; ILP32E-WITHFP-NEXT: addi sp, s0, -48 ; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1439,17 +1439,17 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 40 @@ -1466,17 +1466,17 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 40 @@ -1613,29 +1613,29 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: li a1, 9 +; ILP32E-FPELIM-NEXT: addi a2, sp, 40 +; ILP32E-FPELIM-NEXT: li a3, 7 +; ILP32E-FPELIM-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-NEXT: li a0, 9 -; ILP32E-FPELIM-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-NEXT: addi a0, sp, 40 -; ILP32E-FPELIM-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-NEXT: li a0, 7 -; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: lui a0, 524272 -; ILP32E-FPELIM-NEXT: sw a0, 28(sp) -; ILP32E-FPELIM-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-NEXT: sw zero, 16(sp) -; ILP32E-FPELIM-NEXT: sw zero, 52(sp) -; ILP32E-FPELIM-NEXT: sw zero, 48(sp) -; ILP32E-FPELIM-NEXT: li a0, 8 -; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: li a6, 8 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 2 ; ILP32E-FPELIM-NEXT: li a2, 3 ; ILP32E-FPELIM-NEXT: li a3, 4 ; ILP32E-FPELIM-NEXT: li a4, 5 ; ILP32E-FPELIM-NEXT: li a5, 6 +; ILP32E-FPELIM-NEXT: sw a6, 
40(sp) ; ILP32E-FPELIM-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-NEXT: sw zero, 52(sp) ; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -1655,29 +1655,29 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: li a1, 9 +; ILP32E-WITHFP-NEXT: addi a2, sp, 40 +; ILP32E-WITHFP-NEXT: li a3, 7 +; ILP32E-WITHFP-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-NEXT: li a0, 9 -; ILP32E-WITHFP-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-NEXT: addi a0, sp, 40 -; ILP32E-WITHFP-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-NEXT: li a0, 7 -; ILP32E-WITHFP-NEXT: sw a0, 0(sp) ; ILP32E-WITHFP-NEXT: lui a0, 524272 -; ILP32E-WITHFP-NEXT: sw a0, 28(sp) -; ILP32E-WITHFP-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-NEXT: sw zero, 16(sp) -; ILP32E-WITHFP-NEXT: sw zero, 52(sp) -; ILP32E-WITHFP-NEXT: sw zero, 48(sp) -; ILP32E-WITHFP-NEXT: li a0, 8 -; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a6, 8 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 2 ; ILP32E-WITHFP-NEXT: li a2, 3 ; ILP32E-WITHFP-NEXT: li a3, 4 ; ILP32E-WITHFP-NEXT: li a4, 5 ; ILP32E-WITHFP-NEXT: li a5, 6 +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) ; ILP32E-WITHFP-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-NEXT: sw zero, 52(sp) ; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -1696,29 +1696,29 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 9 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 40 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 8 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 40(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56 @@ -1735,29 +1735,29 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 9 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 40 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 40(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56 @@ -2034,12 +2034,12 @@ define i32 @caller_large_struct() { ; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-NEXT: li a0, 1 -; ILP32E-FPELIM-NEXT: sw a0, 16(sp) ; ILP32E-FPELIM-NEXT: li a1, 2 -; ILP32E-FPELIM-NEXT: sw a1, 20(sp) ; ILP32E-FPELIM-NEXT: li a2, 3 -; ILP32E-FPELIM-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-NEXT: sw a3, 28(sp) ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: sw a1, 4(sp) @@ -2062,12 +2062,12 @@ define i32 @caller_large_struct() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 40 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: li a0, 1 -; ILP32E-WITHFP-NEXT: sw a0, -24(s0) ; 
ILP32E-WITHFP-NEXT: li a1, 2 -; ILP32E-WITHFP-NEXT: sw a1, -20(s0) ; ILP32E-WITHFP-NEXT: li a2, 3 -; ILP32E-WITHFP-NEXT: sw a2, -16(s0) ; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-NEXT: sw a2, -16(s0) ; ILP32E-WITHFP-NEXT: sw a3, -12(s0) ; ILP32E-WITHFP-NEXT: sw a0, -40(s0) ; ILP32E-WITHFP-NEXT: sw a1, -36(s0) @@ -2087,12 +2087,12 @@ define i32 @caller_large_struct() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 36 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(sp) @@ -2113,12 +2113,12 @@ define i32 @caller_large_struct() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -12(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -40(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -36(s0) @@ -2252,10 +2252,10 @@ define fp128 @callee_large_scalar_ret() { ; ILP32E-FPELIM-LABEL: callee_large_scalar_ret: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: lui a1, 524272 -; ILP32E-FPELIM-NEXT: sw a1, 12(a0) -; ILP32E-FPELIM-NEXT: sw zero, 8(a0) -; ILP32E-FPELIM-NEXT: sw zero, 4(a0) ; ILP32E-FPELIM-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-NEXT: sw zero, 4(a0) +; ILP32E-FPELIM-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-NEXT: sw a1, 12(a0) ; ILP32E-FPELIM-NEXT: ret ; ; ILP32E-WITHFP-LABEL: callee_large_scalar_ret: @@ -2269,10 +2269,10 @@ define fp128 @callee_large_scalar_ret() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: lui a1, 524272 -; ILP32E-WITHFP-NEXT: sw a1, 12(a0) -; ILP32E-WITHFP-NEXT: sw zero, 8(a0) -; ILP32E-WITHFP-NEXT: sw zero, 4(a0) ; ILP32E-WITHFP-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-NEXT: sw a1, 12(a0) ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: addi sp, sp, 8 @@ -2281,10 +2281,10 @@ define fp128 @callee_large_scalar_ret() { ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalar_ret: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw 
zero, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalar_ret: @@ -2296,10 +2296,10 @@ define fp128 @callee_large_scalar_ret() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 ret fp128 0xL00000000000000007FFF000000000000 } @@ -2384,13 +2384,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; ILP32E-FPELIM-LABEL: callee_large_struct_ret: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: li a1, 1 +; ILP32E-FPELIM-NEXT: li a2, 2 +; ILP32E-FPELIM-NEXT: li a3, 3 +; ILP32E-FPELIM-NEXT: li a4, 4 ; ILP32E-FPELIM-NEXT: sw a1, 0(a0) -; ILP32E-FPELIM-NEXT: li a1, 2 -; ILP32E-FPELIM-NEXT: sw a1, 4(a0) -; ILP32E-FPELIM-NEXT: li a1, 3 -; ILP32E-FPELIM-NEXT: sw a1, 8(a0) -; ILP32E-FPELIM-NEXT: li a1, 4 -; ILP32E-FPELIM-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-NEXT: sw a2, 4(a0) +; ILP32E-FPELIM-NEXT: sw a3, 8(a0) +; ILP32E-FPELIM-NEXT: sw a4, 12(a0) ; ILP32E-FPELIM-NEXT: ret ; ; ILP32E-WITHFP-LABEL: callee_large_struct_ret: @@ -2404,13 +2404,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: li a1, 1 +; ILP32E-WITHFP-NEXT: li a2, 2 +; ILP32E-WITHFP-NEXT: li a3, 3 +; ILP32E-WITHFP-NEXT: li a4, 4 ; ILP32E-WITHFP-NEXT: sw a1, 0(a0) -; ILP32E-WITHFP-NEXT: li a1, 2 -; ILP32E-WITHFP-NEXT: sw a1, 4(a0) -; ILP32E-WITHFP-NEXT: li a1, 3 -; ILP32E-WITHFP-NEXT: sw a1, 8(a0) -; ILP32E-WITHFP-NEXT: li a1, 4 -; ILP32E-WITHFP-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-NEXT: sw a2, 4(a0) +; ILP32E-WITHFP-NEXT: sw a3, 8(a0) +; ILP32E-WITHFP-NEXT: sw a4, 12(a0) ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: addi sp, sp, 8 @@ -2419,13 +2419,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct_ret: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct_ret: @@ -2437,13 +2437,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, 
sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 4(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %a = getelementptr inbounds %struct.large, ptr %agg.result, i32 0, i32 0 store i32 1, ptr %a, align 4 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index a0e1b002b7260dc..cbd2cef981d71f2 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -82,8 +82,7 @@ define i32 @caller_many_scalars() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a0, 8 -; RV64I-NEXT: sd a0, 8(sp) +; RV64I-NEXT: li a4, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 @@ -92,6 +91,7 @@ define i32 @caller_many_scalars() nounwind { ; RV64I-NEXT: li a6, 6 ; RV64I-NEXT: li a7, 7 ; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call callee_many_scalars ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -133,18 +133,18 @@ define i64 @caller_large_scalars() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: li a0, 2 ; RV64I-NEXT: sd a0, 0(sp) -; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: addi a0, sp, 32 ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: sd a2, 32(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: call callee_large_scalars ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 @@ -189,18 +189,15 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; RV64I-NEXT: addi sp, sp, -96 ; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, sp, 16 +; RV64I-NEXT: li a1, 9 +; RV64I-NEXT: sd a1, 0(sp) ; RV64I-NEXT: sd a0, 8(sp) -; RV64I-NEXT: li a0, 9 -; RV64I-NEXT: sd a0, 0(sp) -; RV64I-NEXT: sd zero, 40(sp) -; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: li a0, 10 ; RV64I-NEXT: sd a0, 16(sp) -; RV64I-NEXT: sd zero, 72(sp) -; RV64I-NEXT: sd zero, 64(sp) -; RV64I-NEXT: li a0, 8 -; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: sd zero, 32(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: li t0, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 @@ -209,7 +206,10 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; RV64I-NEXT: li a5, 6 ; RV64I-NEXT: li a6, 7 ; RV64I-NEXT: addi a7, sp, 48 +; RV64I-NEXT: sd 
t0, 48(sp) ; RV64I-NEXT: sd zero, 56(sp) +; RV64I-NEXT: sd zero, 64(sp) +; RV64I-NEXT: sd zero, 72(sp) ; RV64I-NEXT: call callee_large_scalars_exhausted_regs ; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 96 @@ -293,12 +293,12 @@ define i64 @caller_large_struct() nounwind { ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: li a1, 2 -; RV64I-NEXT: sd a1, 48(sp) ; RV64I-NEXT: li a2, 3 -; RV64I-NEXT: sd a2, 56(sp) ; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a1, 48(sp) +; RV64I-NEXT: sd a2, 56(sp) ; RV64I-NEXT: sd a3, 64(sp) ; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: sd a1, 16(sp) @@ -357,15 +357,12 @@ define void @caller_aligned_stack() nounwind { ; RV64I-NEXT: addi sp, sp, -64 ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 12 +; RV64I-NEXT: li a1, 11 +; RV64I-NEXT: sd a1, 40(sp) ; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: li a0, 11 -; RV64I-NEXT: sd a0, 40(sp) -; RV64I-NEXT: li a0, 10 -; RV64I-NEXT: sd a0, 32(sp) -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: li a0, 9 -; RV64I-NEXT: sd a0, 16(sp) -; RV64I-NEXT: li a6, 8 +; RV64I-NEXT: li a6, 10 +; RV64I-NEXT: li t0, 9 +; RV64I-NEXT: li t1, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 @@ -373,7 +370,10 @@ define void @caller_aligned_stack() nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 ; RV64I-NEXT: li a7, 7 -; RV64I-NEXT: sd a6, 0(sp) +; RV64I-NEXT: sd t1, 0(sp) +; RV64I-NEXT: sd t0, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: sd a6, 32(sp) ; RV64I-NEXT: li a6, 0 ; RV64I-NEXT: call callee_aligned_stack ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -449,12 +449,12 @@ define i256 @callee_large_scalar_ret() nounwind { ; RV64I-LABEL: callee_large_scalar_ret: ; RV64I: # %bb.0: ; RV64I-NEXT: li a1, -1 -; RV64I-NEXT: sd a1, 24(a0) -; RV64I-NEXT: sd a1, 16(a0) +; RV64I-NEXT: lui a2, 1018435 +; RV64I-NEXT: addiw a2, a2, 747 +; RV64I-NEXT: sd a2, 0(a0) ; RV64I-NEXT: sd a1, 8(a0) -; RV64I-NEXT: lui a1, 1018435 -; RV64I-NEXT: addiw a1, a1, 747 -; RV64I-NEXT: sd a1, 0(a0) +; RV64I-NEXT: sd a1, 16(a0) +; RV64I-NEXT: sd a1, 24(a0) ; RV64I-NEXT: ret ret i256 -123456789 } @@ -478,18 +478,18 @@ define void @caller_large_scalar_ret() nounwind { define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { ; RV64I-LABEL: callee_large_struct_ret: ; RV64I: # %bb.0: -; RV64I-NEXT: sw zero, 4(a0) ; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: li a2, 2 ; RV64I-NEXT: sw a1, 0(a0) +; RV64I-NEXT: sw zero, 4(a0) +; RV64I-NEXT: sw a2, 8(a0) ; RV64I-NEXT: sw zero, 12(a0) -; RV64I-NEXT: li a1, 2 -; RV64I-NEXT: sw a1, 8(a0) -; RV64I-NEXT: sw zero, 20(a0) ; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: li a2, 4 ; RV64I-NEXT: sw a1, 16(a0) +; RV64I-NEXT: sw zero, 20(a0) +; RV64I-NEXT: sw a2, 24(a0) ; RV64I-NEXT: sw zero, 28(a0) -; RV64I-NEXT: li a1, 4 -; RV64I-NEXT: sw a1, 24(a0) ; RV64I-NEXT: ret store i64 1, ptr %agg.result, align 4 %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll index bb2fd5934025122..985135a086e2477 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll @@ -114,14 +114,14 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-LP64E-FPELIM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 48 ; 
RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16 -; RV64I-LP64E-FPELIM-NEXT: lui a0, 264704 -; RV64I-LP64E-FPELIM-NEXT: sd a0, 16(sp) -; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp) -; RV64I-LP64E-FPELIM-NEXT: li a1, 4 +; RV64I-LP64E-FPELIM-NEXT: lui a1, 264704 +; RV64I-LP64E-FPELIM-NEXT: li a3, 4 ; RV64I-LP64E-FPELIM-NEXT: li a0, 1 ; RV64I-LP64E-FPELIM-NEXT: li a2, 2 ; RV64I-LP64E-FPELIM-NEXT: li a4, 3 -; RV64I-LP64E-FPELIM-NEXT: sd a1, 0(sp) +; RV64I-LP64E-FPELIM-NEXT: sd a3, 0(sp) +; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp) +; RV64I-LP64E-FPELIM-NEXT: sd a1, 16(sp) ; RV64I-LP64E-FPELIM-NEXT: li a1, 0 ; RV64I-LP64E-FPELIM-NEXT: li a3, 0 ; RV64I-LP64E-FPELIM-NEXT: li a5, 0 @@ -139,14 +139,14 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-LP64E-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 48 ; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16 -; RV64I-LP64E-WITHFP-NEXT: lui a0, 264704 -; RV64I-LP64E-WITHFP-NEXT: sd a0, 16(sp) -; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp) -; RV64I-LP64E-WITHFP-NEXT: li a1, 4 +; RV64I-LP64E-WITHFP-NEXT: lui a1, 264704 +; RV64I-LP64E-WITHFP-NEXT: li a3, 4 ; RV64I-LP64E-WITHFP-NEXT: li a0, 1 ; RV64I-LP64E-WITHFP-NEXT: li a2, 2 ; RV64I-LP64E-WITHFP-NEXT: li a4, 3 -; RV64I-LP64E-WITHFP-NEXT: sd a1, 0(sp) +; RV64I-LP64E-WITHFP-NEXT: sd a3, 0(sp) +; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp) +; RV64I-LP64E-WITHFP-NEXT: sd a1, 16(sp) ; RV64I-LP64E-WITHFP-NEXT: li a1, 0 ; RV64I-LP64E-WITHFP-NEXT: li a3, 0 ; RV64I-LP64E-WITHFP-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll index 37d9eb6990b0e49..eaba1acffa05471 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -32,13 +32,14 @@ define float @caller_onstack_f32_noop(float %a) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw a0, 4(sp) -; RV32IF-NEXT: lui a1, 264704 +; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: lui a3, 264704 ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: li a2, 2 ; RV32IF-NEXT: li a4, 3 ; RV32IF-NEXT: li a6, 4 -; RV32IF-NEXT: sw a1, 0(sp) +; RV32IF-NEXT: sw a3, 0(sp) +; RV32IF-NEXT: sw a1, 4(sp) ; RV32IF-NEXT: li a1, 0 ; RV32IF-NEXT: li a3, 0 ; RV32IF-NEXT: li a5, 0 @@ -60,12 +61,12 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { ; RV32IF-NEXT: fmv.w.x fa4, a0 ; RV32IF-NEXT: fadd.s fa3, fa4, fa5 ; RV32IF-NEXT: fsub.s fa5, fa5, fa4 -; RV32IF-NEXT: fsw fa5, 4(sp) ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: li a2, 2 ; RV32IF-NEXT: li a4, 3 ; RV32IF-NEXT: li a6, 4 ; RV32IF-NEXT: fsw fa3, 0(sp) +; RV32IF-NEXT: fsw fa5, 4(sp) ; RV32IF-NEXT: li a1, 0 ; RV32IF-NEXT: li a3, 0 ; RV32IF-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll index d3530a4341330d5..63d4ea5fee33137 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll @@ -32,15 +32,16 @@ define float @caller_onstack_f32_noop(float %a) nounwind { ; RV32IF-ILP32E: # %bb.0: ; RV32IF-ILP32E-NEXT: addi sp, sp, -20 ; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill -; RV32IF-ILP32E-NEXT: sw a0, 12(sp) -; RV32IF-ILP32E-NEXT: lui a0, 264704 -; RV32IF-ILP32E-NEXT: sw a0, 8(sp) -; RV32IF-ILP32E-NEXT: sw zero, 4(sp) -; RV32IF-ILP32E-NEXT: li a1, 4 +; RV32IF-ILP32E-NEXT: mv a1, a0 +; RV32IF-ILP32E-NEXT: lui a3, 264704 +; 
RV32IF-ILP32E-NEXT: li a5, 4 ; RV32IF-ILP32E-NEXT: li a0, 1 ; RV32IF-ILP32E-NEXT: li a2, 2 ; RV32IF-ILP32E-NEXT: li a4, 3 -; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: sw a5, 0(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) +; RV32IF-ILP32E-NEXT: sw a3, 8(sp) +; RV32IF-ILP32E-NEXT: sw a1, 12(sp) ; RV32IF-ILP32E-NEXT: li a1, 0 ; RV32IF-ILP32E-NEXT: li a3, 0 ; RV32IF-ILP32E-NEXT: li a5, 0 @@ -61,14 +62,14 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { ; RV32IF-ILP32E-NEXT: fmv.w.x fa4, a0 ; RV32IF-ILP32E-NEXT: fadd.s fa3, fa4, fa5 ; RV32IF-ILP32E-NEXT: fsub.s fa5, fa5, fa4 -; RV32IF-ILP32E-NEXT: sw zero, 4(sp) -; RV32IF-ILP32E-NEXT: li a0, 4 -; RV32IF-ILP32E-NEXT: sw a0, 0(sp) -; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: li a1, 4 ; RV32IF-ILP32E-NEXT: li a0, 1 ; RV32IF-ILP32E-NEXT: li a2, 2 ; RV32IF-ILP32E-NEXT: li a4, 3 +; RV32IF-ILP32E-NEXT: sw a1, 0(sp) +; RV32IF-ILP32E-NEXT: sw zero, 4(sp) ; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp) +; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp) ; RV32IF-ILP32E-NEXT: li a1, 0 ; RV32IF-ILP32E-NEXT: li a3, 0 ; RV32IF-ILP32E-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll index 8ebe9b6301c44df..4153cad1ae88148 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll @@ -41,10 +41,10 @@ define <4 x float> @callee_v4f32(<4 x float> %x, <4 x float> %y) { ; RV64-NEXT: fadd.s fa2, fa1, fa2 ; RV64-NEXT: fadd.s fa4, fa3, fa4 ; RV64-NEXT: fadd.s fa5, fa5, ft1 -; RV64-NEXT: fsw fa5, 12(a0) -; RV64-NEXT: fsw fa4, 8(a0) -; RV64-NEXT: fsw fa2, 4(a0) ; RV64-NEXT: fsw fa0, 0(a0) +; RV64-NEXT: fsw fa2, 4(a0) +; RV64-NEXT: fsw fa4, 8(a0) +; RV64-NEXT: fsw fa5, 12(a0) ; RV64-NEXT: ret ; ; RV64LP64F-LABEL: callee_v4f32: @@ -53,10 +53,10 @@ define <4 x float> @callee_v4f32(<4 x float> %x, <4 x float> %y) { ; RV64LP64F-NEXT: fadd.s fa5, fa1, fa5 ; RV64LP64F-NEXT: fadd.s fa2, fa2, fa6 ; RV64LP64F-NEXT: fadd.s fa3, fa3, fa7 -; RV64LP64F-NEXT: fsw fa3, 12(a0) -; RV64LP64F-NEXT: fsw fa2, 8(a0) -; RV64LP64F-NEXT: fsw fa5, 4(a0) ; RV64LP64F-NEXT: fsw fa4, 0(a0) +; RV64LP64F-NEXT: fsw fa5, 4(a0) +; RV64LP64F-NEXT: fsw fa2, 8(a0) +; RV64LP64F-NEXT: fsw fa3, 12(a0) ; RV64LP64F-NEXT: ret %z = fadd <4 x float> %x, %y ret <4 x float> %z diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index f18bbb4ed84ee3e..cf0e625f3c6c781 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -566,8 +566,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: sw a0, 4(sp) ; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: sw a0, 4(sp) ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a0 @@ -588,8 +588,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: sd a0, 0(sp) +; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: mv a3, a0 @@ -610,8 +610,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-SMALL-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-SMALL-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-SMALL-NEXT: mv s0, a0 -; RV64I-SMALL-NEXT: sd a0, 8(sp) ; 
RV64I-SMALL-NEXT: sd a0, 0(sp) +; RV64I-SMALL-NEXT: sd a0, 8(sp) ; RV64I-SMALL-NEXT: mv a1, a0 ; RV64I-SMALL-NEXT: mv a2, a0 ; RV64I-SMALL-NEXT: mv a3, a0 @@ -632,8 +632,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-MEDIUM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-MEDIUM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-MEDIUM-NEXT: mv s0, a0 -; RV64I-MEDIUM-NEXT: sd a0, 8(sp) ; RV64I-MEDIUM-NEXT: sd a0, 0(sp) +; RV64I-MEDIUM-NEXT: sd a0, 8(sp) ; RV64I-MEDIUM-NEXT: mv a1, a0 ; RV64I-MEDIUM-NEXT: mv a2, a0 ; RV64I-MEDIUM-NEXT: mv a3, a0 @@ -657,8 +657,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: .Lpcrel_hi4: ; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) ; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-NEXT: sd s0, 8(sp) ; RV64I-LARGE-NEXT: sd s0, 0(sp) +; RV64I-LARGE-NEXT: sd s0, 8(sp) ; RV64I-LARGE-NEXT: mv a0, s0 ; RV64I-LARGE-NEXT: mv a1, s0 ; RV64I-LARGE-NEXT: mv a2, s0 @@ -684,8 +684,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: ; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) ; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 ; RV64I-LARGE-ZICFILP-NEXT: mv a1, s0 ; RV64I-LARGE-ZICFILP-NEXT: mv a2, s0 @@ -751,8 +751,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw a0, 4(sp) ; CHECK-NEXT: sw a0, 0(sp) +; CHECK-NEXT: sw a0, 4(sp) ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a0 @@ -769,8 +769,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -32 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: sd a0, 0(sp) +; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a2, a0 ; RV64I-NEXT: mv a3, a0 @@ -787,8 +787,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-SMALL: # %bb.0: ; RV64I-SMALL-NEXT: addi sp, sp, -32 ; RV64I-SMALL-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-SMALL-NEXT: sd a0, 8(sp) ; RV64I-SMALL-NEXT: sd a0, 0(sp) +; RV64I-SMALL-NEXT: sd a0, 8(sp) ; RV64I-SMALL-NEXT: mv a1, a0 ; RV64I-SMALL-NEXT: mv a2, a0 ; RV64I-SMALL-NEXT: mv a3, a0 @@ -805,8 +805,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-MEDIUM: # %bb.0: ; RV64I-MEDIUM-NEXT: addi sp, sp, -32 ; RV64I-MEDIUM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-MEDIUM-NEXT: sd a0, 8(sp) ; RV64I-MEDIUM-NEXT: sd a0, 0(sp) +; RV64I-MEDIUM-NEXT: sd a0, 8(sp) ; RV64I-MEDIUM-NEXT: mv a1, a0 ; RV64I-MEDIUM-NEXT: mv a2, a0 ; RV64I-MEDIUM-NEXT: mv a3, a0 @@ -826,8 +826,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: .Lpcrel_hi5: ; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI10_0) ; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi5)(a1) -; RV64I-LARGE-NEXT: sd a0, 8(sp) ; RV64I-LARGE-NEXT: sd a0, 0(sp) +; RV64I-LARGE-NEXT: sd a0, 8(sp) ; RV64I-LARGE-NEXT: mv a1, a0 ; RV64I-LARGE-NEXT: mv a2, a0 ; RV64I-LARGE-NEXT: mv a3, a0 @@ -848,8 +848,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: ; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI10_0) ; RV64I-LARGE-ZICFILP-NEXT: ld t2, 
%pcrel_lo(.Lpcrel_hi5)(a1) -; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: mv a1, a0 ; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 ; RV64I-LARGE-ZICFILP-NEXT: mv a3, a0 diff --git a/llvm/test/CodeGen/RISCV/double-calling-conv.ll b/llvm/test/CodeGen/RISCV/double-calling-conv.ll index 57aaa4c9f74e46b..b9e80dccd97b9ac 100644 --- a/llvm/test/CodeGen/RISCV/double-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-calling-conv.ll @@ -181,19 +181,19 @@ define double @caller_double_stack() nounwind { ; RV32IFD-NEXT: addi sp, sp, -32 ; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: lui a0, 262510 -; RV32IFD-NEXT: addi a0, a0, 327 -; RV32IFD-NEXT: sw a0, 4(sp) +; RV32IFD-NEXT: addi a1, a0, 327 ; RV32IFD-NEXT: lui a0, 713032 -; RV32IFD-NEXT: addi a1, a0, -1311 -; RV32IFD-NEXT: sw a1, 0(sp) +; RV32IFD-NEXT: addi a3, a0, -1311 ; RV32IFD-NEXT: lui a0, 262574 -; RV32IFD-NEXT: addi a0, a0, 327 -; RV32IFD-NEXT: sw a0, 12(sp) +; RV32IFD-NEXT: addi a5, a0, 327 ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: li a2, 2 ; RV32IFD-NEXT: li a4, 3 ; RV32IFD-NEXT: li a6, 4 -; RV32IFD-NEXT: sw a1, 8(sp) +; RV32IFD-NEXT: sw a3, 0(sp) +; RV32IFD-NEXT: sw a1, 4(sp) +; RV32IFD-NEXT: sw a3, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) ; RV32IFD-NEXT: li a1, 0 ; RV32IFD-NEXT: li a3, 0 ; RV32IFD-NEXT: li a5, 0 @@ -208,19 +208,19 @@ define double @caller_double_stack() nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: lui a0, 262510 -; RV32IZFINXZDINX-NEXT: addi a0, a0, 327 -; RV32IZFINXZDINX-NEXT: sw a0, 4(sp) +; RV32IZFINXZDINX-NEXT: addi a1, a0, 327 ; RV32IZFINXZDINX-NEXT: lui a0, 713032 -; RV32IZFINXZDINX-NEXT: addi a1, a0, -1311 -; RV32IZFINXZDINX-NEXT: sw a1, 0(sp) +; RV32IZFINXZDINX-NEXT: addi a3, a0, -1311 ; RV32IZFINXZDINX-NEXT: lui a0, 262574 -; RV32IZFINXZDINX-NEXT: addi a0, a0, 327 -; RV32IZFINXZDINX-NEXT: sw a0, 12(sp) +; RV32IZFINXZDINX-NEXT: addi a5, a0, 327 ; RV32IZFINXZDINX-NEXT: li a0, 1 ; RV32IZFINXZDINX-NEXT: li a2, 2 ; RV32IZFINXZDINX-NEXT: li a4, 3 ; RV32IZFINXZDINX-NEXT: li a6, 4 -; RV32IZFINXZDINX-NEXT: sw a1, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a5, 12(sp) ; RV32IZFINXZDINX-NEXT: li a1, 0 ; RV32IZFINXZDINX-NEXT: li a3, 0 ; RV32IZFINXZDINX-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll index 3732978b8bd83ea..2b1ec10fcaf17e2 100644 --- a/llvm/test/CodeGen/RISCV/double-convert-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll @@ -792,8 +792,8 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, ptr %1) nounwind stri ; RV32I-NEXT: addi s1, a0, 1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __floatsidf -; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -865,8 +865,8 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind str ; RV32I-NEXT: addi s1, a0, 1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __floatunsidf -; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git 
a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index ef2d8e7627be548..a8b141618bbb3a7 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -1474,8 +1474,8 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV32I-NEXT: addi s1, a0, 1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __floatsidf -; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1547,8 +1547,8 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind { ; RV32I-NEXT: addi s1, a0, 1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __floatunsidf -; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll index 493bc63992547e2..17356116081fff8 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll @@ -50,18 +50,18 @@ define bfloat @caller(<32 x bfloat> %A) nounwind { ; CHECK-NEXT: fmv.h.x fa5, a5 ; CHECK-NEXT: fmv.h.x fa6, a6 ; CHECK-NEXT: fmv.h.x fa7, a7 -; CHECK-NEXT: fsh fs11, 22(sp) -; CHECK-NEXT: fsh fs10, 20(sp) -; CHECK-NEXT: fsh fs9, 18(sp) ; CHECK-NEXT: fsh fs8, 16(sp) -; CHECK-NEXT: fsh fs7, 14(sp) -; CHECK-NEXT: fsh fs6, 12(sp) -; CHECK-NEXT: fsh fs5, 10(sp) +; CHECK-NEXT: fsh fs9, 18(sp) +; CHECK-NEXT: fsh fs10, 20(sp) +; CHECK-NEXT: fsh fs11, 22(sp) ; CHECK-NEXT: fsh fs4, 8(sp) -; CHECK-NEXT: fsh fs3, 6(sp) -; CHECK-NEXT: fsh fs2, 4(sp) -; CHECK-NEXT: fsh fs1, 2(sp) +; CHECK-NEXT: fsh fs5, 10(sp) +; CHECK-NEXT: fsh fs6, 12(sp) +; CHECK-NEXT: fsh fs7, 14(sp) ; CHECK-NEXT: fsh fs0, 0(sp) +; CHECK-NEXT: fsh fs1, 2(sp) +; CHECK-NEXT: fsh fs2, 4(sp) +; CHECK-NEXT: fsh fs3, 6(sp) ; CHECK-NEXT: call callee ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/fastcc-float.ll b/llvm/test/CodeGen/RISCV/fastcc-float.ll index 488c97d5a4506c4..237a72d983de4ae 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-float.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-float.ll @@ -50,18 +50,18 @@ define float @caller(<32 x float> %A) nounwind { ; CHECK-NEXT: fmv.w.x fa5, a5 ; CHECK-NEXT: fmv.w.x fa6, a6 ; CHECK-NEXT: fmv.w.x fa7, a7 -; CHECK-NEXT: fsw fs11, 44(sp) -; CHECK-NEXT: fsw fs10, 40(sp) -; CHECK-NEXT: fsw fs9, 36(sp) ; CHECK-NEXT: fsw fs8, 32(sp) -; CHECK-NEXT: fsw fs7, 28(sp) -; CHECK-NEXT: fsw fs6, 24(sp) -; CHECK-NEXT: fsw fs5, 20(sp) +; CHECK-NEXT: fsw fs9, 36(sp) +; CHECK-NEXT: fsw fs10, 40(sp) +; CHECK-NEXT: fsw fs11, 44(sp) ; CHECK-NEXT: fsw fs4, 16(sp) -; CHECK-NEXT: fsw fs3, 12(sp) -; CHECK-NEXT: fsw fs2, 8(sp) -; CHECK-NEXT: fsw fs1, 4(sp) +; CHECK-NEXT: fsw fs5, 20(sp) +; CHECK-NEXT: fsw fs6, 24(sp) +; CHECK-NEXT: fsw fs7, 28(sp) ; CHECK-NEXT: fsw fs0, 0(sp) +; CHECK-NEXT: fsw fs1, 4(sp) +; CHECK-NEXT: fsw fs2, 8(sp) +; CHECK-NEXT: fsw fs3, 12(sp) ; CHECK-NEXT: call callee ; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 diff --git a/llvm/test/CodeGen/RISCV/fastcc-half.ll b/llvm/test/CodeGen/RISCV/fastcc-half.ll index 499b91b9d81c350..bf8d4e8dcb98c4b 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-half.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-half.ll @@ -50,18 +50,18 @@ 
define half @caller(<32 x half> %A) nounwind { ; CHECK-NEXT: fmv.h.x fa5, a5 ; CHECK-NEXT: fmv.h.x fa6, a6 ; CHECK-NEXT: fmv.h.x fa7, a7 -; CHECK-NEXT: fsh fs11, 22(sp) -; CHECK-NEXT: fsh fs10, 20(sp) -; CHECK-NEXT: fsh fs9, 18(sp) ; CHECK-NEXT: fsh fs8, 16(sp) -; CHECK-NEXT: fsh fs7, 14(sp) -; CHECK-NEXT: fsh fs6, 12(sp) -; CHECK-NEXT: fsh fs5, 10(sp) +; CHECK-NEXT: fsh fs9, 18(sp) +; CHECK-NEXT: fsh fs10, 20(sp) +; CHECK-NEXT: fsh fs11, 22(sp) ; CHECK-NEXT: fsh fs4, 8(sp) -; CHECK-NEXT: fsh fs3, 6(sp) -; CHECK-NEXT: fsh fs2, 4(sp) -; CHECK-NEXT: fsh fs1, 2(sp) +; CHECK-NEXT: fsh fs5, 10(sp) +; CHECK-NEXT: fsh fs6, 12(sp) +; CHECK-NEXT: fsh fs7, 14(sp) ; CHECK-NEXT: fsh fs0, 0(sp) +; CHECK-NEXT: fsh fs1, 2(sp) +; CHECK-NEXT: fsh fs2, 4(sp) +; CHECK-NEXT: fsh fs3, 6(sp) ; CHECK-NEXT: call callee ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll index 75046b701b23526..3570ece269609f7 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-int.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll @@ -40,10 +40,10 @@ define i32 @caller(<16 x i32> %A) nounwind { ; RV32-NEXT: lw t2, 52(a0) ; RV32-NEXT: lw s0, 56(a0) ; RV32-NEXT: lw a0, 60(a0) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw s0, 8(sp) -; RV32-NEXT: sw t2, 4(sp) ; RV32-NEXT: sw t1, 0(sp) +; RV32-NEXT: sw t2, 4(sp) +; RV32-NEXT: sw s0, 8(sp) +; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: mv a0, t0 ; RV32-NEXT: call callee ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -72,10 +72,10 @@ define i32 @caller(<16 x i32> %A) nounwind { ; RV64-NEXT: ld t2, 104(a0) ; RV64-NEXT: ld s0, 112(a0) ; RV64-NEXT: ld a0, 120(a0) -; RV64-NEXT: sd a0, 24(sp) -; RV64-NEXT: sd s0, 16(sp) -; RV64-NEXT: sd t2, 8(sp) ; RV64-NEXT: sd t1, 0(sp) +; RV64-NEXT: sd t2, 8(sp) +; RV64-NEXT: sd s0, 16(sp) +; RV64-NEXT: sd a0, 24(sp) ; RV64-NEXT: mv a0, t0 ; RV64-NEXT: call callee ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 0eefc34ad552a98..7523119c4ff778d 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -289,8 +289,8 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh t0, 124(sp) ; ZHINX32-NEXT: sh t0, 52(sp) # 2-byte Folded Spill ; ZHINX32-NEXT: lh t6, 128(sp) -; ZHINX32-NEXT: lh t5, 132(sp) -; ZHINX32-NEXT: lh t4, 136(sp) +; ZHINX32-NEXT: lh t4, 132(sp) +; ZHINX32-NEXT: lh t5, 136(sp) ; ZHINX32-NEXT: lh s0, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) @@ -304,30 +304,30 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh t3, 192(sp) -; ZHINX32-NEXT: lh t2, 196(sp) -; ZHINX32-NEXT: lh t1, 200(sp) -; ZHINX32-NEXT: lh t0, 204(sp) -; ZHINX32-NEXT: sh t0, 38(sp) -; ZHINX32-NEXT: sh t1, 36(sp) -; ZHINX32-NEXT: sh t2, 34(sp) -; ZHINX32-NEXT: sh t3, 32(sp) -; ZHINX32-NEXT: sh ra, 30(sp) -; ZHINX32-NEXT: sh s11, 28(sp) -; ZHINX32-NEXT: sh s10, 26(sp) +; ZHINX32-NEXT: lh t0, 192(sp) +; ZHINX32-NEXT: lh t1, 196(sp) +; ZHINX32-NEXT: lh t2, 200(sp) +; ZHINX32-NEXT: lh t3, 204(sp) +; ZHINX32-NEXT: sh t0, 32(sp) +; ZHINX32-NEXT: sh t1, 34(sp) +; ZHINX32-NEXT: sh t2, 36(sp) +; ZHINX32-NEXT: sh t3, 38(sp) ; ZHINX32-NEXT: sh s9, 24(sp) -; ZHINX32-NEXT: sh s8, 22(sp) -; ZHINX32-NEXT: sh s7, 20(sp) -; ZHINX32-NEXT: sh s6, 18(sp) 
+; ZHINX32-NEXT: sh s10, 26(sp) +; ZHINX32-NEXT: sh s11, 28(sp) +; ZHINX32-NEXT: sh ra, 30(sp) ; ZHINX32-NEXT: sh s5, 16(sp) -; ZHINX32-NEXT: sh s4, 14(sp) -; ZHINX32-NEXT: sh s3, 12(sp) -; ZHINX32-NEXT: sh s2, 10(sp) +; ZHINX32-NEXT: sh s6, 18(sp) +; ZHINX32-NEXT: sh s7, 20(sp) +; ZHINX32-NEXT: sh s8, 22(sp) ; ZHINX32-NEXT: sh s1, 8(sp) -; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t4, 4(sp) -; ZHINX32-NEXT: sh t5, 2(sp) +; ZHINX32-NEXT: sh s2, 10(sp) +; ZHINX32-NEXT: sh s3, 12(sp) +; ZHINX32-NEXT: sh s4, 14(sp) ; ZHINX32-NEXT: sh t6, 0(sp) +; ZHINX32-NEXT: sh t4, 2(sp) +; ZHINX32-NEXT: sh t5, 4(sp) +; ZHINX32-NEXT: sh s0, 6(sp) ; ZHINX32-NEXT: lh t3, 58(sp) # 2-byte Folded Reload ; ZHINX32-NEXT: lh t4, 56(sp) # 2-byte Folded Reload ; ZHINX32-NEXT: lh t5, 54(sp) # 2-byte Folded Reload @@ -374,8 +374,8 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh t0, 184(sp) ; ZHINX64-NEXT: sh t0, 48(sp) # 2-byte Folded Spill ; ZHINX64-NEXT: lh t6, 192(sp) -; ZHINX64-NEXT: lh t5, 200(sp) -; ZHINX64-NEXT: lh t4, 208(sp) +; ZHINX64-NEXT: lh t4, 200(sp) +; ZHINX64-NEXT: lh t5, 208(sp) ; ZHINX64-NEXT: lh s0, 216(sp) ; ZHINX64-NEXT: lh s1, 224(sp) ; ZHINX64-NEXT: lh s2, 232(sp) @@ -389,30 +389,30 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 296(sp) ; ZHINX64-NEXT: lh s11, 304(sp) ; ZHINX64-NEXT: lh ra, 312(sp) -; ZHINX64-NEXT: lh t3, 320(sp) -; ZHINX64-NEXT: lh t2, 328(sp) -; ZHINX64-NEXT: lh t1, 336(sp) -; ZHINX64-NEXT: lh t0, 344(sp) -; ZHINX64-NEXT: sh t0, 38(sp) -; ZHINX64-NEXT: sh t1, 36(sp) -; ZHINX64-NEXT: sh t2, 34(sp) -; ZHINX64-NEXT: sh t3, 32(sp) -; ZHINX64-NEXT: sh ra, 30(sp) -; ZHINX64-NEXT: sh s11, 28(sp) -; ZHINX64-NEXT: sh s10, 26(sp) +; ZHINX64-NEXT: lh t0, 320(sp) +; ZHINX64-NEXT: lh t1, 328(sp) +; ZHINX64-NEXT: lh t2, 336(sp) +; ZHINX64-NEXT: lh t3, 344(sp) +; ZHINX64-NEXT: sh t0, 32(sp) +; ZHINX64-NEXT: sh t1, 34(sp) +; ZHINX64-NEXT: sh t2, 36(sp) +; ZHINX64-NEXT: sh t3, 38(sp) ; ZHINX64-NEXT: sh s9, 24(sp) -; ZHINX64-NEXT: sh s8, 22(sp) -; ZHINX64-NEXT: sh s7, 20(sp) -; ZHINX64-NEXT: sh s6, 18(sp) +; ZHINX64-NEXT: sh s10, 26(sp) +; ZHINX64-NEXT: sh s11, 28(sp) +; ZHINX64-NEXT: sh ra, 30(sp) ; ZHINX64-NEXT: sh s5, 16(sp) -; ZHINX64-NEXT: sh s4, 14(sp) -; ZHINX64-NEXT: sh s3, 12(sp) -; ZHINX64-NEXT: sh s2, 10(sp) +; ZHINX64-NEXT: sh s6, 18(sp) +; ZHINX64-NEXT: sh s7, 20(sp) +; ZHINX64-NEXT: sh s8, 22(sp) ; ZHINX64-NEXT: sh s1, 8(sp) -; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t4, 4(sp) -; ZHINX64-NEXT: sh t5, 2(sp) +; ZHINX64-NEXT: sh s2, 10(sp) +; ZHINX64-NEXT: sh s3, 12(sp) +; ZHINX64-NEXT: sh s4, 14(sp) ; ZHINX64-NEXT: sh t6, 0(sp) +; ZHINX64-NEXT: sh t4, 2(sp) +; ZHINX64-NEXT: sh t5, 4(sp) +; ZHINX64-NEXT: sh s0, 6(sp) ; ZHINX64-NEXT: lh t3, 54(sp) # 2-byte Folded Reload ; ZHINX64-NEXT: lh t4, 52(sp) # 2-byte Folded Reload ; ZHINX64-NEXT: lh t5, 50(sp) # 2-byte Folded Reload @@ -450,67 +450,67 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill ; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill ; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw t0, 0(a0) -; ZFINX32-NEXT: lw a1, 4(a0) -; ZFINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw a1, 8(a0) -; ZFINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw a1, 12(a0) -; ZFINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw a1, 16(a0) -; ZFINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw a5, 20(a0) -; 
ZFINX32-NEXT: lw a6, 24(a0) -; ZFINX32-NEXT: lw a7, 28(a0) -; ZFINX32-NEXT: lw t3, 32(a0) -; ZFINX32-NEXT: lw t4, 36(a0) -; ZFINX32-NEXT: lw t5, 40(a0) -; ZFINX32-NEXT: lw t6, 44(a0) -; ZFINX32-NEXT: lw t1, 48(a0) -; ZFINX32-NEXT: lw t2, 52(a0) -; ZFINX32-NEXT: lw s0, 56(a0) -; ZFINX32-NEXT: lw s1, 60(a0) -; ZFINX32-NEXT: lw s2, 64(a0) -; ZFINX32-NEXT: lw s3, 68(a0) -; ZFINX32-NEXT: lw s4, 72(a0) -; ZFINX32-NEXT: lw s5, 76(a0) -; ZFINX32-NEXT: lw s6, 80(a0) -; ZFINX32-NEXT: lw s7, 84(a0) -; ZFINX32-NEXT: lw s8, 88(a0) -; ZFINX32-NEXT: lw s9, 92(a0) -; ZFINX32-NEXT: lw s10, 96(a0) -; ZFINX32-NEXT: lw s11, 100(a0) -; ZFINX32-NEXT: lw ra, 104(a0) -; ZFINX32-NEXT: lw a4, 108(a0) -; ZFINX32-NEXT: lw a3, 112(a0) -; ZFINX32-NEXT: lw a2, 116(a0) -; ZFINX32-NEXT: lw a1, 120(a0) -; ZFINX32-NEXT: lw a0, 124(a0) -; ZFINX32-NEXT: sw a0, 76(sp) -; ZFINX32-NEXT: sw a1, 72(sp) -; ZFINX32-NEXT: sw a2, 68(sp) -; ZFINX32-NEXT: sw a3, 64(sp) -; ZFINX32-NEXT: sw a4, 60(sp) -; ZFINX32-NEXT: sw ra, 56(sp) -; ZFINX32-NEXT: sw s11, 52(sp) +; ZFINX32-NEXT: mv t0, a0 +; ZFINX32-NEXT: lw a0, 0(a0) +; ZFINX32-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: lw a0, 4(t0) +; ZFINX32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: lw a0, 8(t0) +; ZFINX32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: lw a0, 12(t0) +; ZFINX32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: lw a4, 16(t0) +; ZFINX32-NEXT: lw a5, 20(t0) +; ZFINX32-NEXT: lw a6, 24(t0) +; ZFINX32-NEXT: lw a7, 28(t0) +; ZFINX32-NEXT: lw t3, 32(t0) +; ZFINX32-NEXT: lw t4, 36(t0) +; ZFINX32-NEXT: lw t5, 40(t0) +; ZFINX32-NEXT: lw t6, 44(t0) +; ZFINX32-NEXT: lw t1, 48(t0) +; ZFINX32-NEXT: lw t2, 52(t0) +; ZFINX32-NEXT: lw s0, 56(t0) +; ZFINX32-NEXT: lw s1, 60(t0) +; ZFINX32-NEXT: lw s2, 64(t0) +; ZFINX32-NEXT: lw s3, 68(t0) +; ZFINX32-NEXT: lw s4, 72(t0) +; ZFINX32-NEXT: lw s5, 76(t0) +; ZFINX32-NEXT: lw s6, 80(t0) +; ZFINX32-NEXT: lw s7, 84(t0) +; ZFINX32-NEXT: lw s8, 88(t0) +; ZFINX32-NEXT: lw s9, 92(t0) +; ZFINX32-NEXT: lw s10, 96(t0) +; ZFINX32-NEXT: lw s11, 100(t0) +; ZFINX32-NEXT: lw ra, 104(t0) +; ZFINX32-NEXT: lw a3, 108(t0) +; ZFINX32-NEXT: lw a0, 112(t0) +; ZFINX32-NEXT: lw a1, 116(t0) +; ZFINX32-NEXT: lw a2, 120(t0) +; ZFINX32-NEXT: lw t0, 124(t0) +; ZFINX32-NEXT: sw a0, 64(sp) +; ZFINX32-NEXT: sw a1, 68(sp) +; ZFINX32-NEXT: sw a2, 72(sp) +; ZFINX32-NEXT: sw t0, 76(sp) ; ZFINX32-NEXT: sw s10, 48(sp) -; ZFINX32-NEXT: sw s9, 44(sp) -; ZFINX32-NEXT: sw s8, 40(sp) -; ZFINX32-NEXT: sw s7, 36(sp) +; ZFINX32-NEXT: sw s11, 52(sp) +; ZFINX32-NEXT: sw ra, 56(sp) +; ZFINX32-NEXT: sw a3, 60(sp) ; ZFINX32-NEXT: sw s6, 32(sp) -; ZFINX32-NEXT: sw s5, 28(sp) -; ZFINX32-NEXT: sw s4, 24(sp) -; ZFINX32-NEXT: sw s3, 20(sp) +; ZFINX32-NEXT: sw s7, 36(sp) +; ZFINX32-NEXT: sw s8, 40(sp) +; ZFINX32-NEXT: sw s9, 44(sp) ; ZFINX32-NEXT: sw s2, 16(sp) -; ZFINX32-NEXT: sw s1, 12(sp) -; ZFINX32-NEXT: sw s0, 8(sp) -; ZFINX32-NEXT: sw t2, 4(sp) +; ZFINX32-NEXT: sw s3, 20(sp) +; ZFINX32-NEXT: sw s4, 24(sp) +; ZFINX32-NEXT: sw s5, 28(sp) ; ZFINX32-NEXT: sw t1, 0(sp) -; ZFINX32-NEXT: mv a0, t0 -; ZFINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: sw t2, 4(sp) +; ZFINX32-NEXT: sw s0, 8(sp) +; ZFINX32-NEXT: sw s1, 12(sp) +; ZFINX32-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw a2, 96(sp) # 
4-byte Folded Reload +; ZFINX32-NEXT: lw a3, 92(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: call callee_half_32 ; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX32-NEXT: lui a1, 1048560 @@ -548,67 +548,67 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZFINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: ld t0, 0(a0) -; ZFINX64-NEXT: ld a1, 8(a0) -; ZFINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: ld a1, 16(a0) -; ZFINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: ld a1, 24(a0) -; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: ld a1, 32(a0) -; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: ld a5, 40(a0) -; ZFINX64-NEXT: ld a6, 48(a0) -; ZFINX64-NEXT: ld a7, 56(a0) -; ZFINX64-NEXT: ld t3, 64(a0) -; ZFINX64-NEXT: ld t4, 72(a0) -; ZFINX64-NEXT: ld t5, 80(a0) -; ZFINX64-NEXT: ld t6, 88(a0) -; ZFINX64-NEXT: ld t1, 96(a0) -; ZFINX64-NEXT: ld t2, 104(a0) -; ZFINX64-NEXT: ld s0, 112(a0) -; ZFINX64-NEXT: ld s1, 120(a0) -; ZFINX64-NEXT: ld s2, 128(a0) -; ZFINX64-NEXT: ld s3, 136(a0) -; ZFINX64-NEXT: ld s4, 144(a0) -; ZFINX64-NEXT: ld s5, 152(a0) -; ZFINX64-NEXT: ld s6, 160(a0) -; ZFINX64-NEXT: ld s7, 168(a0) -; ZFINX64-NEXT: ld s8, 176(a0) -; ZFINX64-NEXT: ld s9, 184(a0) -; ZFINX64-NEXT: ld s10, 192(a0) -; ZFINX64-NEXT: ld s11, 200(a0) -; ZFINX64-NEXT: ld ra, 208(a0) -; ZFINX64-NEXT: ld a4, 216(a0) -; ZFINX64-NEXT: ld a3, 224(a0) -; ZFINX64-NEXT: ld a2, 232(a0) -; ZFINX64-NEXT: ld a1, 240(a0) -; ZFINX64-NEXT: ld a0, 248(a0) -; ZFINX64-NEXT: sd a0, 152(sp) -; ZFINX64-NEXT: sd a1, 144(sp) -; ZFINX64-NEXT: sd a2, 136(sp) -; ZFINX64-NEXT: sd a3, 128(sp) -; ZFINX64-NEXT: sd a4, 120(sp) -; ZFINX64-NEXT: sd ra, 112(sp) -; ZFINX64-NEXT: sd s11, 104(sp) +; ZFINX64-NEXT: mv t0, a0 +; ZFINX64-NEXT: ld a0, 0(a0) +; ZFINX64-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: ld a0, 8(t0) +; ZFINX64-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: ld a0, 16(t0) +; ZFINX64-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: ld a0, 24(t0) +; ZFINX64-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: ld a4, 32(t0) +; ZFINX64-NEXT: ld a5, 40(t0) +; ZFINX64-NEXT: ld a6, 48(t0) +; ZFINX64-NEXT: ld a7, 56(t0) +; ZFINX64-NEXT: ld t3, 64(t0) +; ZFINX64-NEXT: ld t4, 72(t0) +; ZFINX64-NEXT: ld t5, 80(t0) +; ZFINX64-NEXT: ld t6, 88(t0) +; ZFINX64-NEXT: ld t1, 96(t0) +; ZFINX64-NEXT: ld t2, 104(t0) +; ZFINX64-NEXT: ld s0, 112(t0) +; ZFINX64-NEXT: ld s1, 120(t0) +; ZFINX64-NEXT: ld s2, 128(t0) +; ZFINX64-NEXT: ld s3, 136(t0) +; ZFINX64-NEXT: ld s4, 144(t0) +; ZFINX64-NEXT: ld s5, 152(t0) +; ZFINX64-NEXT: ld s6, 160(t0) +; ZFINX64-NEXT: ld s7, 168(t0) +; ZFINX64-NEXT: ld s8, 176(t0) +; ZFINX64-NEXT: ld s9, 184(t0) +; ZFINX64-NEXT: ld s10, 192(t0) +; ZFINX64-NEXT: ld s11, 200(t0) +; ZFINX64-NEXT: ld ra, 208(t0) +; ZFINX64-NEXT: ld a3, 216(t0) +; ZFINX64-NEXT: ld a0, 224(t0) +; ZFINX64-NEXT: ld a1, 232(t0) +; ZFINX64-NEXT: ld a2, 240(t0) +; ZFINX64-NEXT: ld t0, 248(t0) +; ZFINX64-NEXT: sd a0, 128(sp) +; ZFINX64-NEXT: sd a1, 136(sp) +; ZFINX64-NEXT: sd a2, 144(sp) +; ZFINX64-NEXT: sd t0, 152(sp) ; ZFINX64-NEXT: sd s10, 96(sp) -; ZFINX64-NEXT: sd s9, 88(sp) -; ZFINX64-NEXT: sd s8, 80(sp) -; ZFINX64-NEXT: sd s7, 72(sp) +; ZFINX64-NEXT: sd s11, 104(sp) +; ZFINX64-NEXT: sd ra, 112(sp) +; ZFINX64-NEXT: sd a3, 120(sp) ; ZFINX64-NEXT: sd s6, 64(sp) -; ZFINX64-NEXT: sd s5, 56(sp) 
-; ZFINX64-NEXT: sd s4, 48(sp) -; ZFINX64-NEXT: sd s3, 40(sp) +; ZFINX64-NEXT: sd s7, 72(sp) +; ZFINX64-NEXT: sd s8, 80(sp) +; ZFINX64-NEXT: sd s9, 88(sp) ; ZFINX64-NEXT: sd s2, 32(sp) -; ZFINX64-NEXT: sd s1, 24(sp) -; ZFINX64-NEXT: sd s0, 16(sp) -; ZFINX64-NEXT: sd t2, 8(sp) +; ZFINX64-NEXT: sd s3, 40(sp) +; ZFINX64-NEXT: sd s4, 48(sp) +; ZFINX64-NEXT: sd s5, 56(sp) ; ZFINX64-NEXT: sd t1, 0(sp) -; ZFINX64-NEXT: mv a0, t0 -; ZFINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: sd t2, 8(sp) +; ZFINX64-NEXT: sd s0, 16(sp) +; ZFINX64-NEXT: sd s1, 24(sp) +; ZFINX64-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld a3, 168(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: call callee_half_32 ; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX64-NEXT: lui a1, 1048560 @@ -646,67 +646,67 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill ; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill ; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw t0, 0(a0) -; ZDINX32-NEXT: lw a1, 4(a0) -; ZDINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw a1, 8(a0) -; ZDINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw a1, 12(a0) -; ZDINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw a1, 16(a0) -; ZDINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw a5, 20(a0) -; ZDINX32-NEXT: lw a6, 24(a0) -; ZDINX32-NEXT: lw a7, 28(a0) -; ZDINX32-NEXT: lw t3, 32(a0) -; ZDINX32-NEXT: lw t4, 36(a0) -; ZDINX32-NEXT: lw t5, 40(a0) -; ZDINX32-NEXT: lw t6, 44(a0) -; ZDINX32-NEXT: lw t1, 48(a0) -; ZDINX32-NEXT: lw t2, 52(a0) -; ZDINX32-NEXT: lw s0, 56(a0) -; ZDINX32-NEXT: lw s1, 60(a0) -; ZDINX32-NEXT: lw s2, 64(a0) -; ZDINX32-NEXT: lw s3, 68(a0) -; ZDINX32-NEXT: lw s4, 72(a0) -; ZDINX32-NEXT: lw s5, 76(a0) -; ZDINX32-NEXT: lw s6, 80(a0) -; ZDINX32-NEXT: lw s7, 84(a0) -; ZDINX32-NEXT: lw s8, 88(a0) -; ZDINX32-NEXT: lw s9, 92(a0) -; ZDINX32-NEXT: lw s10, 96(a0) -; ZDINX32-NEXT: lw s11, 100(a0) -; ZDINX32-NEXT: lw ra, 104(a0) -; ZDINX32-NEXT: lw a4, 108(a0) -; ZDINX32-NEXT: lw a3, 112(a0) -; ZDINX32-NEXT: lw a2, 116(a0) -; ZDINX32-NEXT: lw a1, 120(a0) -; ZDINX32-NEXT: lw a0, 124(a0) -; ZDINX32-NEXT: sw a0, 76(sp) -; ZDINX32-NEXT: sw a1, 72(sp) -; ZDINX32-NEXT: sw a2, 68(sp) -; ZDINX32-NEXT: sw a3, 64(sp) -; ZDINX32-NEXT: sw a4, 60(sp) -; ZDINX32-NEXT: sw ra, 56(sp) -; ZDINX32-NEXT: sw s11, 52(sp) +; ZDINX32-NEXT: mv t0, a0 +; ZDINX32-NEXT: lw a0, 0(a0) +; ZDINX32-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: lw a0, 4(t0) +; ZDINX32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: lw a0, 8(t0) +; ZDINX32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: lw a0, 12(t0) +; ZDINX32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: lw a4, 16(t0) +; ZDINX32-NEXT: lw a5, 20(t0) +; ZDINX32-NEXT: lw a6, 24(t0) +; ZDINX32-NEXT: lw a7, 28(t0) +; ZDINX32-NEXT: lw t3, 32(t0) +; ZDINX32-NEXT: lw t4, 36(t0) +; ZDINX32-NEXT: lw t5, 40(t0) +; ZDINX32-NEXT: lw t6, 44(t0) +; ZDINX32-NEXT: lw t1, 48(t0) +; ZDINX32-NEXT: lw t2, 52(t0) +; ZDINX32-NEXT: lw s0, 56(t0) +; ZDINX32-NEXT: lw s1, 60(t0) +; ZDINX32-NEXT: lw s2, 64(t0) +; ZDINX32-NEXT: lw s3, 68(t0) +; 
ZDINX32-NEXT: lw s4, 72(t0) +; ZDINX32-NEXT: lw s5, 76(t0) +; ZDINX32-NEXT: lw s6, 80(t0) +; ZDINX32-NEXT: lw s7, 84(t0) +; ZDINX32-NEXT: lw s8, 88(t0) +; ZDINX32-NEXT: lw s9, 92(t0) +; ZDINX32-NEXT: lw s10, 96(t0) +; ZDINX32-NEXT: lw s11, 100(t0) +; ZDINX32-NEXT: lw ra, 104(t0) +; ZDINX32-NEXT: lw a3, 108(t0) +; ZDINX32-NEXT: lw a0, 112(t0) +; ZDINX32-NEXT: lw a1, 116(t0) +; ZDINX32-NEXT: lw a2, 120(t0) +; ZDINX32-NEXT: lw t0, 124(t0) +; ZDINX32-NEXT: sw a0, 64(sp) +; ZDINX32-NEXT: sw a1, 68(sp) +; ZDINX32-NEXT: sw a2, 72(sp) +; ZDINX32-NEXT: sw t0, 76(sp) ; ZDINX32-NEXT: sw s10, 48(sp) -; ZDINX32-NEXT: sw s9, 44(sp) -; ZDINX32-NEXT: sw s8, 40(sp) -; ZDINX32-NEXT: sw s7, 36(sp) +; ZDINX32-NEXT: sw s11, 52(sp) +; ZDINX32-NEXT: sw ra, 56(sp) +; ZDINX32-NEXT: sw a3, 60(sp) ; ZDINX32-NEXT: sw s6, 32(sp) -; ZDINX32-NEXT: sw s5, 28(sp) -; ZDINX32-NEXT: sw s4, 24(sp) -; ZDINX32-NEXT: sw s3, 20(sp) +; ZDINX32-NEXT: sw s7, 36(sp) +; ZDINX32-NEXT: sw s8, 40(sp) +; ZDINX32-NEXT: sw s9, 44(sp) ; ZDINX32-NEXT: sw s2, 16(sp) -; ZDINX32-NEXT: sw s1, 12(sp) -; ZDINX32-NEXT: sw s0, 8(sp) -; ZDINX32-NEXT: sw t2, 4(sp) +; ZDINX32-NEXT: sw s3, 20(sp) +; ZDINX32-NEXT: sw s4, 24(sp) +; ZDINX32-NEXT: sw s5, 28(sp) ; ZDINX32-NEXT: sw t1, 0(sp) -; ZDINX32-NEXT: mv a0, t0 -; ZDINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: sw t2, 4(sp) +; ZDINX32-NEXT: sw s0, 8(sp) +; ZDINX32-NEXT: sw s1, 12(sp) +; ZDINX32-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw a3, 92(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: call callee_half_32 ; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX32-NEXT: lui a1, 1048560 @@ -744,67 +744,67 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZDINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: ld t0, 0(a0) -; ZDINX64-NEXT: ld a1, 8(a0) -; ZDINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: ld a1, 16(a0) -; ZDINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: ld a1, 24(a0) -; ZDINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: ld a1, 32(a0) -; ZDINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: ld a5, 40(a0) -; ZDINX64-NEXT: ld a6, 48(a0) -; ZDINX64-NEXT: ld a7, 56(a0) -; ZDINX64-NEXT: ld t3, 64(a0) -; ZDINX64-NEXT: ld t4, 72(a0) -; ZDINX64-NEXT: ld t5, 80(a0) -; ZDINX64-NEXT: ld t6, 88(a0) -; ZDINX64-NEXT: ld t1, 96(a0) -; ZDINX64-NEXT: ld t2, 104(a0) -; ZDINX64-NEXT: ld s0, 112(a0) -; ZDINX64-NEXT: ld s1, 120(a0) -; ZDINX64-NEXT: ld s2, 128(a0) -; ZDINX64-NEXT: ld s3, 136(a0) -; ZDINX64-NEXT: ld s4, 144(a0) -; ZDINX64-NEXT: ld s5, 152(a0) -; ZDINX64-NEXT: ld s6, 160(a0) -; ZDINX64-NEXT: ld s7, 168(a0) -; ZDINX64-NEXT: ld s8, 176(a0) -; ZDINX64-NEXT: ld s9, 184(a0) -; ZDINX64-NEXT: ld s10, 192(a0) -; ZDINX64-NEXT: ld s11, 200(a0) -; ZDINX64-NEXT: ld ra, 208(a0) -; ZDINX64-NEXT: ld a4, 216(a0) -; ZDINX64-NEXT: ld a3, 224(a0) -; ZDINX64-NEXT: ld a2, 232(a0) -; ZDINX64-NEXT: ld a1, 240(a0) -; ZDINX64-NEXT: ld a0, 248(a0) -; ZDINX64-NEXT: sd a0, 152(sp) -; ZDINX64-NEXT: sd a1, 144(sp) -; ZDINX64-NEXT: sd a2, 136(sp) -; ZDINX64-NEXT: sd a3, 128(sp) -; ZDINX64-NEXT: sd a4, 120(sp) -; 
ZDINX64-NEXT: sd ra, 112(sp) -; ZDINX64-NEXT: sd s11, 104(sp) +; ZDINX64-NEXT: mv t0, a0 +; ZDINX64-NEXT: ld a0, 0(a0) +; ZDINX64-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: ld a0, 8(t0) +; ZDINX64-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: ld a0, 16(t0) +; ZDINX64-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: ld a0, 24(t0) +; ZDINX64-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: ld a4, 32(t0) +; ZDINX64-NEXT: ld a5, 40(t0) +; ZDINX64-NEXT: ld a6, 48(t0) +; ZDINX64-NEXT: ld a7, 56(t0) +; ZDINX64-NEXT: ld t3, 64(t0) +; ZDINX64-NEXT: ld t4, 72(t0) +; ZDINX64-NEXT: ld t5, 80(t0) +; ZDINX64-NEXT: ld t6, 88(t0) +; ZDINX64-NEXT: ld t1, 96(t0) +; ZDINX64-NEXT: ld t2, 104(t0) +; ZDINX64-NEXT: ld s0, 112(t0) +; ZDINX64-NEXT: ld s1, 120(t0) +; ZDINX64-NEXT: ld s2, 128(t0) +; ZDINX64-NEXT: ld s3, 136(t0) +; ZDINX64-NEXT: ld s4, 144(t0) +; ZDINX64-NEXT: ld s5, 152(t0) +; ZDINX64-NEXT: ld s6, 160(t0) +; ZDINX64-NEXT: ld s7, 168(t0) +; ZDINX64-NEXT: ld s8, 176(t0) +; ZDINX64-NEXT: ld s9, 184(t0) +; ZDINX64-NEXT: ld s10, 192(t0) +; ZDINX64-NEXT: ld s11, 200(t0) +; ZDINX64-NEXT: ld ra, 208(t0) +; ZDINX64-NEXT: ld a3, 216(t0) +; ZDINX64-NEXT: ld a0, 224(t0) +; ZDINX64-NEXT: ld a1, 232(t0) +; ZDINX64-NEXT: ld a2, 240(t0) +; ZDINX64-NEXT: ld t0, 248(t0) +; ZDINX64-NEXT: sd a0, 128(sp) +; ZDINX64-NEXT: sd a1, 136(sp) +; ZDINX64-NEXT: sd a2, 144(sp) +; ZDINX64-NEXT: sd t0, 152(sp) ; ZDINX64-NEXT: sd s10, 96(sp) -; ZDINX64-NEXT: sd s9, 88(sp) -; ZDINX64-NEXT: sd s8, 80(sp) -; ZDINX64-NEXT: sd s7, 72(sp) +; ZDINX64-NEXT: sd s11, 104(sp) +; ZDINX64-NEXT: sd ra, 112(sp) +; ZDINX64-NEXT: sd a3, 120(sp) ; ZDINX64-NEXT: sd s6, 64(sp) -; ZDINX64-NEXT: sd s5, 56(sp) -; ZDINX64-NEXT: sd s4, 48(sp) -; ZDINX64-NEXT: sd s3, 40(sp) +; ZDINX64-NEXT: sd s7, 72(sp) +; ZDINX64-NEXT: sd s8, 80(sp) +; ZDINX64-NEXT: sd s9, 88(sp) ; ZDINX64-NEXT: sd s2, 32(sp) -; ZDINX64-NEXT: sd s1, 24(sp) -; ZDINX64-NEXT: sd s0, 16(sp) -; ZDINX64-NEXT: sd t2, 8(sp) +; ZDINX64-NEXT: sd s3, 40(sp) +; ZDINX64-NEXT: sd s4, 48(sp) +; ZDINX64-NEXT: sd s5, 56(sp) ; ZDINX64-NEXT: sd t1, 0(sp) -; ZDINX64-NEXT: mv a0, t0 -; ZDINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: sd t2, 8(sp) +; ZDINX64-NEXT: sd s0, 16(sp) +; ZDINX64-NEXT: sd s1, 24(sp) +; ZDINX64-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld a3, 168(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: call callee_half_32 ; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX64-NEXT: lui a1, 1048560 @@ -883,8 +883,8 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: lw t0, 172(sp) ; ZHINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lw t6, 176(sp) -; ZHINX32-NEXT: lw t5, 180(sp) -; ZHINX32-NEXT: lw t4, 184(sp) +; ZHINX32-NEXT: lw t4, 180(sp) +; ZHINX32-NEXT: lw t5, 184(sp) ; ZHINX32-NEXT: lw s0, 188(sp) ; ZHINX32-NEXT: lw s1, 192(sp) ; ZHINX32-NEXT: lw s2, 196(sp) @@ -898,30 +898,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: lw s10, 228(sp) ; ZHINX32-NEXT: lw s11, 232(sp) ; ZHINX32-NEXT: lw ra, 236(sp) -; ZHINX32-NEXT: lw t3, 240(sp) -; ZHINX32-NEXT: lw t2, 244(sp) -; ZHINX32-NEXT: lw t1, 248(sp) -; ZHINX32-NEXT: lw t0, 252(sp) -; ZHINX32-NEXT: sw t0, 
76(sp) -; ZHINX32-NEXT: sw t1, 72(sp) -; ZHINX32-NEXT: sw t2, 68(sp) -; ZHINX32-NEXT: sw t3, 64(sp) -; ZHINX32-NEXT: sw ra, 60(sp) -; ZHINX32-NEXT: sw s11, 56(sp) -; ZHINX32-NEXT: sw s10, 52(sp) +; ZHINX32-NEXT: lw t0, 240(sp) +; ZHINX32-NEXT: lw t1, 244(sp) +; ZHINX32-NEXT: lw t2, 248(sp) +; ZHINX32-NEXT: lw t3, 252(sp) +; ZHINX32-NEXT: sw t0, 64(sp) +; ZHINX32-NEXT: sw t1, 68(sp) +; ZHINX32-NEXT: sw t2, 72(sp) +; ZHINX32-NEXT: sw t3, 76(sp) ; ZHINX32-NEXT: sw s9, 48(sp) -; ZHINX32-NEXT: sw s8, 44(sp) -; ZHINX32-NEXT: sw s7, 40(sp) -; ZHINX32-NEXT: sw s6, 36(sp) +; ZHINX32-NEXT: sw s10, 52(sp) +; ZHINX32-NEXT: sw s11, 56(sp) +; ZHINX32-NEXT: sw ra, 60(sp) ; ZHINX32-NEXT: sw s5, 32(sp) -; ZHINX32-NEXT: sw s4, 28(sp) -; ZHINX32-NEXT: sw s3, 24(sp) -; ZHINX32-NEXT: sw s2, 20(sp) +; ZHINX32-NEXT: sw s6, 36(sp) +; ZHINX32-NEXT: sw s7, 40(sp) +; ZHINX32-NEXT: sw s8, 44(sp) ; ZHINX32-NEXT: sw s1, 16(sp) -; ZHINX32-NEXT: sw s0, 12(sp) -; ZHINX32-NEXT: sw t4, 8(sp) -; ZHINX32-NEXT: sw t5, 4(sp) +; ZHINX32-NEXT: sw s2, 20(sp) +; ZHINX32-NEXT: sw s3, 24(sp) +; ZHINX32-NEXT: sw s4, 28(sp) ; ZHINX32-NEXT: sw t6, 0(sp) +; ZHINX32-NEXT: sw t4, 4(sp) +; ZHINX32-NEXT: sw t5, 8(sp) +; ZHINX32-NEXT: sw s0, 12(sp) ; ZHINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload ; ZHINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload ; ZHINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload @@ -968,8 +968,8 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw t0, 232(sp) ; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill ; ZHINX64-NEXT: lw t6, 240(sp) -; ZHINX64-NEXT: lw t5, 248(sp) -; ZHINX64-NEXT: lw t4, 256(sp) +; ZHINX64-NEXT: lw t4, 248(sp) +; ZHINX64-NEXT: lw t5, 256(sp) ; ZHINX64-NEXT: lw s0, 264(sp) ; ZHINX64-NEXT: lw s1, 272(sp) ; ZHINX64-NEXT: lw s2, 280(sp) @@ -983,30 +983,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 344(sp) ; ZHINX64-NEXT: lw s11, 352(sp) ; ZHINX64-NEXT: lw ra, 360(sp) -; ZHINX64-NEXT: lw t3, 368(sp) -; ZHINX64-NEXT: lw t2, 376(sp) -; ZHINX64-NEXT: lw t1, 384(sp) -; ZHINX64-NEXT: lw t0, 392(sp) -; ZHINX64-NEXT: sw t0, 76(sp) -; ZHINX64-NEXT: sw t1, 72(sp) -; ZHINX64-NEXT: sw t2, 68(sp) -; ZHINX64-NEXT: sw t3, 64(sp) -; ZHINX64-NEXT: sw ra, 60(sp) -; ZHINX64-NEXT: sw s11, 56(sp) -; ZHINX64-NEXT: sw s10, 52(sp) +; ZHINX64-NEXT: lw t0, 368(sp) +; ZHINX64-NEXT: lw t1, 376(sp) +; ZHINX64-NEXT: lw t2, 384(sp) +; ZHINX64-NEXT: lw t3, 392(sp) +; ZHINX64-NEXT: sw t0, 64(sp) +; ZHINX64-NEXT: sw t1, 68(sp) +; ZHINX64-NEXT: sw t2, 72(sp) +; ZHINX64-NEXT: sw t3, 76(sp) ; ZHINX64-NEXT: sw s9, 48(sp) -; ZHINX64-NEXT: sw s8, 44(sp) -; ZHINX64-NEXT: sw s7, 40(sp) -; ZHINX64-NEXT: sw s6, 36(sp) +; ZHINX64-NEXT: sw s10, 52(sp) +; ZHINX64-NEXT: sw s11, 56(sp) +; ZHINX64-NEXT: sw ra, 60(sp) ; ZHINX64-NEXT: sw s5, 32(sp) -; ZHINX64-NEXT: sw s4, 28(sp) -; ZHINX64-NEXT: sw s3, 24(sp) -; ZHINX64-NEXT: sw s2, 20(sp) +; ZHINX64-NEXT: sw s6, 36(sp) +; ZHINX64-NEXT: sw s7, 40(sp) +; ZHINX64-NEXT: sw s8, 44(sp) ; ZHINX64-NEXT: sw s1, 16(sp) -; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t4, 8(sp) -; ZHINX64-NEXT: sw t5, 4(sp) +; ZHINX64-NEXT: sw s2, 20(sp) +; ZHINX64-NEXT: sw s3, 24(sp) +; ZHINX64-NEXT: sw s4, 28(sp) ; ZHINX64-NEXT: sw t6, 0(sp) +; ZHINX64-NEXT: sw t4, 4(sp) +; ZHINX64-NEXT: sw t5, 8(sp) +; ZHINX64-NEXT: sw s0, 12(sp) ; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload ; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload ; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload @@ -1053,8 +1053,8 @@ define float @caller_float_32(<32 x 
float> %A) nounwind { ; ZFINX32-NEXT: lw t0, 172(sp) ; ZFINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill ; ZFINX32-NEXT: lw t6, 176(sp) -; ZFINX32-NEXT: lw t5, 180(sp) -; ZFINX32-NEXT: lw t4, 184(sp) +; ZFINX32-NEXT: lw t4, 180(sp) +; ZFINX32-NEXT: lw t5, 184(sp) ; ZFINX32-NEXT: lw s0, 188(sp) ; ZFINX32-NEXT: lw s1, 192(sp) ; ZFINX32-NEXT: lw s2, 196(sp) @@ -1068,30 +1068,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX32-NEXT: lw s10, 228(sp) ; ZFINX32-NEXT: lw s11, 232(sp) ; ZFINX32-NEXT: lw ra, 236(sp) -; ZFINX32-NEXT: lw t3, 240(sp) -; ZFINX32-NEXT: lw t2, 244(sp) -; ZFINX32-NEXT: lw t1, 248(sp) -; ZFINX32-NEXT: lw t0, 252(sp) -; ZFINX32-NEXT: sw t0, 76(sp) -; ZFINX32-NEXT: sw t1, 72(sp) -; ZFINX32-NEXT: sw t2, 68(sp) -; ZFINX32-NEXT: sw t3, 64(sp) -; ZFINX32-NEXT: sw ra, 60(sp) -; ZFINX32-NEXT: sw s11, 56(sp) -; ZFINX32-NEXT: sw s10, 52(sp) +; ZFINX32-NEXT: lw t0, 240(sp) +; ZFINX32-NEXT: lw t1, 244(sp) +; ZFINX32-NEXT: lw t2, 248(sp) +; ZFINX32-NEXT: lw t3, 252(sp) +; ZFINX32-NEXT: sw t0, 64(sp) +; ZFINX32-NEXT: sw t1, 68(sp) +; ZFINX32-NEXT: sw t2, 72(sp) +; ZFINX32-NEXT: sw t3, 76(sp) ; ZFINX32-NEXT: sw s9, 48(sp) -; ZFINX32-NEXT: sw s8, 44(sp) -; ZFINX32-NEXT: sw s7, 40(sp) -; ZFINX32-NEXT: sw s6, 36(sp) +; ZFINX32-NEXT: sw s10, 52(sp) +; ZFINX32-NEXT: sw s11, 56(sp) +; ZFINX32-NEXT: sw ra, 60(sp) ; ZFINX32-NEXT: sw s5, 32(sp) -; ZFINX32-NEXT: sw s4, 28(sp) -; ZFINX32-NEXT: sw s3, 24(sp) -; ZFINX32-NEXT: sw s2, 20(sp) +; ZFINX32-NEXT: sw s6, 36(sp) +; ZFINX32-NEXT: sw s7, 40(sp) +; ZFINX32-NEXT: sw s8, 44(sp) ; ZFINX32-NEXT: sw s1, 16(sp) -; ZFINX32-NEXT: sw s0, 12(sp) -; ZFINX32-NEXT: sw t4, 8(sp) -; ZFINX32-NEXT: sw t5, 4(sp) +; ZFINX32-NEXT: sw s2, 20(sp) +; ZFINX32-NEXT: sw s3, 24(sp) +; ZFINX32-NEXT: sw s4, 28(sp) ; ZFINX32-NEXT: sw t6, 0(sp) +; ZFINX32-NEXT: sw t4, 4(sp) +; ZFINX32-NEXT: sw t5, 8(sp) +; ZFINX32-NEXT: sw s0, 12(sp) ; ZFINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload @@ -1138,8 +1138,8 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw t0, 232(sp) ; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill ; ZFINX64-NEXT: lw t6, 240(sp) -; ZFINX64-NEXT: lw t5, 248(sp) -; ZFINX64-NEXT: lw t4, 256(sp) +; ZFINX64-NEXT: lw t4, 248(sp) +; ZFINX64-NEXT: lw t5, 256(sp) ; ZFINX64-NEXT: lw s0, 264(sp) ; ZFINX64-NEXT: lw s1, 272(sp) ; ZFINX64-NEXT: lw s2, 280(sp) @@ -1153,30 +1153,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 344(sp) ; ZFINX64-NEXT: lw s11, 352(sp) ; ZFINX64-NEXT: lw ra, 360(sp) -; ZFINX64-NEXT: lw t3, 368(sp) -; ZFINX64-NEXT: lw t2, 376(sp) -; ZFINX64-NEXT: lw t1, 384(sp) -; ZFINX64-NEXT: lw t0, 392(sp) -; ZFINX64-NEXT: sw t0, 76(sp) -; ZFINX64-NEXT: sw t1, 72(sp) -; ZFINX64-NEXT: sw t2, 68(sp) -; ZFINX64-NEXT: sw t3, 64(sp) -; ZFINX64-NEXT: sw ra, 60(sp) -; ZFINX64-NEXT: sw s11, 56(sp) -; ZFINX64-NEXT: sw s10, 52(sp) +; ZFINX64-NEXT: lw t0, 368(sp) +; ZFINX64-NEXT: lw t1, 376(sp) +; ZFINX64-NEXT: lw t2, 384(sp) +; ZFINX64-NEXT: lw t3, 392(sp) +; ZFINX64-NEXT: sw t0, 64(sp) +; ZFINX64-NEXT: sw t1, 68(sp) +; ZFINX64-NEXT: sw t2, 72(sp) +; ZFINX64-NEXT: sw t3, 76(sp) ; ZFINX64-NEXT: sw s9, 48(sp) -; ZFINX64-NEXT: sw s8, 44(sp) -; ZFINX64-NEXT: sw s7, 40(sp) -; ZFINX64-NEXT: sw s6, 36(sp) +; ZFINX64-NEXT: sw s10, 52(sp) +; ZFINX64-NEXT: sw s11, 56(sp) +; ZFINX64-NEXT: sw ra, 60(sp) ; ZFINX64-NEXT: sw s5, 32(sp) -; ZFINX64-NEXT: sw s4, 28(sp) -; ZFINX64-NEXT: sw 
s3, 24(sp) -; ZFINX64-NEXT: sw s2, 20(sp) +; ZFINX64-NEXT: sw s6, 36(sp) +; ZFINX64-NEXT: sw s7, 40(sp) +; ZFINX64-NEXT: sw s8, 44(sp) ; ZFINX64-NEXT: sw s1, 16(sp) -; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t4, 8(sp) -; ZFINX64-NEXT: sw t5, 4(sp) +; ZFINX64-NEXT: sw s2, 20(sp) +; ZFINX64-NEXT: sw s3, 24(sp) +; ZFINX64-NEXT: sw s4, 28(sp) ; ZFINX64-NEXT: sw t6, 0(sp) +; ZFINX64-NEXT: sw t4, 4(sp) +; ZFINX64-NEXT: sw t5, 8(sp) +; ZFINX64-NEXT: sw s0, 12(sp) ; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload ; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload ; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload @@ -1223,8 +1223,8 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: lw t0, 172(sp) ; ZDINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill ; ZDINX32-NEXT: lw t6, 176(sp) -; ZDINX32-NEXT: lw t5, 180(sp) -; ZDINX32-NEXT: lw t4, 184(sp) +; ZDINX32-NEXT: lw t4, 180(sp) +; ZDINX32-NEXT: lw t5, 184(sp) ; ZDINX32-NEXT: lw s0, 188(sp) ; ZDINX32-NEXT: lw s1, 192(sp) ; ZDINX32-NEXT: lw s2, 196(sp) @@ -1238,30 +1238,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: lw s10, 228(sp) ; ZDINX32-NEXT: lw s11, 232(sp) ; ZDINX32-NEXT: lw ra, 236(sp) -; ZDINX32-NEXT: lw t3, 240(sp) -; ZDINX32-NEXT: lw t2, 244(sp) -; ZDINX32-NEXT: lw t1, 248(sp) -; ZDINX32-NEXT: lw t0, 252(sp) -; ZDINX32-NEXT: sw t0, 76(sp) -; ZDINX32-NEXT: sw t1, 72(sp) -; ZDINX32-NEXT: sw t2, 68(sp) -; ZDINX32-NEXT: sw t3, 64(sp) -; ZDINX32-NEXT: sw ra, 60(sp) -; ZDINX32-NEXT: sw s11, 56(sp) -; ZDINX32-NEXT: sw s10, 52(sp) +; ZDINX32-NEXT: lw t0, 240(sp) +; ZDINX32-NEXT: lw t1, 244(sp) +; ZDINX32-NEXT: lw t2, 248(sp) +; ZDINX32-NEXT: lw t3, 252(sp) +; ZDINX32-NEXT: sw t0, 64(sp) +; ZDINX32-NEXT: sw t1, 68(sp) +; ZDINX32-NEXT: sw t2, 72(sp) +; ZDINX32-NEXT: sw t3, 76(sp) ; ZDINX32-NEXT: sw s9, 48(sp) -; ZDINX32-NEXT: sw s8, 44(sp) -; ZDINX32-NEXT: sw s7, 40(sp) -; ZDINX32-NEXT: sw s6, 36(sp) +; ZDINX32-NEXT: sw s10, 52(sp) +; ZDINX32-NEXT: sw s11, 56(sp) +; ZDINX32-NEXT: sw ra, 60(sp) ; ZDINX32-NEXT: sw s5, 32(sp) -; ZDINX32-NEXT: sw s4, 28(sp) -; ZDINX32-NEXT: sw s3, 24(sp) -; ZDINX32-NEXT: sw s2, 20(sp) +; ZDINX32-NEXT: sw s6, 36(sp) +; ZDINX32-NEXT: sw s7, 40(sp) +; ZDINX32-NEXT: sw s8, 44(sp) ; ZDINX32-NEXT: sw s1, 16(sp) -; ZDINX32-NEXT: sw s0, 12(sp) -; ZDINX32-NEXT: sw t4, 8(sp) -; ZDINX32-NEXT: sw t5, 4(sp) +; ZDINX32-NEXT: sw s2, 20(sp) +; ZDINX32-NEXT: sw s3, 24(sp) +; ZDINX32-NEXT: sw s4, 28(sp) ; ZDINX32-NEXT: sw t6, 0(sp) +; ZDINX32-NEXT: sw t4, 4(sp) +; ZDINX32-NEXT: sw t5, 8(sp) +; ZDINX32-NEXT: sw s0, 12(sp) ; ZDINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload @@ -1308,8 +1308,8 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw t0, 232(sp) ; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill ; ZDINX64-NEXT: lw t6, 240(sp) -; ZDINX64-NEXT: lw t5, 248(sp) -; ZDINX64-NEXT: lw t4, 256(sp) +; ZDINX64-NEXT: lw t4, 248(sp) +; ZDINX64-NEXT: lw t5, 256(sp) ; ZDINX64-NEXT: lw s0, 264(sp) ; ZDINX64-NEXT: lw s1, 272(sp) ; ZDINX64-NEXT: lw s2, 280(sp) @@ -1323,30 +1323,30 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 344(sp) ; ZDINX64-NEXT: lw s11, 352(sp) ; ZDINX64-NEXT: lw ra, 360(sp) -; ZDINX64-NEXT: lw t3, 368(sp) -; ZDINX64-NEXT: lw t2, 376(sp) -; ZDINX64-NEXT: lw t1, 384(sp) -; ZDINX64-NEXT: lw t0, 392(sp) -; ZDINX64-NEXT: sw t0, 76(sp) -; ZDINX64-NEXT: sw t1, 72(sp) -; ZDINX64-NEXT: sw t2, 
68(sp) -; ZDINX64-NEXT: sw t3, 64(sp) -; ZDINX64-NEXT: sw ra, 60(sp) -; ZDINX64-NEXT: sw s11, 56(sp) -; ZDINX64-NEXT: sw s10, 52(sp) +; ZDINX64-NEXT: lw t0, 368(sp) +; ZDINX64-NEXT: lw t1, 376(sp) +; ZDINX64-NEXT: lw t2, 384(sp) +; ZDINX64-NEXT: lw t3, 392(sp) +; ZDINX64-NEXT: sw t0, 64(sp) +; ZDINX64-NEXT: sw t1, 68(sp) +; ZDINX64-NEXT: sw t2, 72(sp) +; ZDINX64-NEXT: sw t3, 76(sp) ; ZDINX64-NEXT: sw s9, 48(sp) -; ZDINX64-NEXT: sw s8, 44(sp) -; ZDINX64-NEXT: sw s7, 40(sp) -; ZDINX64-NEXT: sw s6, 36(sp) +; ZDINX64-NEXT: sw s10, 52(sp) +; ZDINX64-NEXT: sw s11, 56(sp) +; ZDINX64-NEXT: sw ra, 60(sp) ; ZDINX64-NEXT: sw s5, 32(sp) -; ZDINX64-NEXT: sw s4, 28(sp) -; ZDINX64-NEXT: sw s3, 24(sp) -; ZDINX64-NEXT: sw s2, 20(sp) +; ZDINX64-NEXT: sw s6, 36(sp) +; ZDINX64-NEXT: sw s7, 40(sp) +; ZDINX64-NEXT: sw s8, 44(sp) ; ZDINX64-NEXT: sw s1, 16(sp) -; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t4, 8(sp) -; ZDINX64-NEXT: sw t5, 4(sp) +; ZDINX64-NEXT: sw s2, 20(sp) +; ZDINX64-NEXT: sw s3, 24(sp) +; ZDINX64-NEXT: sw s4, 28(sp) ; ZDINX64-NEXT: sw t6, 0(sp) +; ZDINX64-NEXT: sw t4, 4(sp) +; ZDINX64-NEXT: sw t5, 8(sp) +; ZDINX64-NEXT: sw s0, 12(sp) ; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload ; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload ; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index 4d4580188096f05..a204b928304123f 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -293,8 +293,8 @@ define dso_local void @store_g_4() nounwind { ; RV32I-MEDIUM-NEXT: .Lpcrel_hi6: ; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_4) ; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi6) -; RV32I-MEDIUM-NEXT: sw zero, 4(a0) ; RV32I-MEDIUM-NEXT: sw zero, 0(a0) +; RV32I-MEDIUM-NEXT: sw zero, 4(a0) ; RV32I-MEDIUM-NEXT: ret ; ; RV64I-LABEL: store_g_4: @@ -335,8 +335,8 @@ define dso_local void @store_g_8() nounwind { ; RV32I-MEDIUM-NEXT: .Lpcrel_hi7: ; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_8) ; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi7) -; RV32I-MEDIUM-NEXT: sw zero, 4(a0) ; RV32I-MEDIUM-NEXT: sw zero, 0(a0) +; RV32I-MEDIUM-NEXT: sw zero, 4(a0) ; RV32I-MEDIUM-NEXT: ret ; ; RV64I-LABEL: store_g_8: diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index 603491bf3d3003d..133d735a46ed7f5 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -4338,8 +4338,8 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: sw zero, 0(sp) +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a2, 1 ; RV32-NEXT: li a3, 0 @@ -4398,8 +4398,8 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: sw zero, 0(sp) +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a2, 1 ; RV32-NEXT: li a4, 5 @@ -4464,14 +4464,14 @@ define i128 @load128(ptr %p) nounwind { ; RV32-NEXT: addi a2, sp, 8 ; RV32-NEXT: li a3, 5 ; RV32-NEXT: call __atomic_load -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: sw a0, 12(s0) -; RV32-NEXT: sw a1, 8(s0) -; RV32-NEXT: sw a2, 4(s0) -; RV32-NEXT: sw a3, 0(s0) 
+; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a3, 20(sp) +; RV32-NEXT: sw a0, 0(s0) +; RV32-NEXT: sw a1, 4(s0) +; RV32-NEXT: sw a2, 8(s0) +; RV32-NEXT: sw a3, 12(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -4496,10 +4496,10 @@ define void @store128(ptr %p) nounwind { ; RV32-NEXT: addi sp, sp, -32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: sw zero, 20(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: li a0, 16 ; RV32-NEXT: addi a2, sp, 8 ; RV32-NEXT: li a3, 5 @@ -4550,8 +4550,8 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: sw a3, 24(sp) ; RV32-NEXT: sw a4, 28(sp) -; RV32-NEXT: sw a5, 4(sp) ; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a5, 4(sp) ; RV32-NEXT: sw a6, 8(sp) ; RV32-NEXT: sw a7, 12(sp) ; RV32-NEXT: li a0, 16 @@ -4599,29 +4599,29 @@ define i128 @cmpxchg128(ptr %p) nounwind { ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: sw zero, 36(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw zero, 28(sp) ; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw zero, 20(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: sw zero, 28(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw zero, 36(sp) ; RV32-NEXT: li a0, 1 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: li a0, 16 ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: call __atomic_compare_exchange -; RV32-NEXT: lw a0, 36(sp) -; RV32-NEXT: lw a1, 32(sp) -; RV32-NEXT: lw a2, 28(sp) -; RV32-NEXT: lw a3, 24(sp) -; RV32-NEXT: sw a0, 12(s0) -; RV32-NEXT: sw a1, 8(s0) -; RV32-NEXT: sw a2, 4(s0) -; RV32-NEXT: sw a3, 0(s0) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: lw a2, 32(sp) +; RV32-NEXT: lw a3, 36(sp) +; RV32-NEXT: sw a0, 0(s0) +; RV32-NEXT: sw a1, 4(s0) +; RV32-NEXT: sw a2, 8(s0) +; RV32-NEXT: sw a3, 12(s0) ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 48 @@ -4631,8 +4631,8 @@ define i128 @cmpxchg128(ptr %p) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -32 ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd zero, 8(sp) ; RV64-NEXT: sd zero, 0(sp) +; RV64-NEXT: sd zero, 8(sp) ; RV64-NEXT: mv a1, sp ; RV64-NEXT: li a2, 1 ; RV64-NEXT: li a4, 5 diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll index 611a70c0ad8b939..0bde85b54e5d159 100644 --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -15,24 +15,24 @@ define i32 @test_load_and_cmp() nounwind { ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) ; RV32I-NEXT: lw a2, %lo(x)(a0) -; RV32I-NEXT: lw a1, %lo(x+4)(a0) -; RV32I-NEXT: lw a3, %lo(x+8)(a0) -; RV32I-NEXT: lw a0, %lo(x+12)(a0) -; RV32I-NEXT: lui a4, %hi(y) -; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a6, %lo(y+4)(a4) -; RV32I-NEXT: lw a7, %lo(y+8)(a4) -; RV32I-NEXT: lw a4, %lo(y+12)(a4) -; RV32I-NEXT: sw a4, 20(sp) -; RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: lw a3, %lo(x+4)(a0) +; RV32I-NEXT: lw a4, %lo(x+8)(a0) 
+; RV32I-NEXT: lw a5, %lo(x+12)(a0) +; RV32I-NEXT: lui a0, %hi(y) +; RV32I-NEXT: lw a1, %lo(y)(a0) +; RV32I-NEXT: lw a6, %lo(y+4)(a0) +; RV32I-NEXT: lw a7, %lo(y+8)(a0) +; RV32I-NEXT: lw a0, %lo(y+12)(a0) +; RV32I-NEXT: sw a1, 8(sp) ; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a0, 36(sp) -; RV32I-NEXT: sw a3, 32(sp) -; RV32I-NEXT: sw a1, 28(sp) +; RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a4, 32(sp) +; RV32I-NEXT: sw a5, 36(sp) ; RV32I-NEXT: call __netf2 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -52,35 +52,35 @@ define i32 @test_add_and_fptosi() nounwind { ; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) ; RV32I-NEXT: lw a3, %lo(x)(a0) -; RV32I-NEXT: lw a1, %lo(x+4)(a0) -; RV32I-NEXT: lw a2, %lo(x+8)(a0) -; RV32I-NEXT: lw a0, %lo(x+12)(a0) -; RV32I-NEXT: lui a4, %hi(y) -; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a6, %lo(y+4)(a4) -; RV32I-NEXT: lw a7, %lo(y+8)(a4) -; RV32I-NEXT: lw a4, %lo(y+12)(a4) -; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: lw a4, %lo(x+4)(a0) +; RV32I-NEXT: lw a5, %lo(x+8)(a0) +; RV32I-NEXT: lw a6, %lo(x+12)(a0) +; RV32I-NEXT: lui a0, %hi(y) +; RV32I-NEXT: lw a1, %lo(y)(a0) +; RV32I-NEXT: lw a2, %lo(y+4)(a0) +; RV32I-NEXT: lw a7, %lo(y+8)(a0) +; RV32I-NEXT: lw a0, %lo(y+12)(a0) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: sw a2, 28(sp) ; RV32I-NEXT: sw a7, 32(sp) -; RV32I-NEXT: sw a6, 28(sp) -; RV32I-NEXT: sw a5, 24(sp) -; RV32I-NEXT: sw a0, 52(sp) -; RV32I-NEXT: sw a2, 48(sp) -; RV32I-NEXT: sw a1, 44(sp) +; RV32I-NEXT: sw a0, 36(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 ; RV32I-NEXT: sw a3, 40(sp) +; RV32I-NEXT: sw a4, 44(sp) +; RV32I-NEXT: sw a5, 48(sp) +; RV32I-NEXT: sw a6, 52(sp) ; RV32I-NEXT: call __addtf3 ; RV32I-NEXT: lw a1, 56(sp) -; RV32I-NEXT: lw a0, 60(sp) -; RV32I-NEXT: lw a2, 64(sp) -; RV32I-NEXT: lw a3, 68(sp) -; RV32I-NEXT: sw a3, 20(sp) -; RV32I-NEXT: sw a2, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: lw a2, 60(sp) +; RV32I-NEXT: lw a3, 64(sp) +; RV32I-NEXT: lw a4, 68(sp) ; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a4, 20(sp) ; RV32I-NEXT: call __fixtfsi ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/frame.ll b/llvm/test/CodeGen/RISCV/frame.ll index d50f1e55417a725..10d542496e0f7f0 100644 --- a/llvm/test/CodeGen/RISCV/frame.ll +++ b/llvm/test/CodeGen/RISCV/frame.ll @@ -12,10 +12,10 @@ define i32 @test() nounwind { ; RV32I-FPELIM-NEXT: addi sp, sp, -32 ; RV32I-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: sw zero, 24(sp) -; RV32I-FPELIM-NEXT: sw zero, 20(sp) -; RV32I-FPELIM-NEXT: sw zero, 16(sp) -; RV32I-FPELIM-NEXT: sw zero, 12(sp) ; RV32I-FPELIM-NEXT: sw zero, 8(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) +; RV32I-FPELIM-NEXT: sw zero, 16(sp) +; RV32I-FPELIM-NEXT: sw zero, 20(sp) ; RV32I-FPELIM-NEXT: addi a0, sp, 12 ; RV32I-FPELIM-NEXT: call test1 ; RV32I-FPELIM-NEXT: li a0, 0 @@ -30,10 +30,10 @@ define i32 @test() nounwind { ; RV32I-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 32 ; RV32I-WITHFP-NEXT: sw zero, -16(s0) -; RV32I-WITHFP-NEXT: sw zero, -20(s0) -; RV32I-WITHFP-NEXT: sw zero, -24(s0) -; RV32I-WITHFP-NEXT: sw zero, -28(s0) ; 
RV32I-WITHFP-NEXT: sw zero, -32(s0) +; RV32I-WITHFP-NEXT: sw zero, -28(s0) +; RV32I-WITHFP-NEXT: sw zero, -24(s0) +; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: addi a0, s0, -28 ; RV32I-WITHFP-NEXT: call test1 ; RV32I-WITHFP-NEXT: li a0, 0 diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll index f532f3c9f33df03..9322abcfbbdcefe 100644 --- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll +++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll @@ -17,10 +17,10 @@ define void @getSetCCResultType(ptr %p, ptr %q) nounwind { ; RV32I-NEXT: addi a3, a3, -1 ; RV32I-NEXT: addi a2, a2, -1 ; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret entry: %0 = load <4 x i32>, ptr %p, align 16 diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll index 5f9866f08c821db..77efffb6358af7e 100644 --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -17,9 +17,9 @@ define dso_local void @multiple_stores() local_unnamed_addr nounwind { ; CHECK-NEXT: lui a0, %hi(s) ; CHECK-NEXT: addi a0, a0, %lo(s) ; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: li a2, 20 ; CHECK-NEXT: sw a1, 160(a0) -; CHECK-NEXT: li a1, 20 -; CHECK-NEXT: sw a1, 164(a0) +; CHECK-NEXT: sw a2, 164(a0) ; CHECK-NEXT: ret entry: store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4 diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index 6009a6c7e138aee..8116d138d288e2f 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -71,8 +71,8 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV32-NEXT: li a1, 0 ; RV32-NEXT: li a3, 0 ; RV32-NEXT: call __muldi3 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a2), zero diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll index dfd62e8d5f9f562..f60b77b92c09ea5 100644 --- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll +++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll @@ -57,14 +57,14 @@ define void @test3(ptr %a, ptr %b) nounwind { ; RV32-LABEL: test3: ; RV32: # %bb.0: # %entry ; RV32-NEXT: lw a2, 12(a1) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a4, 8(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a3, 0(a1) +; RV32-NEXT: lw a4, 4(a1) +; RV32-NEXT: lw a1, 8(a1) ; RV32-NEXT: lui a5, 524288 ; RV32-NEXT: xor a2, a2, a5 -; RV32-NEXT: sw a4, 8(a0) -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: sw a4, 4(a0) +; RV32-NEXT: sw a1, 8(a0) ; RV32-NEXT: sw a2, 12(a0) ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll index 0941f6a73da2800..cc07449c4e62007 100644 --- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll +++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll @@ -197,8 +197,8 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; RV32IFD-NEXT: call exp10f ; RV32IFD-NEXT: call __truncsfhf2 ; RV32IFD-NEXT: fmv.x.w a0, fa0 -; RV32IFD-NEXT: sh a0, 4(s0) ; RV32IFD-NEXT: sw s1, 0(s0) +; RV32IFD-NEXT: sh a0, 4(s0) ; 
RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -246,8 +246,8 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.x.w a0, fa0 -; RV64IFD-NEXT: sh a0, 4(s0) ; RV64IFD-NEXT: sw s1, 0(s0) +; RV64IFD-NEXT: sh a0, 4(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -313,10 +313,10 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; RV32IFD-NEXT: fmv.x.w s3, fs3 ; RV32IFD-NEXT: call __truncsfhf2 ; RV32IFD-NEXT: fmv.x.w a0, fa0 -; RV32IFD-NEXT: sh a0, 6(s0) -; RV32IFD-NEXT: sh s3, 4(s0) -; RV32IFD-NEXT: sh s2, 2(s0) ; RV32IFD-NEXT: sh s1, 0(s0) +; RV32IFD-NEXT: sh s2, 2(s0) +; RV32IFD-NEXT: sh s3, 4(s0) +; RV32IFD-NEXT: sh a0, 6(s0) ; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s1, 52(sp) # 4-byte Folded Reload @@ -377,10 +377,10 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; RV64IFD-NEXT: fmv.x.w s3, fs0 ; RV64IFD-NEXT: call __truncsfhf2 ; RV64IFD-NEXT: fmv.x.w a0, fa0 -; RV64IFD-NEXT: sh a0, 6(s0) -; RV64IFD-NEXT: sh s3, 4(s0) -; RV64IFD-NEXT: sh s2, 2(s0) ; RV64IFD-NEXT: sh s1, 0(s0) +; RV64IFD-NEXT: sh s2, 2(s0) +; RV64IFD-NEXT: sh s3, 4(s0) +; RV64IFD-NEXT: sh a0, 6(s0) ; RV64IFD-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -504,9 +504,9 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; RV32IFD-NEXT: fmv.s fs1, fa0 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call exp10f -; RV32IFD-NEXT: fsw fa0, 8(s0) -; RV32IFD-NEXT: fsw fs1, 4(s0) ; RV32IFD-NEXT: fsw fs2, 0(s0) +; RV32IFD-NEXT: fsw fs1, 4(s0) +; RV32IFD-NEXT: fsw fa0, 8(s0) ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload @@ -544,8 +544,8 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; RV64IFD-NEXT: or s1, a0, s1 ; RV64IFD-NEXT: fmv.s fa0, fs0 ; RV64IFD-NEXT: call exp10f -; RV64IFD-NEXT: fsw fa0, 8(s0) ; RV64IFD-NEXT: sd s1, 0(s0) +; RV64IFD-NEXT: fsw fa0, 8(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -588,10 +588,10 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; RV32IFD-NEXT: fmv.s fs1, fa0 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call exp10f -; RV32IFD-NEXT: fsw fa0, 12(s0) -; RV32IFD-NEXT: fsw fs1, 8(s0) -; RV32IFD-NEXT: fsw fs2, 4(s0) ; RV32IFD-NEXT: fsw fs3, 0(s0) +; RV32IFD-NEXT: fsw fs2, 4(s0) +; RV32IFD-NEXT: fsw fs1, 8(s0) +; RV32IFD-NEXT: fsw fa0, 12(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload @@ -631,10 +631,10 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; RV64IFD-NEXT: fmv.s fs1, fa0 ; RV64IFD-NEXT: fmv.s fa0, fs0 ; RV64IFD-NEXT: call exp10f -; RV64IFD-NEXT: fsw fa0, 12(s0) -; RV64IFD-NEXT: fsw fs1, 8(s0) -; RV64IFD-NEXT: fsw fs2, 4(s0) ; RV64IFD-NEXT: fsw fs3, 0(s0) +; RV64IFD-NEXT: fsw fs2, 4(s0) +; RV64IFD-NEXT: fsw fs1, 8(s0) +; RV64IFD-NEXT: fsw fa0, 12(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 
8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload @@ -736,9 +736,9 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; RV32IFD-NEXT: fmv.d fs1, fa0 ; RV32IFD-NEXT: fmv.d fa0, fs0 ; RV32IFD-NEXT: call exp10 -; RV32IFD-NEXT: fsd fa0, 16(s0) -; RV32IFD-NEXT: fsd fs1, 8(s0) ; RV32IFD-NEXT: fsd fs2, 0(s0) +; RV32IFD-NEXT: fsd fs1, 8(s0) +; RV32IFD-NEXT: fsd fa0, 16(s0) ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload @@ -771,9 +771,9 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; RV64IFD-NEXT: fmv.d fs1, fa0 ; RV64IFD-NEXT: fmv.d fa0, fs0 ; RV64IFD-NEXT: call exp10 -; RV64IFD-NEXT: fsd fa0, 16(s0) -; RV64IFD-NEXT: fsd fs1, 8(s0) ; RV64IFD-NEXT: fsd fs2, 0(s0) +; RV64IFD-NEXT: fsd fs1, 8(s0) +; RV64IFD-NEXT: fsd fa0, 16(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload @@ -816,10 +816,10 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) { ; RV32IFD-NEXT: fmv.d fs1, fa0 ; RV32IFD-NEXT: fmv.d fa0, fs0 ; RV32IFD-NEXT: call exp10 -; RV32IFD-NEXT: fsd fa0, 24(s0) -; RV32IFD-NEXT: fsd fs1, 16(s0) -; RV32IFD-NEXT: fsd fs2, 8(s0) ; RV32IFD-NEXT: fsd fs3, 0(s0) +; RV32IFD-NEXT: fsd fs2, 8(s0) +; RV32IFD-NEXT: fsd fs1, 16(s0) +; RV32IFD-NEXT: fsd fa0, 24(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload @@ -859,10 +859,10 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) { ; RV64IFD-NEXT: fmv.d fs1, fa0 ; RV64IFD-NEXT: fmv.d fa0, fs0 ; RV64IFD-NEXT: call exp10 -; RV64IFD-NEXT: fsd fa0, 24(s0) -; RV64IFD-NEXT: fsd fs1, 16(s0) -; RV64IFD-NEXT: fsd fs2, 8(s0) ; RV64IFD-NEXT: fsd fs3, 0(s0) +; RV64IFD-NEXT: fsd fs2, 8(s0) +; RV64IFD-NEXT: fsd fs1, 16(s0) +; RV64IFD-NEXT: fsd fa0, 24(s0) ; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll index 2c9d640e03a634a..e85a7118f5ff830 100644 --- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll +++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll @@ -568,18 +568,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32IFD-NEXT: addi a0, sp, 20 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a1, 16(sp) -; RV32IFD-NEXT: lw a2, 12(sp) -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: sw a0, 28(s0) -; RV32IFD-NEXT: sw a1, 24(s0) -; RV32IFD-NEXT: sw a2, 20(s0) -; RV32IFD-NEXT: sw a3, 16(s0) -; RV32IFD-NEXT: fsw fa0, 12(s0) -; RV32IFD-NEXT: fsw fs1, 8(s0) -; RV32IFD-NEXT: fsw fs2, 4(s0) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a3, 20(sp) +; RV32IFD-NEXT: sw a0, 16(s0) +; RV32IFD-NEXT: sw a1, 20(s0) +; RV32IFD-NEXT: sw a2, 24(s0) +; RV32IFD-NEXT: sw a3, 28(s0) ; RV32IFD-NEXT: fsw fs3, 0(s0) +; RV32IFD-NEXT: fsw fs2, 4(s0) +; RV32IFD-NEXT: fsw fs1, 8(s0) +; RV32IFD-NEXT: fsw fa0, 12(s0) ; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload @@ -616,18 +616,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; 
RV64IFD-NEXT: addi a0, sp, 24 ; RV64IFD-NEXT: fmv.s fa0, fs0 ; RV64IFD-NEXT: call frexpf -; RV64IFD-NEXT: ld a0, 24(sp) -; RV64IFD-NEXT: ld a1, 16(sp) -; RV64IFD-NEXT: ld a2, 8(sp) -; RV64IFD-NEXT: ld a3, 0(sp) -; RV64IFD-NEXT: sw a0, 28(s0) -; RV64IFD-NEXT: sw a1, 24(s0) -; RV64IFD-NEXT: sw a2, 20(s0) -; RV64IFD-NEXT: sw a3, 16(s0) -; RV64IFD-NEXT: fsw fa0, 12(s0) -; RV64IFD-NEXT: fsw fs1, 8(s0) -; RV64IFD-NEXT: fsw fs2, 4(s0) +; RV64IFD-NEXT: ld a0, 0(sp) +; RV64IFD-NEXT: ld a1, 8(sp) +; RV64IFD-NEXT: ld a2, 16(sp) +; RV64IFD-NEXT: ld a3, 24(sp) +; RV64IFD-NEXT: sw a0, 16(s0) +; RV64IFD-NEXT: sw a1, 20(s0) +; RV64IFD-NEXT: sw a2, 24(s0) +; RV64IFD-NEXT: sw a3, 28(s0) ; RV64IFD-NEXT: fsw fs3, 0(s0) +; RV64IFD-NEXT: fsw fs2, 4(s0) +; RV64IFD-NEXT: fsw fs1, 8(s0) +; RV64IFD-NEXT: fsw fa0, 12(s0) ; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload @@ -666,18 +666,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32IZFINXZDINX-NEXT: addi a1, sp, 20 ; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: lw a1, 20(sp) -; RV32IZFINXZDINX-NEXT: lw a2, 16(sp) -; RV32IZFINXZDINX-NEXT: lw a3, 12(sp) -; RV32IZFINXZDINX-NEXT: lw a4, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 28(s3) -; RV32IZFINXZDINX-NEXT: sw a2, 24(s3) -; RV32IZFINXZDINX-NEXT: sw a3, 20(s3) -; RV32IZFINXZDINX-NEXT: sw a4, 16(s3) -; RV32IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV32IZFINXZDINX-NEXT: sw s1, 8(s3) -; RV32IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV32IZFINXZDINX-NEXT: lw a1, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a4, 20(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 16(s3) +; RV32IZFINXZDINX-NEXT: sw a2, 20(s3) +; RV32IZFINXZDINX-NEXT: sw a3, 24(s3) +; RV32IZFINXZDINX-NEXT: sw a4, 28(s3) ; RV32IZFINXZDINX-NEXT: sw s4, 0(s3) +; RV32IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV32IZFINXZDINX-NEXT: sw s1, 8(s3) +; RV32IZFINXZDINX-NEXT: sw a0, 12(s3) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -716,18 +716,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV64IZFINXZDINX-NEXT: addi a1, sp, 24 ; RV64IZFINXZDINX-NEXT: mv a0, s0 ; RV64IZFINXZDINX-NEXT: call frexpf -; RV64IZFINXZDINX-NEXT: ld a1, 24(sp) -; RV64IZFINXZDINX-NEXT: ld a2, 16(sp) -; RV64IZFINXZDINX-NEXT: ld a3, 8(sp) -; RV64IZFINXZDINX-NEXT: ld a4, 0(sp) -; RV64IZFINXZDINX-NEXT: sw a1, 28(s3) -; RV64IZFINXZDINX-NEXT: sw a2, 24(s3) -; RV64IZFINXZDINX-NEXT: sw a3, 20(s3) -; RV64IZFINXZDINX-NEXT: sw a4, 16(s3) -; RV64IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV64IZFINXZDINX-NEXT: sw s1, 8(s3) -; RV64IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV64IZFINXZDINX-NEXT: ld a1, 0(sp) +; RV64IZFINXZDINX-NEXT: ld a2, 8(sp) +; RV64IZFINXZDINX-NEXT: ld a3, 16(sp) +; RV64IZFINXZDINX-NEXT: ld a4, 24(sp) +; RV64IZFINXZDINX-NEXT: sw a1, 16(s3) +; RV64IZFINXZDINX-NEXT: sw a2, 20(s3) +; RV64IZFINXZDINX-NEXT: sw a3, 24(s3) +; RV64IZFINXZDINX-NEXT: sw a4, 28(s3) ; RV64IZFINXZDINX-NEXT: sw s4, 0(s3) +; RV64IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV64IZFINXZDINX-NEXT: sw s1, 8(s3) +; RV64IZFINXZDINX-NEXT: sw a0, 12(s3) ; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload @@ -770,14 +770,14 
@@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32I-NEXT: lw a2, 12(sp) ; RV32I-NEXT: lw a3, 16(sp) ; RV32I-NEXT: lw a4, 20(sp) -; RV32I-NEXT: sw a0, 12(s3) -; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw s0, 4(s3) ; RV32I-NEXT: sw s4, 0(s3) -; RV32I-NEXT: sw a4, 28(s3) -; RV32I-NEXT: sw a3, 24(s3) -; RV32I-NEXT: sw a2, 20(s3) +; RV32I-NEXT: sw s0, 4(s3) +; RV32I-NEXT: sw s1, 8(s3) +; RV32I-NEXT: sw a0, 12(s3) ; RV32I-NEXT: sw a1, 16(s3) +; RV32I-NEXT: sw a2, 20(s3) +; RV32I-NEXT: sw a3, 24(s3) +; RV32I-NEXT: sw a4, 28(s3) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -820,14 +820,14 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV64I-NEXT: lw a2, 4(sp) ; RV64I-NEXT: lw a3, 8(sp) ; RV64I-NEXT: lw a4, 12(sp) -; RV64I-NEXT: sw a0, 12(s3) -; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw s0, 4(s3) ; RV64I-NEXT: sw s4, 0(s3) -; RV64I-NEXT: sw a4, 28(s3) -; RV64I-NEXT: sw a3, 24(s3) -; RV64I-NEXT: sw a2, 20(s3) +; RV64I-NEXT: sw s0, 4(s3) +; RV64I-NEXT: sw s1, 8(s3) +; RV64I-NEXT: sw a0, 12(s3) ; RV64I-NEXT: sw a1, 16(s3) +; RV64I-NEXT: sw a2, 20(s3) +; RV64I-NEXT: sw a3, 24(s3) +; RV64I-NEXT: sw a4, 28(s3) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -868,10 +868,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV32IFD-NEXT: addi a0, sp, 20 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: fsw fa0, 12(s0) -; RV32IFD-NEXT: fsw fs1, 8(s0) -; RV32IFD-NEXT: fsw fs2, 4(s0) ; RV32IFD-NEXT: fsw fs3, 0(s0) +; RV32IFD-NEXT: fsw fs2, 4(s0) +; RV32IFD-NEXT: fsw fs1, 8(s0) +; RV32IFD-NEXT: fsw fa0, 12(s0) ; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload @@ -908,10 +908,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV64IFD-NEXT: addi a0, sp, 24 ; RV64IFD-NEXT: fmv.s fa0, fs0 ; RV64IFD-NEXT: call frexpf -; RV64IFD-NEXT: fsw fa0, 12(s0) -; RV64IFD-NEXT: fsw fs1, 8(s0) -; RV64IFD-NEXT: fsw fs2, 4(s0) ; RV64IFD-NEXT: fsw fs3, 0(s0) +; RV64IFD-NEXT: fsw fs2, 4(s0) +; RV64IFD-NEXT: fsw fs1, 8(s0) +; RV64IFD-NEXT: fsw fa0, 12(s0) ; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload @@ -950,10 +950,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV32IZFINXZDINX-NEXT: addi a1, sp, 20 ; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV32IZFINXZDINX-NEXT: sw s1, 8(s3) -; RV32IZFINXZDINX-NEXT: sw s2, 4(s3) ; RV32IZFINXZDINX-NEXT: sw s4, 0(s3) +; RV32IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV32IZFINXZDINX-NEXT: sw s1, 8(s3) +; RV32IZFINXZDINX-NEXT: sw a0, 12(s3) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -992,10 +992,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV64IZFINXZDINX-NEXT: addi a1, sp, 24 ; RV64IZFINXZDINX-NEXT: mv a0, s0 ; RV64IZFINXZDINX-NEXT: call frexpf -; RV64IZFINXZDINX-NEXT: sw a0, 12(s3) -; 
RV64IZFINXZDINX-NEXT: sw s1, 8(s3) -; RV64IZFINXZDINX-NEXT: sw s2, 4(s3) ; RV64IZFINXZDINX-NEXT: sw s4, 0(s3) +; RV64IZFINXZDINX-NEXT: sw s2, 4(s3) +; RV64IZFINXZDINX-NEXT: sw s1, 8(s3) +; RV64IZFINXZDINX-NEXT: sw a0, 12(s3) ; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload @@ -1034,10 +1034,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV32I-NEXT: addi a1, sp, 20 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf -; RV32I-NEXT: sw a0, 12(s3) -; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw s0, 4(s3) ; RV32I-NEXT: sw s4, 0(s3) +; RV32I-NEXT: sw s0, 4(s3) +; RV32I-NEXT: sw s1, 8(s3) +; RV32I-NEXT: sw a0, 12(s3) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1076,10 +1076,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf -; RV64I-NEXT: sw a0, 12(s3) -; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw s0, 4(s3) ; RV64I-NEXT: sw s4, 0(s3) +; RV64I-NEXT: sw s0, 4(s3) +; RV64I-NEXT: sw s1, 8(s3) +; RV64I-NEXT: sw a0, 12(s3) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -1117,14 +1117,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32IFD-NEXT: addi a0, sp, 12 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: lw a0, 12(sp) -; RV32IFD-NEXT: lw a1, 8(sp) -; RV32IFD-NEXT: lw a2, 4(sp) -; RV32IFD-NEXT: lw a3, 0(sp) -; RV32IFD-NEXT: sw a0, 12(s0) -; RV32IFD-NEXT: sw a1, 8(s0) -; RV32IFD-NEXT: sw a2, 4(s0) -; RV32IFD-NEXT: sw a3, 0(s0) +; RV32IFD-NEXT: lw a0, 0(sp) +; RV32IFD-NEXT: lw a1, 4(sp) +; RV32IFD-NEXT: lw a2, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) +; RV32IFD-NEXT: sw a0, 0(s0) +; RV32IFD-NEXT: sw a1, 4(s0) +; RV32IFD-NEXT: sw a2, 8(s0) +; RV32IFD-NEXT: sw a3, 12(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload @@ -1156,14 +1156,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV64IFD-NEXT: addi a0, sp, 32 ; RV64IFD-NEXT: fmv.s fa0, fs0 ; RV64IFD-NEXT: call frexpf -; RV64IFD-NEXT: ld a0, 32(sp) -; RV64IFD-NEXT: ld a1, 24(sp) -; RV64IFD-NEXT: ld a2, 16(sp) -; RV64IFD-NEXT: ld a3, 8(sp) -; RV64IFD-NEXT: sw a0, 12(s0) -; RV64IFD-NEXT: sw a1, 8(s0) -; RV64IFD-NEXT: sw a2, 4(s0) -; RV64IFD-NEXT: sw a3, 0(s0) +; RV64IFD-NEXT: ld a0, 8(sp) +; RV64IFD-NEXT: ld a1, 16(sp) +; RV64IFD-NEXT: ld a2, 24(sp) +; RV64IFD-NEXT: ld a3, 32(sp) +; RV64IFD-NEXT: sw a0, 0(s0) +; RV64IFD-NEXT: sw a1, 4(s0) +; RV64IFD-NEXT: sw a2, 8(s0) +; RV64IFD-NEXT: sw a3, 12(s0) ; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload @@ -1197,14 +1197,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi a1, sp, 24 ; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: lw a0, 24(sp) -; RV32IZFINXZDINX-NEXT: lw a1, 20(sp) -; RV32IZFINXZDINX-NEXT: lw a2, 16(sp) -; RV32IZFINXZDINX-NEXT: lw a3, 12(sp) -; 
RV32IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV32IZFINXZDINX-NEXT: sw a1, 8(s3) -; RV32IZFINXZDINX-NEXT: sw a2, 4(s3) -; RV32IZFINXZDINX-NEXT: sw a3, 0(s3) +; RV32IZFINXZDINX-NEXT: lw a0, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 20(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 24(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(s3) +; RV32IZFINXZDINX-NEXT: sw a1, 4(s3) +; RV32IZFINXZDINX-NEXT: sw a2, 8(s3) +; RV32IZFINXZDINX-NEXT: sw a3, 12(s3) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1238,14 +1238,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV64IZFINXZDINX-NEXT: addi a1, sp, 32 ; RV64IZFINXZDINX-NEXT: mv a0, s0 ; RV64IZFINXZDINX-NEXT: call frexpf -; RV64IZFINXZDINX-NEXT: ld a0, 32(sp) -; RV64IZFINXZDINX-NEXT: ld a1, 24(sp) -; RV64IZFINXZDINX-NEXT: ld a2, 16(sp) -; RV64IZFINXZDINX-NEXT: ld a3, 8(sp) -; RV64IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV64IZFINXZDINX-NEXT: sw a1, 8(s3) -; RV64IZFINXZDINX-NEXT: sw a2, 4(s3) -; RV64IZFINXZDINX-NEXT: sw a3, 0(s3) +; RV64IZFINXZDINX-NEXT: ld a0, 8(sp) +; RV64IZFINXZDINX-NEXT: ld a1, 16(sp) +; RV64IZFINXZDINX-NEXT: ld a2, 24(sp) +; RV64IZFINXZDINX-NEXT: ld a3, 32(sp) +; RV64IZFINXZDINX-NEXT: sw a0, 0(s3) +; RV64IZFINXZDINX-NEXT: sw a1, 4(s3) +; RV64IZFINXZDINX-NEXT: sw a2, 8(s3) +; RV64IZFINXZDINX-NEXT: sw a3, 12(s3) ; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload @@ -1279,14 +1279,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32I-NEXT: addi a1, sp, 24 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf -; RV32I-NEXT: lw a0, 24(sp) -; RV32I-NEXT: lw a1, 20(sp) -; RV32I-NEXT: lw a2, 16(sp) -; RV32I-NEXT: lw a3, 12(sp) -; RV32I-NEXT: sw a0, 12(s3) -; RV32I-NEXT: sw a1, 8(s3) -; RV32I-NEXT: sw a2, 4(s3) -; RV32I-NEXT: sw a3, 0(s3) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: lw a1, 16(sp) +; RV32I-NEXT: lw a2, 20(sp) +; RV32I-NEXT: lw a3, 24(sp) +; RV32I-NEXT: sw a0, 0(s3) +; RV32I-NEXT: sw a1, 4(s3) +; RV32I-NEXT: sw a2, 8(s3) +; RV32I-NEXT: sw a3, 12(s3) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1320,14 +1320,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV64I-NEXT: addi a1, sp, 20 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf -; RV64I-NEXT: lw a0, 20(sp) -; RV64I-NEXT: lw a1, 16(sp) -; RV64I-NEXT: lw a2, 12(sp) -; RV64I-NEXT: lw a3, 8(sp) -; RV64I-NEXT: sw a0, 12(s3) -; RV64I-NEXT: sw a1, 8(s3) -; RV64I-NEXT: sw a2, 4(s3) -; RV64I-NEXT: sw a3, 0(s3) +; RV64I-NEXT: lw a0, 8(sp) +; RV64I-NEXT: lw a1, 12(sp) +; RV64I-NEXT: lw a2, 16(sp) +; RV64I-NEXT: lw a3, 20(sp) +; RV64I-NEXT: sw a0, 0(s3) +; RV64I-NEXT: sw a1, 4(s3) +; RV64I-NEXT: sw a2, 8(s3) +; RV64I-NEXT: sw a3, 12(s3) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -1397,8 +1397,8 @@ define { double, i32 } @test_frexp_f64_i32(double %a) nounwind { ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: call frexp ; RV32I-NEXT: lw a2, 4(sp) -; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) ; RV32I-NEXT: sw a2, 8(s0) ; RV32I-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1580,28 +1580,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: lw a3, 0(a1) -; RV32IFD-NEXT: lw a2, 4(a1) -; RV32IFD-NEXT: lw a4, 8(a1) -; RV32IFD-NEXT: lw a1, 12(a1) +; RV32IFD-NEXT: lw a4, 4(a1) +; RV32IFD-NEXT: lw a5, 8(a1) +; RV32IFD-NEXT: lw a6, 12(a1) ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: sw a4, 8(sp) -; RV32IFD-NEXT: sw a2, 4(sp) ; RV32IFD-NEXT: addi a0, sp, 16 ; RV32IFD-NEXT: mv a1, sp ; RV32IFD-NEXT: addi a2, sp, 36 ; RV32IFD-NEXT: sw a3, 0(sp) +; RV32IFD-NEXT: sw a4, 4(sp) +; RV32IFD-NEXT: sw a5, 8(sp) +; RV32IFD-NEXT: sw a6, 12(sp) ; RV32IFD-NEXT: call frexpl -; RV32IFD-NEXT: lw a0, 24(sp) -; RV32IFD-NEXT: lw a1, 28(sp) -; RV32IFD-NEXT: lw a2, 16(sp) -; RV32IFD-NEXT: lw a3, 20(sp) -; RV32IFD-NEXT: lw a4, 36(sp) -; RV32IFD-NEXT: sw a1, 12(s0) -; RV32IFD-NEXT: sw a0, 8(s0) -; RV32IFD-NEXT: sw a3, 4(s0) -; RV32IFD-NEXT: sw a2, 0(s0) -; RV32IFD-NEXT: sw a4, 16(s0) +; RV32IFD-NEXT: lw a0, 36(sp) +; RV32IFD-NEXT: lw a1, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: lw a3, 24(sp) +; RV32IFD-NEXT: lw a4, 28(sp) +; RV32IFD-NEXT: sw a1, 0(s0) +; RV32IFD-NEXT: sw a2, 4(s0) +; RV32IFD-NEXT: sw a3, 8(s0) +; RV32IFD-NEXT: sw a4, 12(s0) +; RV32IFD-NEXT: sw a0, 16(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 48 @@ -1619,8 +1619,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV64IFD-NEXT: mv a1, a3 ; RV64IFD-NEXT: call frexpl ; RV64IFD-NEXT: lw a2, 12(sp) -; RV64IFD-NEXT: sd a1, 8(s0) ; RV64IFD-NEXT: sd a0, 0(s0) +; RV64IFD-NEXT: sd a1, 8(s0) ; RV64IFD-NEXT: sw a2, 16(s0) ; RV64IFD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1633,28 +1633,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: lw a3, 0(a1) -; RV32IZFINXZDINX-NEXT: lw a2, 4(a1) -; RV32IZFINXZDINX-NEXT: lw a4, 8(a1) -; RV32IZFINXZDINX-NEXT: lw a1, 12(a1) +; RV32IZFINXZDINX-NEXT: lw a4, 4(a1) +; RV32IZFINXZDINX-NEXT: lw a5, 8(a1) +; RV32IZFINXZDINX-NEXT: lw a6, 12(a1) ; RV32IZFINXZDINX-NEXT: mv s0, a0 -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: sw a4, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a2, 4(sp) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 16 ; RV32IZFINXZDINX-NEXT: mv a1, sp ; RV32IZFINXZDINX-NEXT: addi a2, sp, 36 ; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a4, 4(sp) +; RV32IZFINXZDINX-NEXT: sw a5, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a6, 12(sp) ; RV32IZFINXZDINX-NEXT: call frexpl -; RV32IZFINXZDINX-NEXT: lw a0, 24(sp) -; RV32IZFINXZDINX-NEXT: lw a1, 28(sp) -; RV32IZFINXZDINX-NEXT: lw a2, 16(sp) -; RV32IZFINXZDINX-NEXT: lw a3, 20(sp) -; RV32IZFINXZDINX-NEXT: lw a4, 36(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(s0) -; RV32IZFINXZDINX-NEXT: sw a0, 8(s0) -; RV32IZFINXZDINX-NEXT: sw a3, 4(s0) -; RV32IZFINXZDINX-NEXT: sw a2, 0(s0) -; RV32IZFINXZDINX-NEXT: sw a4, 16(s0) +; RV32IZFINXZDINX-NEXT: lw a0, 36(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 20(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 24(sp) +; RV32IZFINXZDINX-NEXT: lw a4, 28(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 0(s0) +; RV32IZFINXZDINX-NEXT: sw a2, 
4(s0) +; RV32IZFINXZDINX-NEXT: sw a3, 8(s0) +; RV32IZFINXZDINX-NEXT: sw a4, 12(s0) +; RV32IZFINXZDINX-NEXT: sw a0, 16(s0) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 48 @@ -1672,8 +1672,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV64IZFINXZDINX-NEXT: mv a1, a3 ; RV64IZFINXZDINX-NEXT: call frexpl ; RV64IZFINXZDINX-NEXT: lw a2, 12(sp) -; RV64IZFINXZDINX-NEXT: sd a1, 8(s0) ; RV64IZFINXZDINX-NEXT: sd a0, 0(s0) +; RV64IZFINXZDINX-NEXT: sd a1, 8(s0) ; RV64IZFINXZDINX-NEXT: sw a2, 16(s0) ; RV64IZFINXZDINX-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1686,28 +1686,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: sw a1, 12(sp) -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: addi a2, sp, 36 ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: call frexpl -; RV32I-NEXT: lw a0, 24(sp) -; RV32I-NEXT: lw a1, 28(sp) -; RV32I-NEXT: lw a2, 16(sp) -; RV32I-NEXT: lw a3, 20(sp) -; RV32I-NEXT: lw a4, 36(sp) -; RV32I-NEXT: sw a1, 12(s0) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: sw a3, 4(s0) -; RV32I-NEXT: sw a2, 0(s0) -; RV32I-NEXT: sw a4, 16(s0) +; RV32I-NEXT: lw a0, 36(sp) +; RV32I-NEXT: lw a1, 16(sp) +; RV32I-NEXT: lw a2, 20(sp) +; RV32I-NEXT: lw a3, 24(sp) +; RV32I-NEXT: lw a4, 28(sp) +; RV32I-NEXT: sw a1, 0(s0) +; RV32I-NEXT: sw a2, 4(s0) +; RV32I-NEXT: sw a3, 8(s0) +; RV32I-NEXT: sw a4, 12(s0) +; RV32I-NEXT: sw a0, 16(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 @@ -1725,8 +1725,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV64I-NEXT: mv a1, a3 ; RV64I-NEXT: call frexpl ; RV64I-NEXT: lw a2, 12(sp) -; RV64I-NEXT: sd a1, 8(s0) ; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: sd a1, 8(s0) ; RV64I-NEXT: sw a2, 16(s0) ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1743,26 +1743,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: lw a3, 0(a1) -; RV32IFD-NEXT: lw a2, 4(a1) -; RV32IFD-NEXT: lw a4, 8(a1) -; RV32IFD-NEXT: lw a1, 12(a1) +; RV32IFD-NEXT: lw a4, 4(a1) +; RV32IFD-NEXT: lw a5, 8(a1) +; RV32IFD-NEXT: lw a6, 12(a1) ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: sw a4, 8(sp) -; RV32IFD-NEXT: sw a2, 4(sp) ; RV32IFD-NEXT: addi a0, sp, 16 ; RV32IFD-NEXT: mv a1, sp ; RV32IFD-NEXT: addi a2, sp, 36 ; RV32IFD-NEXT: sw a3, 0(sp) +; RV32IFD-NEXT: sw a4, 4(sp) +; RV32IFD-NEXT: sw a5, 8(sp) +; RV32IFD-NEXT: sw a6, 12(sp) ; RV32IFD-NEXT: call frexpl -; RV32IFD-NEXT: lw a0, 28(sp) -; RV32IFD-NEXT: lw a1, 24(sp) -; RV32IFD-NEXT: lw a2, 20(sp) -; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: sw a0, 12(s0) -; RV32IFD-NEXT: sw a1, 8(s0) -; RV32IFD-NEXT: sw a2, 4(s0) -; RV32IFD-NEXT: sw a3, 
0(s0) +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: lw a2, 24(sp) +; RV32IFD-NEXT: lw a3, 28(sp) +; RV32IFD-NEXT: sw a0, 0(s0) +; RV32IFD-NEXT: sw a1, 4(s0) +; RV32IFD-NEXT: sw a2, 8(s0) +; RV32IFD-NEXT: sw a3, 12(s0) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 48 @@ -1784,26 +1784,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: lw a3, 0(a1) -; RV32IZFINXZDINX-NEXT: lw a2, 4(a1) -; RV32IZFINXZDINX-NEXT: lw a4, 8(a1) -; RV32IZFINXZDINX-NEXT: lw a1, 12(a1) +; RV32IZFINXZDINX-NEXT: lw a4, 4(a1) +; RV32IZFINXZDINX-NEXT: lw a5, 8(a1) +; RV32IZFINXZDINX-NEXT: lw a6, 12(a1) ; RV32IZFINXZDINX-NEXT: mv s0, a0 -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: sw a4, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a2, 4(sp) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 16 ; RV32IZFINXZDINX-NEXT: mv a1, sp ; RV32IZFINXZDINX-NEXT: addi a2, sp, 36 ; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a4, 4(sp) +; RV32IZFINXZDINX-NEXT: sw a5, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a6, 12(sp) ; RV32IZFINXZDINX-NEXT: call frexpl -; RV32IZFINXZDINX-NEXT: lw a0, 28(sp) -; RV32IZFINXZDINX-NEXT: lw a1, 24(sp) -; RV32IZFINXZDINX-NEXT: lw a2, 20(sp) -; RV32IZFINXZDINX-NEXT: lw a3, 16(sp) -; RV32IZFINXZDINX-NEXT: sw a0, 12(s0) -; RV32IZFINXZDINX-NEXT: sw a1, 8(s0) -; RV32IZFINXZDINX-NEXT: sw a2, 4(s0) -; RV32IZFINXZDINX-NEXT: sw a3, 0(s0) +; RV32IZFINXZDINX-NEXT: lw a0, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 20(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 24(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 28(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(s0) +; RV32IZFINXZDINX-NEXT: sw a1, 4(s0) +; RV32IZFINXZDINX-NEXT: sw a2, 8(s0) +; RV32IZFINXZDINX-NEXT: sw a3, 12(s0) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 48 @@ -1825,26 +1825,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: sw a1, 12(sp) -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a2, 4(sp) ; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: addi a2, sp, 36 ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: call frexpl -; RV32I-NEXT: lw a0, 28(sp) -; RV32I-NEXT: lw a1, 24(sp) -; RV32I-NEXT: lw a2, 20(sp) -; RV32I-NEXT: lw a3, 16(sp) -; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: sw a1, 8(s0) -; RV32I-NEXT: sw a2, 4(s0) -; RV32I-NEXT: sw a3, 0(s0) +; RV32I-NEXT: lw a0, 16(sp) +; RV32I-NEXT: lw a1, 20(sp) +; RV32I-NEXT: lw a2, 24(sp) +; RV32I-NEXT: lw a3, 28(sp) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a2, 8(s0) +; RV32I-NEXT: sw a3, 12(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 @@ -1870,16 +1870,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -48 ; RV32IFD-NEXT: sw 
ra, 44(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: lw a3, 0(a0) -; RV32IFD-NEXT: lw a1, 4(a0) -; RV32IFD-NEXT: lw a2, 8(a0) -; RV32IFD-NEXT: lw a0, 12(a0) -; RV32IFD-NEXT: sw a0, 20(sp) -; RV32IFD-NEXT: sw a2, 16(sp) -; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: lw a4, 4(a0) +; RV32IFD-NEXT: lw a5, 8(a0) +; RV32IFD-NEXT: lw a6, 12(a0) ; RV32IFD-NEXT: addi a0, sp, 24 ; RV32IFD-NEXT: addi a1, sp, 8 ; RV32IFD-NEXT: addi a2, sp, 40 ; RV32IFD-NEXT: sw a3, 8(sp) +; RV32IFD-NEXT: sw a4, 12(sp) +; RV32IFD-NEXT: sw a5, 16(sp) +; RV32IFD-NEXT: sw a6, 20(sp) ; RV32IFD-NEXT: call frexpl ; RV32IFD-NEXT: lw a0, 40(sp) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1902,16 +1902,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: lw a3, 0(a0) -; RV32IZFINXZDINX-NEXT: lw a1, 4(a0) -; RV32IZFINXZDINX-NEXT: lw a2, 8(a0) -; RV32IZFINXZDINX-NEXT: lw a0, 12(a0) -; RV32IZFINXZDINX-NEXT: sw a0, 20(sp) -; RV32IZFINXZDINX-NEXT: sw a2, 16(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a4, 4(a0) +; RV32IZFINXZDINX-NEXT: lw a5, 8(a0) +; RV32IZFINXZDINX-NEXT: lw a6, 12(a0) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 24 ; RV32IZFINXZDINX-NEXT: addi a1, sp, 8 ; RV32IZFINXZDINX-NEXT: addi a2, sp, 40 ; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a4, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a5, 16(sp) +; RV32IZFINXZDINX-NEXT: sw a6, 20(sp) ; RV32IZFINXZDINX-NEXT: call frexpl ; RV32IZFINXZDINX-NEXT: lw a0, 40(sp) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1934,16 +1934,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: lw a1, 4(a0) -; RV32I-NEXT: lw a2, 8(a0) -; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a2, 16(sp) -; RV32I-NEXT: sw a1, 12(sp) +; RV32I-NEXT: lw a4, 4(a0) +; RV32I-NEXT: lw a5, 8(a0) +; RV32I-NEXT: lw a6, 12(a0) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: addi a2, sp, 40 ; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a6, 20(sp) ; RV32I-NEXT: call frexpl ; RV32I-NEXT: lw a0, 40(sp) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll index 41c27d83defe618..5b8955ee0e0a0c7 100644 --- a/llvm/test/CodeGen/RISCV/memcpy.ll +++ b/llvm/test/CodeGen/RISCV/memcpy.ll @@ -25,16 +25,16 @@ define i32 @t0() { ; RV32: # %bb.0: # %entry ; RV32-NEXT: lui a0, %hi(src) ; RV32-NEXT: lw a1, %lo(src)(a0) +; RV32-NEXT: lui a2, %hi(dst) +; RV32-NEXT: sw a1, %lo(dst)(a2) ; RV32-NEXT: addi a0, a0, %lo(src) -; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a1, 4(a0) ; RV32-NEXT: lh a3, 8(a0) ; RV32-NEXT: lbu a0, 10(a0) -; RV32-NEXT: lui a4, %hi(dst) -; RV32-NEXT: sw a1, %lo(dst)(a4) -; RV32-NEXT: addi a1, a4, %lo(dst) -; RV32-NEXT: sb a0, 10(a1) -; RV32-NEXT: sh a3, 8(a1) -; RV32-NEXT: sw a2, 4(a1) +; RV32-NEXT: addi a2, a2, %lo(dst) +; RV32-NEXT: sw a1, 4(a2) +; RV32-NEXT: sh a3, 8(a2) +; RV32-NEXT: sb a0, 10(a2) ; RV32-NEXT: li a0, 0 ; RV32-NEXT: ret ; @@ -42,14 +42,14 @@ define i32 @t0() { ; RV64: # %bb.0: # %entry ; RV64-NEXT: lui a0, %hi(src) ; RV64-NEXT: ld a1, %lo(src)(a0) +; RV64-NEXT: lui a2, %hi(dst) ; RV64-NEXT: addi a0, a0, %lo(src) -; RV64-NEXT: lh a2, 8(a0) +; RV64-NEXT: lh a3, 8(a0) ; 
RV64-NEXT: lbu a0, 10(a0) -; RV64-NEXT: lui a3, %hi(dst) -; RV64-NEXT: sd a1, %lo(dst)(a3) -; RV64-NEXT: addi a1, a3, %lo(dst) +; RV64-NEXT: sd a1, %lo(dst)(a2) +; RV64-NEXT: addi a1, a2, %lo(dst) +; RV64-NEXT: sh a3, 8(a1) ; RV64-NEXT: sb a0, 10(a1) -; RV64-NEXT: sh a2, 8(a1) ; RV64-NEXT: li a0, 0 ; RV64-NEXT: ret ; @@ -107,39 +107,39 @@ define void @t1(ptr nocapture %C) nounwind { ; RV32-FAST-NEXT: sw a1, 27(a0) ; RV32-FAST-NEXT: lui a1, 300325 ; RV32-FAST-NEXT: addi a1, a1, 1107 +; RV32-FAST-NEXT: lui a2, 132181 +; RV32-FAST-NEXT: addi a2, a2, -689 +; RV32-FAST-NEXT: lui a3, 340483 +; RV32-FAST-NEXT: addi a3, a3, -947 +; RV32-FAST-NEXT: sw a3, 16(a0) +; RV32-FAST-NEXT: sw a2, 20(a0) ; RV32-FAST-NEXT: sw a1, 24(a0) -; RV32-FAST-NEXT: lui a1, 132181 -; RV32-FAST-NEXT: addi a1, a1, -689 -; RV32-FAST-NEXT: sw a1, 20(a0) -; RV32-FAST-NEXT: lui a1, 340483 -; RV32-FAST-NEXT: addi a1, a1, -947 -; RV32-FAST-NEXT: sw a1, 16(a0) ; RV32-FAST-NEXT: lui a1, 267556 ; RV32-FAST-NEXT: addi a1, a1, 1871 +; RV32-FAST-NEXT: lui a2, 337154 +; RV32-FAST-NEXT: addi a2, a2, 69 +; RV32-FAST-NEXT: lui a3, 320757 +; RV32-FAST-NEXT: addi a3, a3, 1107 +; RV32-FAST-NEXT: lui a4, 365861 +; RV32-FAST-NEXT: addi a4, a4, -1980 +; RV32-FAST-NEXT: sw a4, 0(a0) +; RV32-FAST-NEXT: sw a3, 4(a0) +; RV32-FAST-NEXT: sw a2, 8(a0) ; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: lui a1, 337154 -; RV32-FAST-NEXT: addi a1, a1, 69 -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: lui a1, 320757 -; RV32-FAST-NEXT: addi a1, a1, 1107 -; RV32-FAST-NEXT: sw a1, 4(a0) -; RV32-FAST-NEXT: lui a1, 365861 -; RV32-FAST-NEXT: addi a1, a1, -1980 -; RV32-FAST-NEXT: sw a1, 0(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: t1: ; RV64-FAST: # %bb.0: # %entry ; RV64-FAST-NEXT: lui a1, %hi(.L.str1) -; RV64-FAST-NEXT: ld a2, %lo(.L.str1)(a1) -; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str1) -; RV64-FAST-NEXT: ld a3, 23(a1) -; RV64-FAST-NEXT: ld a4, 16(a1) -; RV64-FAST-NEXT: ld a1, 8(a1) -; RV64-FAST-NEXT: sd a2, 0(a0) +; RV64-FAST-NEXT: addi a2, a1, %lo(.L.str1) +; RV64-FAST-NEXT: ld a3, 23(a2) +; RV64-FAST-NEXT: ld a1, %lo(.L.str1)(a1) +; RV64-FAST-NEXT: ld a4, 8(a2) +; RV64-FAST-NEXT: ld a2, 16(a2) ; RV64-FAST-NEXT: sd a3, 23(a0) -; RV64-FAST-NEXT: sd a4, 16(a0) -; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: sd a4, 8(a0) +; RV64-FAST-NEXT: sd a2, 16(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false) @@ -165,17 +165,17 @@ define void @t2(ptr nocapture %C) nounwind { ; RV64-FAST: # %bb.0: # %entry ; RV64-FAST-NEXT: lui a1, %hi(.L.str2) ; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1) -; RV64-FAST-NEXT: sd a2, 0(a0) +; RV64-FAST-NEXT: lui a3, 1156 +; RV64-FAST-NEXT: addi a3, a3, 332 +; RV64-FAST-NEXT: sw a3, 32(a0) ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2) -; RV64-FAST-NEXT: ld a2, 8(a1) -; RV64-FAST-NEXT: ld a3, 16(a1) +; RV64-FAST-NEXT: ld a3, 8(a1) +; RV64-FAST-NEXT: ld a4, 16(a1) ; RV64-FAST-NEXT: ld a1, 24(a1) -; RV64-FAST-NEXT: lui a4, 1156 -; RV64-FAST-NEXT: addi a4, a4, 332 -; RV64-FAST-NEXT: sw a4, 32(a0) +; RV64-FAST-NEXT: sd a2, 0(a0) +; RV64-FAST-NEXT: sd a3, 8(a0) +; RV64-FAST-NEXT: sd a4, 16(a0) ; RV64-FAST-NEXT: sd a1, 24(a0) -; RV64-FAST-NEXT: sd a3, 16(a0) -; RV64-FAST-NEXT: sd a2, 8(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false) @@ -201,22 +201,22 @@ define void @t3(ptr nocapture %C) nounwind { ; RV32-FAST: # %bb.0: # %entry ; RV32-FAST-NEXT: lui a1, 1109 ; RV32-FAST-NEXT: addi a1, a1, -689 
+; RV32-FAST-NEXT: lui a2, 340483 +; RV32-FAST-NEXT: addi a2, a2, -947 +; RV32-FAST-NEXT: sw a2, 16(a0) ; RV32-FAST-NEXT: sw a1, 20(a0) -; RV32-FAST-NEXT: lui a1, 340483 -; RV32-FAST-NEXT: addi a1, a1, -947 -; RV32-FAST-NEXT: sw a1, 16(a0) ; RV32-FAST-NEXT: lui a1, 267556 ; RV32-FAST-NEXT: addi a1, a1, 1871 +; RV32-FAST-NEXT: lui a2, 337154 +; RV32-FAST-NEXT: addi a2, a2, 69 +; RV32-FAST-NEXT: lui a3, 320757 +; RV32-FAST-NEXT: addi a3, a3, 1107 +; RV32-FAST-NEXT: lui a4, 365861 +; RV32-FAST-NEXT: addi a4, a4, -1980 +; RV32-FAST-NEXT: sw a4, 0(a0) +; RV32-FAST-NEXT: sw a3, 4(a0) +; RV32-FAST-NEXT: sw a2, 8(a0) ; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: lui a1, 337154 -; RV32-FAST-NEXT: addi a1, a1, 69 -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: lui a1, 320757 -; RV32-FAST-NEXT: addi a1, a1, 1107 -; RV32-FAST-NEXT: sw a1, 4(a0) -; RV32-FAST-NEXT: lui a1, 365861 -; RV32-FAST-NEXT: addi a1, a1, -1980 -; RV32-FAST-NEXT: sw a1, 0(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: t3: @@ -224,11 +224,11 @@ define void @t3(ptr nocapture %C) nounwind { ; RV64-FAST-NEXT: lui a1, %hi(.L.str3) ; RV64-FAST-NEXT: ld a2, %lo(.L.str3)(a1) ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str3) -; RV64-FAST-NEXT: ld a3, 16(a1) -; RV64-FAST-NEXT: ld a1, 8(a1) +; RV64-FAST-NEXT: ld a3, 8(a1) +; RV64-FAST-NEXT: ld a1, 16(a1) ; RV64-FAST-NEXT: sd a2, 0(a0) -; RV64-FAST-NEXT: sd a3, 16(a0) -; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sd a3, 8(a0) +; RV64-FAST-NEXT: sd a1, 16(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false) @@ -256,16 +256,16 @@ define void @t4(ptr nocapture %C) nounwind { ; RV32-FAST-NEXT: sh a1, 16(a0) ; RV32-FAST-NEXT: lui a1, 132388 ; RV32-FAST-NEXT: addi a1, a1, 1871 +; RV32-FAST-NEXT: lui a2, 337154 +; RV32-FAST-NEXT: addi a2, a2, 69 +; RV32-FAST-NEXT: lui a3, 320757 +; RV32-FAST-NEXT: addi a3, a3, 1107 +; RV32-FAST-NEXT: lui a4, 365861 +; RV32-FAST-NEXT: addi a4, a4, -1980 +; RV32-FAST-NEXT: sw a4, 0(a0) +; RV32-FAST-NEXT: sw a3, 4(a0) +; RV32-FAST-NEXT: sw a2, 8(a0) ; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: lui a1, 337154 -; RV32-FAST-NEXT: addi a1, a1, 69 -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: lui a1, 320757 -; RV32-FAST-NEXT: addi a1, a1, 1107 -; RV32-FAST-NEXT: sw a1, 4(a0) -; RV32-FAST-NEXT: lui a1, 365861 -; RV32-FAST-NEXT: addi a1, a1, -1980 -; RV32-FAST-NEXT: sw a1, 0(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: t4: @@ -275,9 +275,9 @@ define void @t4(ptr nocapture %C) nounwind { ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str4) ; RV64-FAST-NEXT: ld a1, 8(a1) ; RV64-FAST-NEXT: li a3, 32 -; RV64-FAST-NEXT: sh a3, 16(a0) ; RV64-FAST-NEXT: sd a2, 0(a0) ; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sh a3, 16(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false) @@ -287,36 +287,36 @@ entry: define void @t5(ptr nocapture %C) nounwind { ; RV32-LABEL: t5: ; RV32: # %bb.0: # %entry -; RV32-NEXT: sb zero, 6(a0) ; RV32-NEXT: li a1, 84 +; RV32-NEXT: li a2, 83 +; RV32-NEXT: sb a2, 4(a0) ; RV32-NEXT: sb a1, 5(a0) -; RV32-NEXT: li a1, 83 -; RV32-NEXT: sb a1, 4(a0) +; RV32-NEXT: sb zero, 6(a0) ; RV32-NEXT: li a1, 89 +; RV32-NEXT: li a2, 82 +; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a4, 68 +; RV32-NEXT: sb a4, 0(a0) +; RV32-NEXT: sb a3, 1(a0) +; RV32-NEXT: sb a2, 2(a0) ; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: li a1, 82 -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: li a1, 72 -; RV32-NEXT: sb a1, 1(a0) -; RV32-NEXT: li a1, 68 -; RV32-NEXT: sb a1, 0(a0) ; RV32-NEXT: ret 
; ; RV64-LABEL: t5: ; RV64: # %bb.0: # %entry -; RV64-NEXT: sb zero, 6(a0) ; RV64-NEXT: li a1, 84 +; RV64-NEXT: li a2, 83 +; RV64-NEXT: sb a2, 4(a0) ; RV64-NEXT: sb a1, 5(a0) -; RV64-NEXT: li a1, 83 -; RV64-NEXT: sb a1, 4(a0) +; RV64-NEXT: sb zero, 6(a0) ; RV64-NEXT: li a1, 89 +; RV64-NEXT: li a2, 82 +; RV64-NEXT: li a3, 72 +; RV64-NEXT: li a4, 68 +; RV64-NEXT: sb a4, 0(a0) +; RV64-NEXT: sb a3, 1(a0) +; RV64-NEXT: sb a2, 2(a0) ; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: li a1, 82 -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: li a1, 72 -; RV64-NEXT: sb a1, 1(a0) -; RV64-NEXT: li a1, 68 -; RV64-NEXT: sb a1, 0(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: t5: diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll index 55fe81a58805edc..6ee6e1261e7e916 100644 --- a/llvm/test/CodeGen/RISCV/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/memset-inline.ll @@ -31,14 +31,14 @@ define void @memset_1(ptr %a, i8 %value) nounwind { define void @memset_2(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_2: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_2: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_2: @@ -63,18 +63,18 @@ define void @memset_2(ptr %a, i8 %value) nounwind { define void @memset_4(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_4: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_4: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_4: @@ -102,26 +102,26 @@ define void @memset_4(ptr %a, i8 %value) nounwind { define void @memset_8(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_8: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 7(a0) -; RV32-NEXT: sb a1, 6(a0) -; RV32-NEXT: sb a1, 5(a0) ; RV32-NEXT: sb a1, 4(a0) -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a1, 7(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_8: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 7(a0) -; RV64-NEXT: sb a1, 6(a0) -; RV64-NEXT: sb a1, 5(a0) ; RV64-NEXT: sb a1, 4(a0) -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a1, 7(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_8: @@ -130,8 +130,8 @@ define void @memset_8(ptr %a, i8 %value) nounwind { ; RV32-FAST-NEXT: lui a2, 4112 ; RV32-FAST-NEXT: addi a2, a2, 257 ; RV32-FAST-NEXT: mul a1, a1, a2 -; RV32-FAST-NEXT: sw a1, 4(a0) ; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sw a1, 4(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: memset_8: @@ -151,42 +151,42 @@ define void @memset_8(ptr %a, i8 %value) nounwind { define void @memset_16(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_16: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 15(a0) -; RV32-NEXT: sb a1, 14(a0) -; 
RV32-NEXT: sb a1, 13(a0) ; RV32-NEXT: sb a1, 12(a0) -; RV32-NEXT: sb a1, 11(a0) -; RV32-NEXT: sb a1, 10(a0) -; RV32-NEXT: sb a1, 9(a0) +; RV32-NEXT: sb a1, 13(a0) +; RV32-NEXT: sb a1, 14(a0) +; RV32-NEXT: sb a1, 15(a0) ; RV32-NEXT: sb a1, 8(a0) -; RV32-NEXT: sb a1, 7(a0) -; RV32-NEXT: sb a1, 6(a0) -; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 9(a0) +; RV32-NEXT: sb a1, 10(a0) +; RV32-NEXT: sb a1, 11(a0) ; RV32-NEXT: sb a1, 4(a0) -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a1, 7(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_16: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 15(a0) -; RV64-NEXT: sb a1, 14(a0) -; RV64-NEXT: sb a1, 13(a0) ; RV64-NEXT: sb a1, 12(a0) -; RV64-NEXT: sb a1, 11(a0) -; RV64-NEXT: sb a1, 10(a0) -; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 13(a0) +; RV64-NEXT: sb a1, 14(a0) +; RV64-NEXT: sb a1, 15(a0) ; RV64-NEXT: sb a1, 8(a0) -; RV64-NEXT: sb a1, 7(a0) -; RV64-NEXT: sb a1, 6(a0) -; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 10(a0) +; RV64-NEXT: sb a1, 11(a0) ; RV64-NEXT: sb a1, 4(a0) -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a1, 7(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_16: @@ -195,10 +195,10 @@ define void @memset_16(ptr %a, i8 %value) nounwind { ; RV32-FAST-NEXT: lui a2, 4112 ; RV32-FAST-NEXT: addi a2, a2, 257 ; RV32-FAST-NEXT: mul a1, a1, a2 -; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: sw a1, 4(a0) ; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: sw a1, 8(a0) +; RV32-FAST-NEXT: sw a1, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: memset_16: @@ -209,8 +209,8 @@ define void @memset_16(ptr %a, i8 %value) nounwind { ; RV64-FAST-NEXT: slli a3, a2, 32 ; RV64-FAST-NEXT: add a2, a2, a3 ; RV64-FAST-NEXT: mul a1, a1, a2 -; RV64-FAST-NEXT: sd a1, 8(a0) ; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: sd a1, 8(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0) ret void @@ -219,74 +219,74 @@ define void @memset_16(ptr %a, i8 %value) nounwind { define void @memset_32(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_32: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 31(a0) -; RV32-NEXT: sb a1, 30(a0) -; RV32-NEXT: sb a1, 29(a0) ; RV32-NEXT: sb a1, 28(a0) -; RV32-NEXT: sb a1, 27(a0) -; RV32-NEXT: sb a1, 26(a0) -; RV32-NEXT: sb a1, 25(a0) +; RV32-NEXT: sb a1, 29(a0) +; RV32-NEXT: sb a1, 30(a0) +; RV32-NEXT: sb a1, 31(a0) ; RV32-NEXT: sb a1, 24(a0) -; RV32-NEXT: sb a1, 23(a0) -; RV32-NEXT: sb a1, 22(a0) -; RV32-NEXT: sb a1, 21(a0) +; RV32-NEXT: sb a1, 25(a0) +; RV32-NEXT: sb a1, 26(a0) +; RV32-NEXT: sb a1, 27(a0) ; RV32-NEXT: sb a1, 20(a0) -; RV32-NEXT: sb a1, 19(a0) -; RV32-NEXT: sb a1, 18(a0) -; RV32-NEXT: sb a1, 17(a0) +; RV32-NEXT: sb a1, 21(a0) +; RV32-NEXT: sb a1, 22(a0) +; RV32-NEXT: sb a1, 23(a0) ; RV32-NEXT: sb a1, 16(a0) -; RV32-NEXT: sb a1, 15(a0) -; RV32-NEXT: sb a1, 14(a0) -; RV32-NEXT: sb a1, 13(a0) +; RV32-NEXT: sb a1, 17(a0) +; RV32-NEXT: sb a1, 18(a0) +; RV32-NEXT: sb a1, 19(a0) ; RV32-NEXT: sb a1, 12(a0) -; RV32-NEXT: sb a1, 11(a0) -; RV32-NEXT: sb a1, 10(a0) -; RV32-NEXT: sb a1, 9(a0) +; 
RV32-NEXT: sb a1, 13(a0) +; RV32-NEXT: sb a1, 14(a0) +; RV32-NEXT: sb a1, 15(a0) ; RV32-NEXT: sb a1, 8(a0) -; RV32-NEXT: sb a1, 7(a0) -; RV32-NEXT: sb a1, 6(a0) -; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 9(a0) +; RV32-NEXT: sb a1, 10(a0) +; RV32-NEXT: sb a1, 11(a0) ; RV32-NEXT: sb a1, 4(a0) -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a1, 7(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_32: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 31(a0) -; RV64-NEXT: sb a1, 30(a0) -; RV64-NEXT: sb a1, 29(a0) ; RV64-NEXT: sb a1, 28(a0) -; RV64-NEXT: sb a1, 27(a0) -; RV64-NEXT: sb a1, 26(a0) -; RV64-NEXT: sb a1, 25(a0) +; RV64-NEXT: sb a1, 29(a0) +; RV64-NEXT: sb a1, 30(a0) +; RV64-NEXT: sb a1, 31(a0) ; RV64-NEXT: sb a1, 24(a0) -; RV64-NEXT: sb a1, 23(a0) -; RV64-NEXT: sb a1, 22(a0) -; RV64-NEXT: sb a1, 21(a0) +; RV64-NEXT: sb a1, 25(a0) +; RV64-NEXT: sb a1, 26(a0) +; RV64-NEXT: sb a1, 27(a0) ; RV64-NEXT: sb a1, 20(a0) -; RV64-NEXT: sb a1, 19(a0) -; RV64-NEXT: sb a1, 18(a0) -; RV64-NEXT: sb a1, 17(a0) +; RV64-NEXT: sb a1, 21(a0) +; RV64-NEXT: sb a1, 22(a0) +; RV64-NEXT: sb a1, 23(a0) ; RV64-NEXT: sb a1, 16(a0) -; RV64-NEXT: sb a1, 15(a0) -; RV64-NEXT: sb a1, 14(a0) -; RV64-NEXT: sb a1, 13(a0) +; RV64-NEXT: sb a1, 17(a0) +; RV64-NEXT: sb a1, 18(a0) +; RV64-NEXT: sb a1, 19(a0) ; RV64-NEXT: sb a1, 12(a0) -; RV64-NEXT: sb a1, 11(a0) -; RV64-NEXT: sb a1, 10(a0) -; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 13(a0) +; RV64-NEXT: sb a1, 14(a0) +; RV64-NEXT: sb a1, 15(a0) ; RV64-NEXT: sb a1, 8(a0) -; RV64-NEXT: sb a1, 7(a0) -; RV64-NEXT: sb a1, 6(a0) -; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 10(a0) +; RV64-NEXT: sb a1, 11(a0) ; RV64-NEXT: sb a1, 4(a0) -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a1, 7(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_32: @@ -295,14 +295,14 @@ define void @memset_32(ptr %a, i8 %value) nounwind { ; RV32-FAST-NEXT: lui a2, 4112 ; RV32-FAST-NEXT: addi a2, a2, 257 ; RV32-FAST-NEXT: mul a1, a1, a2 -; RV32-FAST-NEXT: sw a1, 28(a0) -; RV32-FAST-NEXT: sw a1, 24(a0) -; RV32-FAST-NEXT: sw a1, 20(a0) ; RV32-FAST-NEXT: sw a1, 16(a0) -; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: sw a1, 20(a0) +; RV32-FAST-NEXT: sw a1, 24(a0) +; RV32-FAST-NEXT: sw a1, 28(a0) ; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: sw a1, 8(a0) +; RV32-FAST-NEXT: sw a1, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: memset_32: @@ -313,10 +313,10 @@ define void @memset_32(ptr %a, i8 %value) nounwind { ; RV64-FAST-NEXT: slli a3, a2, 32 ; RV64-FAST-NEXT: add a2, a2, a3 ; RV64-FAST-NEXT: mul a1, a1, a2 -; RV64-FAST-NEXT: sd a1, 24(a0) -; RV64-FAST-NEXT: sd a1, 16(a0) -; RV64-FAST-NEXT: sd a1, 8(a0) ; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sd a1, 16(a0) +; RV64-FAST-NEXT: sd a1, 24(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0) ret void @@ -325,138 +325,138 @@ define void @memset_32(ptr %a, i8 %value) nounwind { define void @memset_64(ptr %a, i8 %value) nounwind { ; RV32-LABEL: 
memset_64: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 63(a0) -; RV32-NEXT: sb a1, 62(a0) -; RV32-NEXT: sb a1, 61(a0) ; RV32-NEXT: sb a1, 60(a0) -; RV32-NEXT: sb a1, 59(a0) -; RV32-NEXT: sb a1, 58(a0) -; RV32-NEXT: sb a1, 57(a0) +; RV32-NEXT: sb a1, 61(a0) +; RV32-NEXT: sb a1, 62(a0) +; RV32-NEXT: sb a1, 63(a0) ; RV32-NEXT: sb a1, 56(a0) -; RV32-NEXT: sb a1, 55(a0) -; RV32-NEXT: sb a1, 54(a0) -; RV32-NEXT: sb a1, 53(a0) +; RV32-NEXT: sb a1, 57(a0) +; RV32-NEXT: sb a1, 58(a0) +; RV32-NEXT: sb a1, 59(a0) ; RV32-NEXT: sb a1, 52(a0) -; RV32-NEXT: sb a1, 51(a0) -; RV32-NEXT: sb a1, 50(a0) -; RV32-NEXT: sb a1, 49(a0) +; RV32-NEXT: sb a1, 53(a0) +; RV32-NEXT: sb a1, 54(a0) +; RV32-NEXT: sb a1, 55(a0) ; RV32-NEXT: sb a1, 48(a0) -; RV32-NEXT: sb a1, 47(a0) -; RV32-NEXT: sb a1, 46(a0) -; RV32-NEXT: sb a1, 45(a0) +; RV32-NEXT: sb a1, 49(a0) +; RV32-NEXT: sb a1, 50(a0) +; RV32-NEXT: sb a1, 51(a0) ; RV32-NEXT: sb a1, 44(a0) -; RV32-NEXT: sb a1, 43(a0) -; RV32-NEXT: sb a1, 42(a0) -; RV32-NEXT: sb a1, 41(a0) +; RV32-NEXT: sb a1, 45(a0) +; RV32-NEXT: sb a1, 46(a0) +; RV32-NEXT: sb a1, 47(a0) ; RV32-NEXT: sb a1, 40(a0) -; RV32-NEXT: sb a1, 39(a0) -; RV32-NEXT: sb a1, 38(a0) -; RV32-NEXT: sb a1, 37(a0) +; RV32-NEXT: sb a1, 41(a0) +; RV32-NEXT: sb a1, 42(a0) +; RV32-NEXT: sb a1, 43(a0) ; RV32-NEXT: sb a1, 36(a0) -; RV32-NEXT: sb a1, 35(a0) -; RV32-NEXT: sb a1, 34(a0) -; RV32-NEXT: sb a1, 33(a0) -; RV32-NEXT: sb a1, 32(a0) -; RV32-NEXT: sb a1, 31(a0) -; RV32-NEXT: sb a1, 30(a0) -; RV32-NEXT: sb a1, 29(a0) +; RV32-NEXT: sb a1, 37(a0) +; RV32-NEXT: sb a1, 38(a0) +; RV32-NEXT: sb a1, 39(a0) +; RV32-NEXT: sb a1, 32(a0) +; RV32-NEXT: sb a1, 33(a0) +; RV32-NEXT: sb a1, 34(a0) +; RV32-NEXT: sb a1, 35(a0) ; RV32-NEXT: sb a1, 28(a0) -; RV32-NEXT: sb a1, 27(a0) -; RV32-NEXT: sb a1, 26(a0) -; RV32-NEXT: sb a1, 25(a0) +; RV32-NEXT: sb a1, 29(a0) +; RV32-NEXT: sb a1, 30(a0) +; RV32-NEXT: sb a1, 31(a0) ; RV32-NEXT: sb a1, 24(a0) -; RV32-NEXT: sb a1, 23(a0) -; RV32-NEXT: sb a1, 22(a0) -; RV32-NEXT: sb a1, 21(a0) +; RV32-NEXT: sb a1, 25(a0) +; RV32-NEXT: sb a1, 26(a0) +; RV32-NEXT: sb a1, 27(a0) ; RV32-NEXT: sb a1, 20(a0) -; RV32-NEXT: sb a1, 19(a0) -; RV32-NEXT: sb a1, 18(a0) -; RV32-NEXT: sb a1, 17(a0) +; RV32-NEXT: sb a1, 21(a0) +; RV32-NEXT: sb a1, 22(a0) +; RV32-NEXT: sb a1, 23(a0) ; RV32-NEXT: sb a1, 16(a0) -; RV32-NEXT: sb a1, 15(a0) -; RV32-NEXT: sb a1, 14(a0) -; RV32-NEXT: sb a1, 13(a0) +; RV32-NEXT: sb a1, 17(a0) +; RV32-NEXT: sb a1, 18(a0) +; RV32-NEXT: sb a1, 19(a0) ; RV32-NEXT: sb a1, 12(a0) -; RV32-NEXT: sb a1, 11(a0) -; RV32-NEXT: sb a1, 10(a0) -; RV32-NEXT: sb a1, 9(a0) +; RV32-NEXT: sb a1, 13(a0) +; RV32-NEXT: sb a1, 14(a0) +; RV32-NEXT: sb a1, 15(a0) ; RV32-NEXT: sb a1, 8(a0) -; RV32-NEXT: sb a1, 7(a0) -; RV32-NEXT: sb a1, 6(a0) -; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 9(a0) +; RV32-NEXT: sb a1, 10(a0) +; RV32-NEXT: sb a1, 11(a0) ; RV32-NEXT: sb a1, 4(a0) -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a1, 7(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_64: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 63(a0) -; RV64-NEXT: sb a1, 62(a0) -; RV64-NEXT: sb a1, 61(a0) ; RV64-NEXT: sb a1, 60(a0) -; RV64-NEXT: sb a1, 59(a0) -; RV64-NEXT: sb a1, 58(a0) -; RV64-NEXT: sb a1, 57(a0) +; RV64-NEXT: sb a1, 61(a0) +; RV64-NEXT: sb a1, 62(a0) +; RV64-NEXT: sb a1, 63(a0) ; RV64-NEXT: sb a1, 56(a0) -; RV64-NEXT: sb a1, 55(a0) 
-; RV64-NEXT: sb a1, 54(a0) -; RV64-NEXT: sb a1, 53(a0) +; RV64-NEXT: sb a1, 57(a0) +; RV64-NEXT: sb a1, 58(a0) +; RV64-NEXT: sb a1, 59(a0) ; RV64-NEXT: sb a1, 52(a0) -; RV64-NEXT: sb a1, 51(a0) -; RV64-NEXT: sb a1, 50(a0) -; RV64-NEXT: sb a1, 49(a0) +; RV64-NEXT: sb a1, 53(a0) +; RV64-NEXT: sb a1, 54(a0) +; RV64-NEXT: sb a1, 55(a0) ; RV64-NEXT: sb a1, 48(a0) -; RV64-NEXT: sb a1, 47(a0) -; RV64-NEXT: sb a1, 46(a0) -; RV64-NEXT: sb a1, 45(a0) +; RV64-NEXT: sb a1, 49(a0) +; RV64-NEXT: sb a1, 50(a0) +; RV64-NEXT: sb a1, 51(a0) ; RV64-NEXT: sb a1, 44(a0) -; RV64-NEXT: sb a1, 43(a0) -; RV64-NEXT: sb a1, 42(a0) -; RV64-NEXT: sb a1, 41(a0) +; RV64-NEXT: sb a1, 45(a0) +; RV64-NEXT: sb a1, 46(a0) +; RV64-NEXT: sb a1, 47(a0) ; RV64-NEXT: sb a1, 40(a0) -; RV64-NEXT: sb a1, 39(a0) -; RV64-NEXT: sb a1, 38(a0) -; RV64-NEXT: sb a1, 37(a0) +; RV64-NEXT: sb a1, 41(a0) +; RV64-NEXT: sb a1, 42(a0) +; RV64-NEXT: sb a1, 43(a0) ; RV64-NEXT: sb a1, 36(a0) -; RV64-NEXT: sb a1, 35(a0) -; RV64-NEXT: sb a1, 34(a0) -; RV64-NEXT: sb a1, 33(a0) +; RV64-NEXT: sb a1, 37(a0) +; RV64-NEXT: sb a1, 38(a0) +; RV64-NEXT: sb a1, 39(a0) ; RV64-NEXT: sb a1, 32(a0) -; RV64-NEXT: sb a1, 31(a0) -; RV64-NEXT: sb a1, 30(a0) -; RV64-NEXT: sb a1, 29(a0) +; RV64-NEXT: sb a1, 33(a0) +; RV64-NEXT: sb a1, 34(a0) +; RV64-NEXT: sb a1, 35(a0) ; RV64-NEXT: sb a1, 28(a0) -; RV64-NEXT: sb a1, 27(a0) -; RV64-NEXT: sb a1, 26(a0) -; RV64-NEXT: sb a1, 25(a0) +; RV64-NEXT: sb a1, 29(a0) +; RV64-NEXT: sb a1, 30(a0) +; RV64-NEXT: sb a1, 31(a0) ; RV64-NEXT: sb a1, 24(a0) -; RV64-NEXT: sb a1, 23(a0) -; RV64-NEXT: sb a1, 22(a0) -; RV64-NEXT: sb a1, 21(a0) +; RV64-NEXT: sb a1, 25(a0) +; RV64-NEXT: sb a1, 26(a0) +; RV64-NEXT: sb a1, 27(a0) ; RV64-NEXT: sb a1, 20(a0) -; RV64-NEXT: sb a1, 19(a0) -; RV64-NEXT: sb a1, 18(a0) -; RV64-NEXT: sb a1, 17(a0) +; RV64-NEXT: sb a1, 21(a0) +; RV64-NEXT: sb a1, 22(a0) +; RV64-NEXT: sb a1, 23(a0) ; RV64-NEXT: sb a1, 16(a0) -; RV64-NEXT: sb a1, 15(a0) -; RV64-NEXT: sb a1, 14(a0) -; RV64-NEXT: sb a1, 13(a0) +; RV64-NEXT: sb a1, 17(a0) +; RV64-NEXT: sb a1, 18(a0) +; RV64-NEXT: sb a1, 19(a0) ; RV64-NEXT: sb a1, 12(a0) -; RV64-NEXT: sb a1, 11(a0) -; RV64-NEXT: sb a1, 10(a0) -; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 13(a0) +; RV64-NEXT: sb a1, 14(a0) +; RV64-NEXT: sb a1, 15(a0) ; RV64-NEXT: sb a1, 8(a0) -; RV64-NEXT: sb a1, 7(a0) -; RV64-NEXT: sb a1, 6(a0) -; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 9(a0) +; RV64-NEXT: sb a1, 10(a0) +; RV64-NEXT: sb a1, 11(a0) ; RV64-NEXT: sb a1, 4(a0) -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a1, 7(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_64: @@ -465,22 +465,22 @@ define void @memset_64(ptr %a, i8 %value) nounwind { ; RV32-FAST-NEXT: lui a2, 4112 ; RV32-FAST-NEXT: addi a2, a2, 257 ; RV32-FAST-NEXT: mul a1, a1, a2 -; RV32-FAST-NEXT: sw a1, 60(a0) -; RV32-FAST-NEXT: sw a1, 56(a0) -; RV32-FAST-NEXT: sw a1, 52(a0) ; RV32-FAST-NEXT: sw a1, 48(a0) -; RV32-FAST-NEXT: sw a1, 44(a0) -; RV32-FAST-NEXT: sw a1, 40(a0) -; RV32-FAST-NEXT: sw a1, 36(a0) +; RV32-FAST-NEXT: sw a1, 52(a0) +; RV32-FAST-NEXT: sw a1, 56(a0) +; RV32-FAST-NEXT: sw a1, 60(a0) ; RV32-FAST-NEXT: sw a1, 32(a0) -; RV32-FAST-NEXT: sw a1, 28(a0) -; RV32-FAST-NEXT: sw a1, 24(a0) -; RV32-FAST-NEXT: sw a1, 20(a0) +; RV32-FAST-NEXT: sw a1, 36(a0) +; RV32-FAST-NEXT: sw a1, 40(a0) +; RV32-FAST-NEXT: sw a1, 44(a0) ; 
RV32-FAST-NEXT: sw a1, 16(a0) -; RV32-FAST-NEXT: sw a1, 12(a0) -; RV32-FAST-NEXT: sw a1, 8(a0) -; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: sw a1, 20(a0) +; RV32-FAST-NEXT: sw a1, 24(a0) +; RV32-FAST-NEXT: sw a1, 28(a0) ; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: sw a1, 8(a0) +; RV32-FAST-NEXT: sw a1, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: memset_64: @@ -491,14 +491,14 @@ define void @memset_64(ptr %a, i8 %value) nounwind { ; RV64-FAST-NEXT: slli a3, a2, 32 ; RV64-FAST-NEXT: add a2, a2, a3 ; RV64-FAST-NEXT: mul a1, a1, a2 -; RV64-FAST-NEXT: sd a1, 56(a0) -; RV64-FAST-NEXT: sd a1, 48(a0) -; RV64-FAST-NEXT: sd a1, 40(a0) ; RV64-FAST-NEXT: sd a1, 32(a0) -; RV64-FAST-NEXT: sd a1, 24(a0) -; RV64-FAST-NEXT: sd a1, 16(a0) -; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sd a1, 40(a0) +; RV64-FAST-NEXT: sd a1, 48(a0) +; RV64-FAST-NEXT: sd a1, 56(a0) ; RV64-FAST-NEXT: sd a1, 0(a0) +; RV64-FAST-NEXT: sd a1, 8(a0) +; RV64-FAST-NEXT: sd a1, 16(a0) +; RV64-FAST-NEXT: sd a1, 24(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0) ret void @@ -556,8 +556,8 @@ define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind { ; RV32-BOTH-NEXT: lui a2, 4112 ; RV32-BOTH-NEXT: addi a2, a2, 257 ; RV32-BOTH-NEXT: mul a1, a1, a2 -; RV32-BOTH-NEXT: sw a1, 4(a0) ; RV32-BOTH-NEXT: sw a1, 0(a0) +; RV32-BOTH-NEXT: sw a1, 4(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_8: @@ -581,10 +581,10 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind { ; RV32-BOTH-NEXT: lui a2, 4112 ; RV32-BOTH-NEXT: addi a2, a2, 257 ; RV32-BOTH-NEXT: mul a1, a1, a2 -; RV32-BOTH-NEXT: sw a1, 12(a0) -; RV32-BOTH-NEXT: sw a1, 8(a0) -; RV32-BOTH-NEXT: sw a1, 4(a0) ; RV32-BOTH-NEXT: sw a1, 0(a0) +; RV32-BOTH-NEXT: sw a1, 4(a0) +; RV32-BOTH-NEXT: sw a1, 8(a0) +; RV32-BOTH-NEXT: sw a1, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_16: @@ -595,8 +595,8 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind { ; RV64-BOTH-NEXT: slli a3, a2, 32 ; RV64-BOTH-NEXT: add a2, a2, a3 ; RV64-BOTH-NEXT: mul a1, a1, a2 -; RV64-BOTH-NEXT: sd a1, 8(a0) ; RV64-BOTH-NEXT: sd a1, 0(a0) +; RV64-BOTH-NEXT: sd a1, 8(a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0) ret void @@ -609,14 +609,14 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind { ; RV32-BOTH-NEXT: lui a2, 4112 ; RV32-BOTH-NEXT: addi a2, a2, 257 ; RV32-BOTH-NEXT: mul a1, a1, a2 -; RV32-BOTH-NEXT: sw a1, 28(a0) -; RV32-BOTH-NEXT: sw a1, 24(a0) -; RV32-BOTH-NEXT: sw a1, 20(a0) ; RV32-BOTH-NEXT: sw a1, 16(a0) -; RV32-BOTH-NEXT: sw a1, 12(a0) -; RV32-BOTH-NEXT: sw a1, 8(a0) -; RV32-BOTH-NEXT: sw a1, 4(a0) +; RV32-BOTH-NEXT: sw a1, 20(a0) +; RV32-BOTH-NEXT: sw a1, 24(a0) +; RV32-BOTH-NEXT: sw a1, 28(a0) ; RV32-BOTH-NEXT: sw a1, 0(a0) +; RV32-BOTH-NEXT: sw a1, 4(a0) +; RV32-BOTH-NEXT: sw a1, 8(a0) +; RV32-BOTH-NEXT: sw a1, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_32: @@ -627,10 +627,10 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind { ; RV64-BOTH-NEXT: slli a3, a2, 32 ; RV64-BOTH-NEXT: add a2, a2, a3 ; RV64-BOTH-NEXT: mul a1, a1, a2 -; RV64-BOTH-NEXT: sd a1, 24(a0) -; RV64-BOTH-NEXT: sd a1, 16(a0) -; RV64-BOTH-NEXT: sd a1, 8(a0) ; RV64-BOTH-NEXT: sd a1, 0(a0) +; RV64-BOTH-NEXT: sd a1, 8(a0) +; RV64-BOTH-NEXT: sd a1, 16(a0) +; RV64-BOTH-NEXT: sd a1, 24(a0) ; RV64-BOTH-NEXT: ret tail call void 
@llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0) ret void @@ -643,22 +643,22 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind { ; RV32-BOTH-NEXT: lui a2, 4112 ; RV32-BOTH-NEXT: addi a2, a2, 257 ; RV32-BOTH-NEXT: mul a1, a1, a2 -; RV32-BOTH-NEXT: sw a1, 60(a0) -; RV32-BOTH-NEXT: sw a1, 56(a0) -; RV32-BOTH-NEXT: sw a1, 52(a0) ; RV32-BOTH-NEXT: sw a1, 48(a0) -; RV32-BOTH-NEXT: sw a1, 44(a0) -; RV32-BOTH-NEXT: sw a1, 40(a0) -; RV32-BOTH-NEXT: sw a1, 36(a0) +; RV32-BOTH-NEXT: sw a1, 52(a0) +; RV32-BOTH-NEXT: sw a1, 56(a0) +; RV32-BOTH-NEXT: sw a1, 60(a0) ; RV32-BOTH-NEXT: sw a1, 32(a0) -; RV32-BOTH-NEXT: sw a1, 28(a0) -; RV32-BOTH-NEXT: sw a1, 24(a0) -; RV32-BOTH-NEXT: sw a1, 20(a0) +; RV32-BOTH-NEXT: sw a1, 36(a0) +; RV32-BOTH-NEXT: sw a1, 40(a0) +; RV32-BOTH-NEXT: sw a1, 44(a0) ; RV32-BOTH-NEXT: sw a1, 16(a0) -; RV32-BOTH-NEXT: sw a1, 12(a0) -; RV32-BOTH-NEXT: sw a1, 8(a0) -; RV32-BOTH-NEXT: sw a1, 4(a0) +; RV32-BOTH-NEXT: sw a1, 20(a0) +; RV32-BOTH-NEXT: sw a1, 24(a0) +; RV32-BOTH-NEXT: sw a1, 28(a0) ; RV32-BOTH-NEXT: sw a1, 0(a0) +; RV32-BOTH-NEXT: sw a1, 4(a0) +; RV32-BOTH-NEXT: sw a1, 8(a0) +; RV32-BOTH-NEXT: sw a1, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_64: @@ -669,14 +669,14 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind { ; RV64-BOTH-NEXT: slli a3, a2, 32 ; RV64-BOTH-NEXT: add a2, a2, a3 ; RV64-BOTH-NEXT: mul a1, a1, a2 -; RV64-BOTH-NEXT: sd a1, 56(a0) -; RV64-BOTH-NEXT: sd a1, 48(a0) -; RV64-BOTH-NEXT: sd a1, 40(a0) ; RV64-BOTH-NEXT: sd a1, 32(a0) -; RV64-BOTH-NEXT: sd a1, 24(a0) -; RV64-BOTH-NEXT: sd a1, 16(a0) -; RV64-BOTH-NEXT: sd a1, 8(a0) +; RV64-BOTH-NEXT: sd a1, 40(a0) +; RV64-BOTH-NEXT: sd a1, 48(a0) +; RV64-BOTH-NEXT: sd a1, 56(a0) ; RV64-BOTH-NEXT: sd a1, 0(a0) +; RV64-BOTH-NEXT: sd a1, 8(a0) +; RV64-BOTH-NEXT: sd a1, 16(a0) +; RV64-BOTH-NEXT: sd a1, 24(a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0) ret void @@ -701,14 +701,14 @@ define void @bzero_1(ptr %a) nounwind { define void @bzero_2(ptr %a) nounwind { ; RV32-LABEL: bzero_2: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_2: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_2: @@ -727,18 +727,18 @@ define void @bzero_2(ptr %a) nounwind { define void @bzero_4(ptr %a) nounwind { ; RV32-LABEL: bzero_4: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_4: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_4: @@ -757,32 +757,32 @@ define void @bzero_4(ptr %a) nounwind { define void @bzero_8(ptr %a) nounwind { ; RV32-LABEL: bzero_8: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 7(a0) -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sb zero, 5(a0) ; RV32-NEXT: sb zero, 4(a0) -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sb zero, 7(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: 
sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_8: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 7(a0) -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sb zero, 5(a0) ; RV64-NEXT: sb zero, 4(a0) -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sb zero, 7(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_8: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_8: @@ -796,56 +796,56 @@ define void @bzero_8(ptr %a) nounwind { define void @bzero_16(ptr %a) nounwind { ; RV32-LABEL: bzero_16: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 15(a0) -; RV32-NEXT: sb zero, 14(a0) -; RV32-NEXT: sb zero, 13(a0) ; RV32-NEXT: sb zero, 12(a0) -; RV32-NEXT: sb zero, 11(a0) -; RV32-NEXT: sb zero, 10(a0) -; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 13(a0) +; RV32-NEXT: sb zero, 14(a0) +; RV32-NEXT: sb zero, 15(a0) ; RV32-NEXT: sb zero, 8(a0) -; RV32-NEXT: sb zero, 7(a0) -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 10(a0) +; RV32-NEXT: sb zero, 11(a0) ; RV32-NEXT: sb zero, 4(a0) -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sb zero, 7(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_16: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 15(a0) -; RV64-NEXT: sb zero, 14(a0) -; RV64-NEXT: sb zero, 13(a0) ; RV64-NEXT: sb zero, 12(a0) -; RV64-NEXT: sb zero, 11(a0) -; RV64-NEXT: sb zero, 10(a0) -; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 13(a0) +; RV64-NEXT: sb zero, 14(a0) +; RV64-NEXT: sb zero, 15(a0) ; RV64-NEXT: sb zero, 8(a0) -; RV64-NEXT: sb zero, 7(a0) -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 10(a0) +; RV64-NEXT: sb zero, 11(a0) ; RV64-NEXT: sb zero, 4(a0) -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sb zero, 7(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_16: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 8(a0) +; RV32-FAST-NEXT: sw zero, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_16: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: sd zero, 8(a0) ; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: sd zero, 8(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0) ret void @@ -854,94 +854,94 @@ define void @bzero_16(ptr %a) nounwind { define void @bzero_32(ptr %a) nounwind { ; RV32-LABEL: bzero_32: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 31(a0) -; RV32-NEXT: sb zero, 30(a0) -; RV32-NEXT: sb zero, 29(a0) ; RV32-NEXT: sb zero, 28(a0) -; RV32-NEXT: sb zero, 27(a0) -; RV32-NEXT: sb zero, 26(a0) -; RV32-NEXT: sb zero, 25(a0) +; RV32-NEXT: 
sb zero, 29(a0) +; RV32-NEXT: sb zero, 30(a0) +; RV32-NEXT: sb zero, 31(a0) ; RV32-NEXT: sb zero, 24(a0) -; RV32-NEXT: sb zero, 23(a0) -; RV32-NEXT: sb zero, 22(a0) -; RV32-NEXT: sb zero, 21(a0) +; RV32-NEXT: sb zero, 25(a0) +; RV32-NEXT: sb zero, 26(a0) +; RV32-NEXT: sb zero, 27(a0) ; RV32-NEXT: sb zero, 20(a0) -; RV32-NEXT: sb zero, 19(a0) -; RV32-NEXT: sb zero, 18(a0) -; RV32-NEXT: sb zero, 17(a0) +; RV32-NEXT: sb zero, 21(a0) +; RV32-NEXT: sb zero, 22(a0) +; RV32-NEXT: sb zero, 23(a0) ; RV32-NEXT: sb zero, 16(a0) -; RV32-NEXT: sb zero, 15(a0) -; RV32-NEXT: sb zero, 14(a0) -; RV32-NEXT: sb zero, 13(a0) +; RV32-NEXT: sb zero, 17(a0) +; RV32-NEXT: sb zero, 18(a0) +; RV32-NEXT: sb zero, 19(a0) ; RV32-NEXT: sb zero, 12(a0) -; RV32-NEXT: sb zero, 11(a0) -; RV32-NEXT: sb zero, 10(a0) -; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 13(a0) +; RV32-NEXT: sb zero, 14(a0) +; RV32-NEXT: sb zero, 15(a0) ; RV32-NEXT: sb zero, 8(a0) -; RV32-NEXT: sb zero, 7(a0) -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 10(a0) +; RV32-NEXT: sb zero, 11(a0) ; RV32-NEXT: sb zero, 4(a0) -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sb zero, 7(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_32: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 31(a0) -; RV64-NEXT: sb zero, 30(a0) -; RV64-NEXT: sb zero, 29(a0) ; RV64-NEXT: sb zero, 28(a0) -; RV64-NEXT: sb zero, 27(a0) -; RV64-NEXT: sb zero, 26(a0) -; RV64-NEXT: sb zero, 25(a0) +; RV64-NEXT: sb zero, 29(a0) +; RV64-NEXT: sb zero, 30(a0) +; RV64-NEXT: sb zero, 31(a0) ; RV64-NEXT: sb zero, 24(a0) -; RV64-NEXT: sb zero, 23(a0) -; RV64-NEXT: sb zero, 22(a0) -; RV64-NEXT: sb zero, 21(a0) +; RV64-NEXT: sb zero, 25(a0) +; RV64-NEXT: sb zero, 26(a0) +; RV64-NEXT: sb zero, 27(a0) ; RV64-NEXT: sb zero, 20(a0) -; RV64-NEXT: sb zero, 19(a0) -; RV64-NEXT: sb zero, 18(a0) -; RV64-NEXT: sb zero, 17(a0) +; RV64-NEXT: sb zero, 21(a0) +; RV64-NEXT: sb zero, 22(a0) +; RV64-NEXT: sb zero, 23(a0) ; RV64-NEXT: sb zero, 16(a0) -; RV64-NEXT: sb zero, 15(a0) -; RV64-NEXT: sb zero, 14(a0) -; RV64-NEXT: sb zero, 13(a0) +; RV64-NEXT: sb zero, 17(a0) +; RV64-NEXT: sb zero, 18(a0) +; RV64-NEXT: sb zero, 19(a0) ; RV64-NEXT: sb zero, 12(a0) -; RV64-NEXT: sb zero, 11(a0) -; RV64-NEXT: sb zero, 10(a0) -; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 13(a0) +; RV64-NEXT: sb zero, 14(a0) +; RV64-NEXT: sb zero, 15(a0) ; RV64-NEXT: sb zero, 8(a0) -; RV64-NEXT: sb zero, 7(a0) -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 10(a0) +; RV64-NEXT: sb zero, 11(a0) ; RV64-NEXT: sb zero, 4(a0) -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sb zero, 7(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_32: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 28(a0) -; RV32-FAST-NEXT: sw zero, 24(a0) -; RV32-FAST-NEXT: sw zero, 20(a0) ; RV32-FAST-NEXT: sw zero, 16(a0) -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 20(a0) +; RV32-FAST-NEXT: sw zero, 24(a0) +; RV32-FAST-NEXT: sw 
zero, 28(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 8(a0) +; RV32-FAST-NEXT: sw zero, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_32: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: sd zero, 24(a0) -; RV64-FAST-NEXT: sd zero, 16(a0) -; RV64-FAST-NEXT: sd zero, 8(a0) ; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: sd zero, 8(a0) +; RV64-FAST-NEXT: sd zero, 16(a0) +; RV64-FAST-NEXT: sd zero, 24(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0) ret void @@ -950,170 +950,170 @@ define void @bzero_32(ptr %a) nounwind { define void @bzero_64(ptr %a) nounwind { ; RV32-LABEL: bzero_64: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 63(a0) -; RV32-NEXT: sb zero, 62(a0) -; RV32-NEXT: sb zero, 61(a0) ; RV32-NEXT: sb zero, 60(a0) -; RV32-NEXT: sb zero, 59(a0) -; RV32-NEXT: sb zero, 58(a0) -; RV32-NEXT: sb zero, 57(a0) +; RV32-NEXT: sb zero, 61(a0) +; RV32-NEXT: sb zero, 62(a0) +; RV32-NEXT: sb zero, 63(a0) ; RV32-NEXT: sb zero, 56(a0) -; RV32-NEXT: sb zero, 55(a0) -; RV32-NEXT: sb zero, 54(a0) -; RV32-NEXT: sb zero, 53(a0) +; RV32-NEXT: sb zero, 57(a0) +; RV32-NEXT: sb zero, 58(a0) +; RV32-NEXT: sb zero, 59(a0) ; RV32-NEXT: sb zero, 52(a0) -; RV32-NEXT: sb zero, 51(a0) -; RV32-NEXT: sb zero, 50(a0) -; RV32-NEXT: sb zero, 49(a0) +; RV32-NEXT: sb zero, 53(a0) +; RV32-NEXT: sb zero, 54(a0) +; RV32-NEXT: sb zero, 55(a0) ; RV32-NEXT: sb zero, 48(a0) -; RV32-NEXT: sb zero, 47(a0) -; RV32-NEXT: sb zero, 46(a0) -; RV32-NEXT: sb zero, 45(a0) +; RV32-NEXT: sb zero, 49(a0) +; RV32-NEXT: sb zero, 50(a0) +; RV32-NEXT: sb zero, 51(a0) ; RV32-NEXT: sb zero, 44(a0) -; RV32-NEXT: sb zero, 43(a0) -; RV32-NEXT: sb zero, 42(a0) -; RV32-NEXT: sb zero, 41(a0) +; RV32-NEXT: sb zero, 45(a0) +; RV32-NEXT: sb zero, 46(a0) +; RV32-NEXT: sb zero, 47(a0) ; RV32-NEXT: sb zero, 40(a0) -; RV32-NEXT: sb zero, 39(a0) -; RV32-NEXT: sb zero, 38(a0) -; RV32-NEXT: sb zero, 37(a0) +; RV32-NEXT: sb zero, 41(a0) +; RV32-NEXT: sb zero, 42(a0) +; RV32-NEXT: sb zero, 43(a0) ; RV32-NEXT: sb zero, 36(a0) -; RV32-NEXT: sb zero, 35(a0) -; RV32-NEXT: sb zero, 34(a0) -; RV32-NEXT: sb zero, 33(a0) +; RV32-NEXT: sb zero, 37(a0) +; RV32-NEXT: sb zero, 38(a0) +; RV32-NEXT: sb zero, 39(a0) ; RV32-NEXT: sb zero, 32(a0) -; RV32-NEXT: sb zero, 31(a0) -; RV32-NEXT: sb zero, 30(a0) -; RV32-NEXT: sb zero, 29(a0) +; RV32-NEXT: sb zero, 33(a0) +; RV32-NEXT: sb zero, 34(a0) +; RV32-NEXT: sb zero, 35(a0) ; RV32-NEXT: sb zero, 28(a0) -; RV32-NEXT: sb zero, 27(a0) -; RV32-NEXT: sb zero, 26(a0) -; RV32-NEXT: sb zero, 25(a0) +; RV32-NEXT: sb zero, 29(a0) +; RV32-NEXT: sb zero, 30(a0) +; RV32-NEXT: sb zero, 31(a0) ; RV32-NEXT: sb zero, 24(a0) -; RV32-NEXT: sb zero, 23(a0) -; RV32-NEXT: sb zero, 22(a0) -; RV32-NEXT: sb zero, 21(a0) +; RV32-NEXT: sb zero, 25(a0) +; RV32-NEXT: sb zero, 26(a0) +; RV32-NEXT: sb zero, 27(a0) ; RV32-NEXT: sb zero, 20(a0) -; RV32-NEXT: sb zero, 19(a0) -; RV32-NEXT: sb zero, 18(a0) -; RV32-NEXT: sb zero, 17(a0) +; RV32-NEXT: sb zero, 21(a0) +; RV32-NEXT: sb zero, 22(a0) +; RV32-NEXT: sb zero, 23(a0) ; RV32-NEXT: sb zero, 16(a0) -; RV32-NEXT: sb zero, 15(a0) -; RV32-NEXT: sb zero, 14(a0) -; RV32-NEXT: sb zero, 13(a0) +; RV32-NEXT: sb zero, 17(a0) +; RV32-NEXT: sb zero, 18(a0) +; RV32-NEXT: sb zero, 19(a0) ; RV32-NEXT: sb zero, 12(a0) -; RV32-NEXT: sb zero, 11(a0) -; RV32-NEXT: sb zero, 10(a0) -; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 13(a0) +; RV32-NEXT: sb zero, 14(a0) +; RV32-NEXT: sb zero, 15(a0) ; RV32-NEXT: sb zero, 8(a0) -; 
RV32-NEXT: sb zero, 7(a0) -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 9(a0) +; RV32-NEXT: sb zero, 10(a0) +; RV32-NEXT: sb zero, 11(a0) ; RV32-NEXT: sb zero, 4(a0) -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sb zero, 7(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_64: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 63(a0) -; RV64-NEXT: sb zero, 62(a0) -; RV64-NEXT: sb zero, 61(a0) ; RV64-NEXT: sb zero, 60(a0) -; RV64-NEXT: sb zero, 59(a0) -; RV64-NEXT: sb zero, 58(a0) -; RV64-NEXT: sb zero, 57(a0) +; RV64-NEXT: sb zero, 61(a0) +; RV64-NEXT: sb zero, 62(a0) +; RV64-NEXT: sb zero, 63(a0) ; RV64-NEXT: sb zero, 56(a0) -; RV64-NEXT: sb zero, 55(a0) -; RV64-NEXT: sb zero, 54(a0) -; RV64-NEXT: sb zero, 53(a0) +; RV64-NEXT: sb zero, 57(a0) +; RV64-NEXT: sb zero, 58(a0) +; RV64-NEXT: sb zero, 59(a0) ; RV64-NEXT: sb zero, 52(a0) -; RV64-NEXT: sb zero, 51(a0) -; RV64-NEXT: sb zero, 50(a0) -; RV64-NEXT: sb zero, 49(a0) +; RV64-NEXT: sb zero, 53(a0) +; RV64-NEXT: sb zero, 54(a0) +; RV64-NEXT: sb zero, 55(a0) ; RV64-NEXT: sb zero, 48(a0) -; RV64-NEXT: sb zero, 47(a0) -; RV64-NEXT: sb zero, 46(a0) -; RV64-NEXT: sb zero, 45(a0) +; RV64-NEXT: sb zero, 49(a0) +; RV64-NEXT: sb zero, 50(a0) +; RV64-NEXT: sb zero, 51(a0) ; RV64-NEXT: sb zero, 44(a0) -; RV64-NEXT: sb zero, 43(a0) -; RV64-NEXT: sb zero, 42(a0) -; RV64-NEXT: sb zero, 41(a0) +; RV64-NEXT: sb zero, 45(a0) +; RV64-NEXT: sb zero, 46(a0) +; RV64-NEXT: sb zero, 47(a0) ; RV64-NEXT: sb zero, 40(a0) -; RV64-NEXT: sb zero, 39(a0) -; RV64-NEXT: sb zero, 38(a0) -; RV64-NEXT: sb zero, 37(a0) +; RV64-NEXT: sb zero, 41(a0) +; RV64-NEXT: sb zero, 42(a0) +; RV64-NEXT: sb zero, 43(a0) ; RV64-NEXT: sb zero, 36(a0) -; RV64-NEXT: sb zero, 35(a0) -; RV64-NEXT: sb zero, 34(a0) -; RV64-NEXT: sb zero, 33(a0) +; RV64-NEXT: sb zero, 37(a0) +; RV64-NEXT: sb zero, 38(a0) +; RV64-NEXT: sb zero, 39(a0) ; RV64-NEXT: sb zero, 32(a0) -; RV64-NEXT: sb zero, 31(a0) -; RV64-NEXT: sb zero, 30(a0) -; RV64-NEXT: sb zero, 29(a0) +; RV64-NEXT: sb zero, 33(a0) +; RV64-NEXT: sb zero, 34(a0) +; RV64-NEXT: sb zero, 35(a0) ; RV64-NEXT: sb zero, 28(a0) -; RV64-NEXT: sb zero, 27(a0) -; RV64-NEXT: sb zero, 26(a0) -; RV64-NEXT: sb zero, 25(a0) +; RV64-NEXT: sb zero, 29(a0) +; RV64-NEXT: sb zero, 30(a0) +; RV64-NEXT: sb zero, 31(a0) ; RV64-NEXT: sb zero, 24(a0) -; RV64-NEXT: sb zero, 23(a0) -; RV64-NEXT: sb zero, 22(a0) -; RV64-NEXT: sb zero, 21(a0) +; RV64-NEXT: sb zero, 25(a0) +; RV64-NEXT: sb zero, 26(a0) +; RV64-NEXT: sb zero, 27(a0) ; RV64-NEXT: sb zero, 20(a0) -; RV64-NEXT: sb zero, 19(a0) -; RV64-NEXT: sb zero, 18(a0) -; RV64-NEXT: sb zero, 17(a0) +; RV64-NEXT: sb zero, 21(a0) +; RV64-NEXT: sb zero, 22(a0) +; RV64-NEXT: sb zero, 23(a0) ; RV64-NEXT: sb zero, 16(a0) -; RV64-NEXT: sb zero, 15(a0) -; RV64-NEXT: sb zero, 14(a0) -; RV64-NEXT: sb zero, 13(a0) +; RV64-NEXT: sb zero, 17(a0) +; RV64-NEXT: sb zero, 18(a0) +; RV64-NEXT: sb zero, 19(a0) ; RV64-NEXT: sb zero, 12(a0) -; RV64-NEXT: sb zero, 11(a0) -; RV64-NEXT: sb zero, 10(a0) -; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 13(a0) +; RV64-NEXT: sb zero, 14(a0) +; RV64-NEXT: sb zero, 15(a0) ; RV64-NEXT: sb zero, 8(a0) -; RV64-NEXT: sb zero, 7(a0) -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 9(a0) +; RV64-NEXT: sb zero, 10(a0) +; RV64-NEXT: sb zero, 
11(a0) ; RV64-NEXT: sb zero, 4(a0) -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sb zero, 7(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_64: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 60(a0) -; RV32-FAST-NEXT: sw zero, 56(a0) -; RV32-FAST-NEXT: sw zero, 52(a0) ; RV32-FAST-NEXT: sw zero, 48(a0) -; RV32-FAST-NEXT: sw zero, 44(a0) -; RV32-FAST-NEXT: sw zero, 40(a0) -; RV32-FAST-NEXT: sw zero, 36(a0) +; RV32-FAST-NEXT: sw zero, 52(a0) +; RV32-FAST-NEXT: sw zero, 56(a0) +; RV32-FAST-NEXT: sw zero, 60(a0) ; RV32-FAST-NEXT: sw zero, 32(a0) -; RV32-FAST-NEXT: sw zero, 28(a0) -; RV32-FAST-NEXT: sw zero, 24(a0) -; RV32-FAST-NEXT: sw zero, 20(a0) +; RV32-FAST-NEXT: sw zero, 36(a0) +; RV32-FAST-NEXT: sw zero, 40(a0) +; RV32-FAST-NEXT: sw zero, 44(a0) ; RV32-FAST-NEXT: sw zero, 16(a0) -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 20(a0) +; RV32-FAST-NEXT: sw zero, 24(a0) +; RV32-FAST-NEXT: sw zero, 28(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 8(a0) +; RV32-FAST-NEXT: sw zero, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_64: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: sd zero, 56(a0) -; RV64-FAST-NEXT: sd zero, 48(a0) -; RV64-FAST-NEXT: sd zero, 40(a0) ; RV64-FAST-NEXT: sd zero, 32(a0) -; RV64-FAST-NEXT: sd zero, 24(a0) -; RV64-FAST-NEXT: sd zero, 16(a0) -; RV64-FAST-NEXT: sd zero, 8(a0) +; RV64-FAST-NEXT: sd zero, 40(a0) +; RV64-FAST-NEXT: sd zero, 48(a0) +; RV64-FAST-NEXT: sd zero, 56(a0) ; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: sd zero, 8(a0) +; RV64-FAST-NEXT: sd zero, 16(a0) +; RV64-FAST-NEXT: sd zero, 24(a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0) ret void @@ -1152,8 +1152,8 @@ define void @aligned_bzero_4(ptr %a) nounwind { define void @aligned_bzero_8(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_8: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: sw zero, 0(a0) +; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_8: @@ -1168,16 +1168,16 @@ define void @aligned_bzero_8(ptr %a) nounwind { define void @aligned_bzero_16(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_16: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sw zero, 12(a0) -; RV32-BOTH-NEXT: sw zero, 8(a0) -; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: sw zero, 0(a0) +; RV32-BOTH-NEXT: sw zero, 4(a0) +; RV32-BOTH-NEXT: sw zero, 8(a0) +; RV32-BOTH-NEXT: sw zero, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_16: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: sd zero, 8(a0) ; RV64-BOTH-NEXT: sd zero, 0(a0) +; RV64-BOTH-NEXT: sd zero, 8(a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0) ret void @@ -1186,22 +1186,22 @@ define void @aligned_bzero_16(ptr %a) nounwind { define void @aligned_bzero_32(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_32: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sw zero, 28(a0) -; RV32-BOTH-NEXT: sw zero, 24(a0) -; RV32-BOTH-NEXT: sw zero, 20(a0) ; RV32-BOTH-NEXT: sw zero, 16(a0) -; RV32-BOTH-NEXT: sw zero, 12(a0) -; RV32-BOTH-NEXT: sw zero, 8(a0) -; RV32-BOTH-NEXT: sw zero, 4(a0) +; RV32-BOTH-NEXT: sw zero, 20(a0) +; RV32-BOTH-NEXT: 
sw zero, 24(a0) +; RV32-BOTH-NEXT: sw zero, 28(a0) ; RV32-BOTH-NEXT: sw zero, 0(a0) +; RV32-BOTH-NEXT: sw zero, 4(a0) +; RV32-BOTH-NEXT: sw zero, 8(a0) +; RV32-BOTH-NEXT: sw zero, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_32: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: sd zero, 24(a0) -; RV64-BOTH-NEXT: sd zero, 16(a0) -; RV64-BOTH-NEXT: sd zero, 8(a0) ; RV64-BOTH-NEXT: sd zero, 0(a0) +; RV64-BOTH-NEXT: sd zero, 8(a0) +; RV64-BOTH-NEXT: sd zero, 16(a0) +; RV64-BOTH-NEXT: sd zero, 24(a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0) ret void @@ -1210,34 +1210,34 @@ define void @aligned_bzero_32(ptr %a) nounwind { define void @aligned_bzero_64(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_64: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sw zero, 60(a0) -; RV32-BOTH-NEXT: sw zero, 56(a0) -; RV32-BOTH-NEXT: sw zero, 52(a0) ; RV32-BOTH-NEXT: sw zero, 48(a0) -; RV32-BOTH-NEXT: sw zero, 44(a0) -; RV32-BOTH-NEXT: sw zero, 40(a0) -; RV32-BOTH-NEXT: sw zero, 36(a0) +; RV32-BOTH-NEXT: sw zero, 52(a0) +; RV32-BOTH-NEXT: sw zero, 56(a0) +; RV32-BOTH-NEXT: sw zero, 60(a0) ; RV32-BOTH-NEXT: sw zero, 32(a0) -; RV32-BOTH-NEXT: sw zero, 28(a0) -; RV32-BOTH-NEXT: sw zero, 24(a0) -; RV32-BOTH-NEXT: sw zero, 20(a0) +; RV32-BOTH-NEXT: sw zero, 36(a0) +; RV32-BOTH-NEXT: sw zero, 40(a0) +; RV32-BOTH-NEXT: sw zero, 44(a0) ; RV32-BOTH-NEXT: sw zero, 16(a0) -; RV32-BOTH-NEXT: sw zero, 12(a0) -; RV32-BOTH-NEXT: sw zero, 8(a0) -; RV32-BOTH-NEXT: sw zero, 4(a0) +; RV32-BOTH-NEXT: sw zero, 20(a0) +; RV32-BOTH-NEXT: sw zero, 24(a0) +; RV32-BOTH-NEXT: sw zero, 28(a0) ; RV32-BOTH-NEXT: sw zero, 0(a0) +; RV32-BOTH-NEXT: sw zero, 4(a0) +; RV32-BOTH-NEXT: sw zero, 8(a0) +; RV32-BOTH-NEXT: sw zero, 12(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_64: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: sd zero, 56(a0) -; RV64-BOTH-NEXT: sd zero, 48(a0) -; RV64-BOTH-NEXT: sd zero, 40(a0) ; RV64-BOTH-NEXT: sd zero, 32(a0) -; RV64-BOTH-NEXT: sd zero, 24(a0) -; RV64-BOTH-NEXT: sd zero, 16(a0) -; RV64-BOTH-NEXT: sd zero, 8(a0) +; RV64-BOTH-NEXT: sd zero, 40(a0) +; RV64-BOTH-NEXT: sd zero, 48(a0) +; RV64-BOTH-NEXT: sd zero, 56(a0) ; RV64-BOTH-NEXT: sd zero, 0(a0) +; RV64-BOTH-NEXT: sd zero, 8(a0) +; RV64-BOTH-NEXT: sd zero, 16(a0) +; RV64-BOTH-NEXT: sd zero, 24(a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0) ret void @@ -1250,16 +1250,16 @@ define void @aligned_bzero_64(ptr %a) nounwind { define void @aligned_bzero_7(ptr %a) nounwind { ; RV32-LABEL: aligned_bzero_7: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sh zero, 4(a0) ; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sh zero, 4(a0) +; RV32-NEXT: sb zero, 6(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: aligned_bzero_7: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sh zero, 4(a0) ; RV64-NEXT: sw zero, 0(a0) +; RV64-NEXT: sh zero, 4(a0) +; RV64-NEXT: sb zero, 6(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: aligned_bzero_7: @@ -1281,26 +1281,26 @@ define void @aligned_bzero_15(ptr %a) nounwind { ; RV32-LABEL: aligned_bzero_15: ; RV32: # %bb.0: ; RV32-NEXT: sb zero, 14(a0) -; RV32-NEXT: sh zero, 12(a0) -; RV32-NEXT: sw zero, 8(a0) -; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) +; RV32-NEXT: sw zero, 8(a0) +; RV32-NEXT: sh zero, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: aligned_bzero_15: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 14(a0) -; RV64-NEXT: sh zero, 12(a0) -; RV64-NEXT: sw zero, 8(a0) ; RV64-NEXT: 
sd zero, 0(a0) +; RV64-NEXT: sw zero, 8(a0) +; RV64-NEXT: sh zero, 12(a0) +; RV64-NEXT: sb zero, 14(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: aligned_bzero_15: ; RV32-FAST: # %bb.0: ; RV32-FAST-NEXT: sw zero, 11(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 8(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: aligned_bzero_15: diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index cf290a0b8682dad..160f0aefa36a7b8 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -1,8 +1,8 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-clustering=false \ +; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-clustering=false \ +; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s ; RUN: llc -mtriple=riscv32 -verify-misched \ diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index 6f301882b452c0f..676b41344611636 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -208,12 +208,13 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: snez a3, a0 ; RV32I-NEXT: neg a4, a1 ; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: neg a4, a0 +; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: sw a1, 4(a2) +; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -226,12 +227,13 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB5_2: -; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: snez a3, a0 ; RV32ZBB-NEXT: neg a4, a1 ; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: neg a4, a0 +; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: sw a1, 4(a2) +; RV32ZBB-NEXT: mv a0, a4 ; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/nontemporal.ll b/llvm/test/CodeGen/RISCV/nontemporal.ll index 55bd32e4857345b..ada0758831a5292 100644 --- a/llvm/test/CodeGen/RISCV/nontemporal.ll +++ b/llvm/test/CodeGen/RISCV/nontemporal.ll @@ -327,53 +327,53 @@ define <16 x i8> @test_nontemporal_load_v16i8(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_load_v16i8: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_load_v16i8: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: 
ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_load_v16i8: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_load_v16i8: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_load_v16i8: @@ -398,53 +398,53 @@ define <8 x i16> @test_nontemporal_load_v8i16(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_load_v8i16: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_load_v8i16: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_load_v8i16: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_load_v8i16: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; 
CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_load_v8i16: @@ -469,53 +469,53 @@ define <4 x i32> @test_nontemporal_load_v4i32(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_load_v4i32: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_load_v4i32: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_load_v4i32: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_load_v4i32: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_load_v4i32: @@ -549,17 +549,17 @@ define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) { ; CHECK-RV32-LABEL: test_nontemporal_load_v2i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; 
CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_load_v2i64: @@ -574,17 +574,17 @@ define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) { ; CHECK-RV32C-LABEL: test_nontemporal_load_v2i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_load_v2i64: @@ -615,9 +615,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) { ; CHECK-RV32-LABEL: test_nontemporal_store_i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 4(a0) -; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ntl.all +; CHECK-RV32-NEXT: sw a2, 4(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_store_i64: @@ -629,9 +629,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) { ; CHECK-RV32C-LABEL: test_nontemporal_store_i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 4(a0) -; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sw a2, 4(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_store_i64: @@ -643,9 +643,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) { ; CHECK-RV32V-LABEL: test_nontemporal_store_i64: ; CHECK-RV32V: # %bb.0: ; CHECK-RV32V-NEXT: ntl.all -; CHECK-RV32V-NEXT: sw a2, 4(a0) -; CHECK-RV32V-NEXT: ntl.all ; CHECK-RV32V-NEXT: sw a1, 0(a0) +; CHECK-RV32V-NEXT: ntl.all +; CHECK-RV32V-NEXT: sw a2, 4(a0) ; CHECK-RV32V-NEXT: ret store i64 %v, ptr %p, !nontemporal !0 @@ -915,46 +915,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 96(a1) -; CHECK-RV64-NEXT: lbu t3, 104(a1) -; CHECK-RV64-NEXT: lbu t4, 112(a1) -; CHECK-RV64-NEXT: lbu t5, 120(a1) -; CHECK-RV64-NEXT: lbu t6, 64(a1) -; CHECK-RV64-NEXT: lbu s0, 72(a1) -; CHECK-RV64-NEXT: lbu s1, 80(a1) -; CHECK-RV64-NEXT: lbu a1, 88(a1) +; CHECK-RV64-NEXT: lbu t2, 64(a1) +; CHECK-RV64-NEXT: lbu t3, 72(a1) +; CHECK-RV64-NEXT: lbu t4, 80(a1) +; CHECK-RV64-NEXT: lbu t5, 88(a1) +; CHECK-RV64-NEXT: lbu t6, 96(a1) +; CHECK-RV64-NEXT: lbu s0, 104(a1) +; CHECK-RV64-NEXT: lbu s1, 112(a1) +; CHECK-RV64-NEXT: lbu a1, 120(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t5, 15(a0) +; CHECK-RV64-NEXT: sb t6, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t4, 14(a0) +; CHECK-RV64-NEXT: sb s0, 13(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t3, 13(a0) +; CHECK-RV64-NEXT: sb s1, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t2, 
12(a0) +; CHECK-RV64-NEXT: sb a1, 15(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a1, 11(a0) +; CHECK-RV64-NEXT: sb t2, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s1, 10(a0) +; CHECK-RV64-NEXT: sb t3, 9(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s0, 9(a0) +; CHECK-RV64-NEXT: sb t4, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t6, 8(a0) +; CHECK-RV64-NEXT: sb t5, 11(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t1, 7(a0) -; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t0, 6(a0) +; CHECK-RV64-NEXT: sb a6, 4(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb a7, 5(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a6, 4(a0) +; CHECK-RV64-NEXT: sb t0, 6(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a5, 3(a0) +; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: sb a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb a3, 1(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a2, 0(a0) +; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: ntl.all +; CHECK-RV64-NEXT: sb a5, 3(a0) ; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -976,46 +976,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 48(a1) -; CHECK-RV32-NEXT: lbu t3, 52(a1) -; CHECK-RV32-NEXT: lbu t4, 56(a1) -; CHECK-RV32-NEXT: lbu t5, 60(a1) -; CHECK-RV32-NEXT: lbu t6, 32(a1) -; CHECK-RV32-NEXT: lbu s0, 36(a1) -; CHECK-RV32-NEXT: lbu s1, 40(a1) -; CHECK-RV32-NEXT: lbu a1, 44(a1) +; CHECK-RV32-NEXT: lbu t2, 32(a1) +; CHECK-RV32-NEXT: lbu t3, 36(a1) +; CHECK-RV32-NEXT: lbu t4, 40(a1) +; CHECK-RV32-NEXT: lbu t5, 44(a1) +; CHECK-RV32-NEXT: lbu t6, 48(a1) +; CHECK-RV32-NEXT: lbu s0, 52(a1) +; CHECK-RV32-NEXT: lbu s1, 56(a1) +; CHECK-RV32-NEXT: lbu a1, 60(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t5, 15(a0) +; CHECK-RV32-NEXT: sb t6, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t4, 14(a0) +; CHECK-RV32-NEXT: sb s0, 13(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t3, 13(a0) +; CHECK-RV32-NEXT: sb s1, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t2, 12(a0) +; CHECK-RV32-NEXT: sb a1, 15(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a1, 11(a0) +; CHECK-RV32-NEXT: sb t2, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s1, 10(a0) +; CHECK-RV32-NEXT: sb t3, 9(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s0, 9(a0) +; CHECK-RV32-NEXT: sb t4, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t6, 8(a0) -; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t1, 7(a0) +; CHECK-RV32-NEXT: sb t5, 11(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t0, 6(a0) +; CHECK-RV32-NEXT: sb a6, 4(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb a7, 5(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a6, 4(a0) +; CHECK-RV32-NEXT: sb t0, 6(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a5, 3(a0) +; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: sb a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb a3, 1(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a2, 0(a0) +; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: ntl.all +; CHECK-RV32-NEXT: sb a5, 3(a0) ; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: 
lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -1037,46 +1037,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) -; CHECK-RV64C-NEXT: lbu a2, 96(a1) -; CHECK-RV64C-NEXT: lbu a3, 104(a1) -; CHECK-RV64C-NEXT: lbu a4, 112(a1) -; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu s0, 72(a1) -; CHECK-RV64C-NEXT: lbu s1, 80(a1) -; CHECK-RV64C-NEXT: lbu a1, 88(a1) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a5, 15(a0) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a4, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a3, 13(a0) +; CHECK-RV64C-NEXT: lbu a3, 72(a1) +; CHECK-RV64C-NEXT: lbu a4, 80(a1) +; CHECK-RV64C-NEXT: lbu a5, 88(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 104(a1) +; CHECK-RV64C-NEXT: lbu s1, 112(a1) +; CHECK-RV64C-NEXT: lbu a1, 120(a1) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a1, 11(a0) +; CHECK-RV64C-NEXT: sb s0, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s1, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s0, 9(a0) +; CHECK-RV64C-NEXT: sb a1, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t5, 7(a0) +; CHECK-RV64C-NEXT: sb a3, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t4, 6(a0) +; CHECK-RV64C-NEXT: sb a4, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t3, 5(a0) +; CHECK-RV64C-NEXT: sb a5, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t1, 3(a0) +; CHECK-RV64C-NEXT: sb t3, 5(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: sb t4, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: sb t5, 7(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb a6, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sb t1, 3(a0) ; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: addi sp, sp, 16 @@ -1098,46 +1098,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) -; CHECK-RV32C-NEXT: lbu a2, 48(a1) -; CHECK-RV32C-NEXT: lbu a3, 52(a1) -; CHECK-RV32C-NEXT: lbu a4, 56(a1) -; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu s0, 36(a1) -; CHECK-RV32C-NEXT: lbu s1, 40(a1) -; CHECK-RV32C-NEXT: lbu a1, 44(a1) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a5, 15(a0) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a4, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a3, 13(a0) +; CHECK-RV32C-NEXT: lbu a3, 36(a1) +; CHECK-RV32C-NEXT: lbu a4, 40(a1) +; CHECK-RV32C-NEXT: lbu a5, 44(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 52(a1) +; CHECK-RV32C-NEXT: lbu s1, 56(a1) +; CHECK-RV32C-NEXT: lbu a1, 60(a1) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a1, 11(a0) 
+; CHECK-RV32C-NEXT: sb s0, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s1, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s0, 9(a0) +; CHECK-RV32C-NEXT: sb a1, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t5, 7(a0) +; CHECK-RV32C-NEXT: sb a3, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t4, 6(a0) +; CHECK-RV32C-NEXT: sb a4, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t3, 5(a0) +; CHECK-RV32C-NEXT: sb a5, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t1, 3(a0) +; CHECK-RV32C-NEXT: sb t3, 5(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: sb t4, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: sb t5, 7(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb a6, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb t1, 3(a0) ; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: addi sp, sp, 16 @@ -1163,114 +1163,114 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 32(a1) -; CHECK-RV64-NEXT: lh a3, 40(a1) -; CHECK-RV64-NEXT: lh a4, 48(a1) -; CHECK-RV64-NEXT: lh a5, 56(a1) -; CHECK-RV64-NEXT: lh a6, 0(a1) -; CHECK-RV64-NEXT: lh a7, 8(a1) -; CHECK-RV64-NEXT: lh t0, 16(a1) -; CHECK-RV64-NEXT: lh a1, 24(a1) +; CHECK-RV64-NEXT: lh a2, 0(a1) +; CHECK-RV64-NEXT: lh a3, 8(a1) +; CHECK-RV64-NEXT: lh a4, 16(a1) +; CHECK-RV64-NEXT: lh a5, 24(a1) +; CHECK-RV64-NEXT: lh a6, 32(a1) +; CHECK-RV64-NEXT: lh a7, 40(a1) +; CHECK-RV64-NEXT: lh t0, 48(a1) +; CHECK-RV64-NEXT: lh a1, 56(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a5, 14(a0) +; CHECK-RV64-NEXT: sh a6, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a4, 12(a0) +; CHECK-RV64-NEXT: sh a7, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a3, 10(a0) +; CHECK-RV64-NEXT: sh t0, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a2, 8(a0) +; CHECK-RV64-NEXT: sh a1, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a1, 6(a0) +; CHECK-RV64-NEXT: sh a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh t0, 4(a0) +; CHECK-RV64-NEXT: sh a3, 2(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a7, 2(a0) +; CHECK-RV64-NEXT: sh a4, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a6, 0(a0) +; CHECK-RV64-NEXT: sh a5, 6(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 16(a1) -; CHECK-RV32-NEXT: lh a3, 20(a1) -; CHECK-RV32-NEXT: lh a4, 24(a1) -; CHECK-RV32-NEXT: lh a5, 28(a1) -; CHECK-RV32-NEXT: lh a6, 0(a1) -; CHECK-RV32-NEXT: lh a7, 4(a1) -; CHECK-RV32-NEXT: lh t0, 8(a1) -; CHECK-RV32-NEXT: lh a1, 12(a1) +; CHECK-RV32-NEXT: lh a2, 0(a1) +; CHECK-RV32-NEXT: lh a3, 4(a1) +; CHECK-RV32-NEXT: lh a4, 8(a1) +; CHECK-RV32-NEXT: lh a5, 12(a1) +; CHECK-RV32-NEXT: lh a6, 16(a1) +; CHECK-RV32-NEXT: lh a7, 20(a1) +; CHECK-RV32-NEXT: lh t0, 24(a1) +; CHECK-RV32-NEXT: lh a1, 28(a1) ; CHECK-RV32-NEXT: ntl.all -; 
CHECK-RV32-NEXT: sh a5, 14(a0) +; CHECK-RV32-NEXT: sh a6, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a4, 12(a0) +; CHECK-RV32-NEXT: sh a7, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a3, 10(a0) +; CHECK-RV32-NEXT: sh t0, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a2, 8(a0) +; CHECK-RV32-NEXT: sh a1, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a1, 6(a0) +; CHECK-RV32-NEXT: sh a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh t0, 4(a0) +; CHECK-RV32-NEXT: sh a3, 2(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a7, 2(a0) +; CHECK-RV32-NEXT: sh a4, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a6, 0(a0) +; CHECK-RV32-NEXT: sh a5, 6(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a6, 0(a1) +; CHECK-RV64C-NEXT: lh a7, 8(a1) +; CHECK-RV64C-NEXT: lh t0, 16(a1) +; CHECK-RV64C-NEXT: lh a5, 24(a1) +; CHECK-RV64C-NEXT: lh a2, 32(a1) ; CHECK-RV64C-NEXT: lh a3, 40(a1) ; CHECK-RV64C-NEXT: lh a4, 48(a1) -; CHECK-RV64C-NEXT: lh a5, 56(a1) -; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh t0, 8(a1) -; CHECK-RV64C-NEXT: lh a2, 16(a1) -; CHECK-RV64C-NEXT: lh a1, 24(a1) +; CHECK-RV64C-NEXT: lh a1, 56(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a5, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: sh a2, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a7, 8(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sh a1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a1, 6(a0) +; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a2, 4(a0) +; CHECK-RV64C-NEXT: sh a7, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh t0, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a6, 0(a0) +; CHECK-RV64C-NEXT: sh a5, 6(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a6, 0(a1) +; CHECK-RV32C-NEXT: lh a7, 4(a1) +; CHECK-RV32C-NEXT: lh t0, 8(a1) +; CHECK-RV32C-NEXT: lh a5, 12(a1) +; CHECK-RV32C-NEXT: lh a2, 16(a1) ; CHECK-RV32C-NEXT: lh a3, 20(a1) ; CHECK-RV32C-NEXT: lh a4, 24(a1) -; CHECK-RV32C-NEXT: lh a5, 28(a1) -; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh t0, 4(a1) -; CHECK-RV32C-NEXT: lh a2, 8(a1) -; CHECK-RV32C-NEXT: lh a1, 12(a1) +; CHECK-RV32C-NEXT: lh a1, 28(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a5, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: sh a2, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a7, 8(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sh a1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a1, 6(a0) +; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a2, 4(a0) +; CHECK-RV32C-NEXT: sh a7, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh t0, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a6, 0(a0) +; CHECK-RV32C-NEXT: sh a5, 6(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_store_v8i16: @@ -1293,66 
+1293,66 @@ define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) { define void @test_nontemporal_store_v4i32(ptr %p, <4 x i32> %v) { ; CHECK-RV64-LABEL: test_nontemporal_store_v4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lw a2, 24(a1) -; CHECK-RV64-NEXT: lw a3, 16(a1) -; CHECK-RV64-NEXT: lw a4, 8(a1) -; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: lw a2, 0(a1) +; CHECK-RV64-NEXT: lw a3, 8(a1) +; CHECK-RV64-NEXT: lw a4, 16(a1) +; CHECK-RV64-NEXT: lw a1, 24(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a2, 12(a0) +; CHECK-RV64-NEXT: sw a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a3, 8(a0) +; CHECK-RV64-NEXT: sw a3, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a4, 4(a0) +; CHECK-RV64-NEXT: sw a4, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: sw a1, 12(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_store_v4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_store_v4i32: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lw a2, 24(a1) -; CHECK-RV64C-NEXT: lw a3, 16(a1) -; CHECK-RV64C-NEXT: lw a4, 8(a1) -; CHECK-RV64C-NEXT: lw a1, 0(a1) +; CHECK-RV64C-NEXT: lw a2, 0(a1) +; CHECK-RV64C-NEXT: lw a3, 8(a1) +; CHECK-RV64C-NEXT: lw a4, 16(a1) +; CHECK-RV64C-NEXT: lw a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a2, 12(a0) +; CHECK-RV64C-NEXT: sw a2, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a3, 8(a0) +; CHECK-RV64C-NEXT: sw a3, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a4, 4(a0) +; CHECK-RV64C-NEXT: sw a4, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a1, 0(a0) +; CHECK-RV64C-NEXT: sw a1, 12(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_store_v4i32: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_store_v4i32: @@ -1376,49 +1376,49 @@ define void @test_nontemporal_store_v2i64(ptr %p, <2 x i64> %v) { ; CHECK-RV64-LABEL: test_nontemporal_store_v2i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ntl.all +; CHECK-RV64-NEXT: sd a2, 
8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_store_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_store_v2i64: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sd a2, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_store_v2i64: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_store_v2i64: @@ -1752,53 +1752,53 @@ define <16 x i8> @test_nontemporal_P1_load_v16i8(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_P1_load_v16i8: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_load_v16i8: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v16i8: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; 
CHECK-RV32C-LABEL: test_nontemporal_P1_load_v16i8: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v16i8: @@ -1822,53 +1822,53 @@ define <8 x i16> @test_nontemporal_P1_load_v8i16(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_P1_load_v8i16: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_load_v8i16: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v8i16: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v8i16: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v8i16: @@ -1892,53 +1892,53 @@ define <4 x i32> @test_nontemporal_P1_load_v4i32(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_P1_load_v4i32: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 
0(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_load_v4i32: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v4i32: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v4i32: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v4i32: @@ -1971,17 +1971,17 @@ define <2 x i64> @test_nontemporal_P1_load_v2i64(ptr %p) { ; CHECK-RV32-LABEL: test_nontemporal_P1_load_v2i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v2i64: @@ -1996,17 +1996,17 @@ define <2 x i64> @test_nontemporal_P1_load_v2i64(ptr %p) { ; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v2i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: 
lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v2i64: @@ -2036,9 +2036,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32-LABEL: test_nontemporal_P1_store_i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a2, 4(a0) -; CHECK-RV32-NEXT: ntl.p1 ; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ntl.p1 +; CHECK-RV32-NEXT: sw a2, 4(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i64: @@ -2050,9 +2050,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a2, 4(a0) -; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.p1 +; CHECK-RV32C-NEXT: sw a2, 4(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i64: @@ -2064,9 +2064,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i64: ; CHECK-RV32V: # %bb.0: ; CHECK-RV32V-NEXT: ntl.p1 -; CHECK-RV32V-NEXT: sw a2, 4(a0) -; CHECK-RV32V-NEXT: ntl.p1 ; CHECK-RV32V-NEXT: sw a1, 0(a0) +; CHECK-RV32V-NEXT: ntl.p1 +; CHECK-RV32V-NEXT: sw a2, 4(a0) ; CHECK-RV32V-NEXT: ret store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1 ret void @@ -2329,46 +2329,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 96(a1) -; CHECK-RV64-NEXT: lbu t3, 104(a1) -; CHECK-RV64-NEXT: lbu t4, 112(a1) -; CHECK-RV64-NEXT: lbu t5, 120(a1) -; CHECK-RV64-NEXT: lbu t6, 64(a1) -; CHECK-RV64-NEXT: lbu s0, 72(a1) -; CHECK-RV64-NEXT: lbu s1, 80(a1) -; CHECK-RV64-NEXT: lbu a1, 88(a1) +; CHECK-RV64-NEXT: lbu t2, 64(a1) +; CHECK-RV64-NEXT: lbu t3, 72(a1) +; CHECK-RV64-NEXT: lbu t4, 80(a1) +; CHECK-RV64-NEXT: lbu t5, 88(a1) +; CHECK-RV64-NEXT: lbu t6, 96(a1) +; CHECK-RV64-NEXT: lbu s0, 104(a1) +; CHECK-RV64-NEXT: lbu s1, 112(a1) +; CHECK-RV64-NEXT: lbu a1, 120(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t5, 15(a0) +; CHECK-RV64-NEXT: sb t6, 12(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t4, 14(a0) +; CHECK-RV64-NEXT: sb s0, 13(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t3, 13(a0) +; CHECK-RV64-NEXT: sb s1, 14(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t2, 12(a0) +; CHECK-RV64-NEXT: sb a1, 15(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a1, 11(a0) +; CHECK-RV64-NEXT: sb t2, 8(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb s1, 10(a0) +; CHECK-RV64-NEXT: sb t3, 9(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb s0, 9(a0) +; CHECK-RV64-NEXT: sb t4, 10(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t6, 8(a0) -; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t1, 7(a0) +; CHECK-RV64-NEXT: sb t5, 11(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb t0, 6(a0) +; CHECK-RV64-NEXT: sb a6, 4(a0) ; CHECK-RV64-NEXT: ntl.p1 ; CHECK-RV64-NEXT: sb a7, 5(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a6, 4(a0) +; CHECK-RV64-NEXT: sb t0, 
6(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a5, 3(a0) +; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: sb a2, 0(a0) ; CHECK-RV64-NEXT: ntl.p1 ; CHECK-RV64-NEXT: sb a3, 1(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sb a2, 0(a0) +; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: ntl.p1 +; CHECK-RV64-NEXT: sb a5, 3(a0) ; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -2390,46 +2390,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 48(a1) -; CHECK-RV32-NEXT: lbu t3, 52(a1) -; CHECK-RV32-NEXT: lbu t4, 56(a1) -; CHECK-RV32-NEXT: lbu t5, 60(a1) -; CHECK-RV32-NEXT: lbu t6, 32(a1) -; CHECK-RV32-NEXT: lbu s0, 36(a1) -; CHECK-RV32-NEXT: lbu s1, 40(a1) -; CHECK-RV32-NEXT: lbu a1, 44(a1) +; CHECK-RV32-NEXT: lbu t2, 32(a1) +; CHECK-RV32-NEXT: lbu t3, 36(a1) +; CHECK-RV32-NEXT: lbu t4, 40(a1) +; CHECK-RV32-NEXT: lbu t5, 44(a1) +; CHECK-RV32-NEXT: lbu t6, 48(a1) +; CHECK-RV32-NEXT: lbu s0, 52(a1) +; CHECK-RV32-NEXT: lbu s1, 56(a1) +; CHECK-RV32-NEXT: lbu a1, 60(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t5, 15(a0) +; CHECK-RV32-NEXT: sb t6, 12(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t4, 14(a0) +; CHECK-RV32-NEXT: sb s0, 13(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t3, 13(a0) +; CHECK-RV32-NEXT: sb s1, 14(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t2, 12(a0) +; CHECK-RV32-NEXT: sb a1, 15(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a1, 11(a0) +; CHECK-RV32-NEXT: sb t2, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb s1, 10(a0) +; CHECK-RV32-NEXT: sb t3, 9(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb s0, 9(a0) +; CHECK-RV32-NEXT: sb t4, 10(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t6, 8(a0) +; CHECK-RV32-NEXT: sb t5, 11(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t1, 7(a0) -; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb t0, 6(a0) +; CHECK-RV32-NEXT: sb a6, 4(a0) ; CHECK-RV32-NEXT: ntl.p1 ; CHECK-RV32-NEXT: sb a7, 5(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a6, 4(a0) +; CHECK-RV32-NEXT: sb t0, 6(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a5, 3(a0) +; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: sb a2, 0(a0) ; CHECK-RV32-NEXT: ntl.p1 ; CHECK-RV32-NEXT: sb a3, 1(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sb a2, 0(a0) +; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: ntl.p1 +; CHECK-RV32-NEXT: sb a5, 3(a0) ; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -2451,46 +2451,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) -; CHECK-RV64C-NEXT: lbu a2, 96(a1) -; CHECK-RV64C-NEXT: lbu a3, 104(a1) -; CHECK-RV64C-NEXT: lbu a4, 112(a1) -; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu s0, 72(a1) -; CHECK-RV64C-NEXT: lbu s1, 80(a1) -; CHECK-RV64C-NEXT: lbu a1, 88(a1) -; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a5, 15(a0) -; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a4, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.p1 -; 
CHECK-RV64C-NEXT: sb a3, 13(a0) +; CHECK-RV64C-NEXT: lbu a3, 72(a1) +; CHECK-RV64C-NEXT: lbu a4, 80(a1) +; CHECK-RV64C-NEXT: lbu a5, 88(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 104(a1) +; CHECK-RV64C-NEXT: lbu s1, 112(a1) +; CHECK-RV64C-NEXT: lbu a1, 120(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a1, 11(a0) +; CHECK-RV64C-NEXT: sb s0, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb s1, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb s0, 9(a0) +; CHECK-RV64C-NEXT: sb a1, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb t5, 7(a0) +; CHECK-RV64C-NEXT: sb a3, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb t4, 6(a0) +; CHECK-RV64C-NEXT: sb a4, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb t3, 5(a0) +; CHECK-RV64C-NEXT: sb a5, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sb t2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb t1, 3(a0) +; CHECK-RV64C-NEXT: sb t3, 5(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: sb t4, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: sb t5, 7(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sb a6, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.p1 +; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: c.ntl.p1 +; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: c.ntl.p1 +; CHECK-RV64C-NEXT: sb t1, 3(a0) ; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: addi sp, sp, 16 @@ -2512,46 +2512,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) -; CHECK-RV32C-NEXT: lbu a2, 48(a1) -; CHECK-RV32C-NEXT: lbu a3, 52(a1) -; CHECK-RV32C-NEXT: lbu a4, 56(a1) -; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu s0, 36(a1) -; CHECK-RV32C-NEXT: lbu s1, 40(a1) -; CHECK-RV32C-NEXT: lbu a1, 44(a1) -; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a5, 15(a0) -; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a4, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a3, 13(a0) +; CHECK-RV32C-NEXT: lbu a3, 36(a1) +; CHECK-RV32C-NEXT: lbu a4, 40(a1) +; CHECK-RV32C-NEXT: lbu a5, 44(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 52(a1) +; CHECK-RV32C-NEXT: lbu s1, 56(a1) +; CHECK-RV32C-NEXT: lbu a1, 60(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a1, 11(a0) +; CHECK-RV32C-NEXT: sb s0, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb s1, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb s0, 9(a0) +; CHECK-RV32C-NEXT: sb a1, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb t5, 7(a0) +; CHECK-RV32C-NEXT: sb a3, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb t4, 6(a0) +; CHECK-RV32C-NEXT: sb a4, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb t3, 5(a0) +; CHECK-RV32C-NEXT: sb a5, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sb t2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb t1, 3(a0) +; CHECK-RV32C-NEXT: sb t3, 
5(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: sb t4, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: sb t5, 7(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sb a6, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.p1 +; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: c.ntl.p1 +; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: c.ntl.p1 +; CHECK-RV32C-NEXT: sb t1, 3(a0) ; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: addi sp, sp, 16 @@ -2577,114 +2577,114 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 32(a1) -; CHECK-RV64-NEXT: lh a3, 40(a1) -; CHECK-RV64-NEXT: lh a4, 48(a1) -; CHECK-RV64-NEXT: lh a5, 56(a1) -; CHECK-RV64-NEXT: lh a6, 0(a1) -; CHECK-RV64-NEXT: lh a7, 8(a1) -; CHECK-RV64-NEXT: lh t0, 16(a1) -; CHECK-RV64-NEXT: lh a1, 24(a1) +; CHECK-RV64-NEXT: lh a2, 0(a1) +; CHECK-RV64-NEXT: lh a3, 8(a1) +; CHECK-RV64-NEXT: lh a4, 16(a1) +; CHECK-RV64-NEXT: lh a5, 24(a1) +; CHECK-RV64-NEXT: lh a6, 32(a1) +; CHECK-RV64-NEXT: lh a7, 40(a1) +; CHECK-RV64-NEXT: lh t0, 48(a1) +; CHECK-RV64-NEXT: lh a1, 56(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a5, 14(a0) +; CHECK-RV64-NEXT: sh a6, 8(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a4, 12(a0) +; CHECK-RV64-NEXT: sh a7, 10(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a3, 10(a0) +; CHECK-RV64-NEXT: sh t0, 12(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a2, 8(a0) +; CHECK-RV64-NEXT: sh a1, 14(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a1, 6(a0) +; CHECK-RV64-NEXT: sh a2, 0(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh t0, 4(a0) +; CHECK-RV64-NEXT: sh a3, 2(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a7, 2(a0) +; CHECK-RV64-NEXT: sh a4, 4(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sh a6, 0(a0) +; CHECK-RV64-NEXT: sh a5, 6(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 16(a1) -; CHECK-RV32-NEXT: lh a3, 20(a1) -; CHECK-RV32-NEXT: lh a4, 24(a1) -; CHECK-RV32-NEXT: lh a5, 28(a1) -; CHECK-RV32-NEXT: lh a6, 0(a1) -; CHECK-RV32-NEXT: lh a7, 4(a1) -; CHECK-RV32-NEXT: lh t0, 8(a1) -; CHECK-RV32-NEXT: lh a1, 12(a1) +; CHECK-RV32-NEXT: lh a2, 0(a1) +; CHECK-RV32-NEXT: lh a3, 4(a1) +; CHECK-RV32-NEXT: lh a4, 8(a1) +; CHECK-RV32-NEXT: lh a5, 12(a1) +; CHECK-RV32-NEXT: lh a6, 16(a1) +; CHECK-RV32-NEXT: lh a7, 20(a1) +; CHECK-RV32-NEXT: lh t0, 24(a1) +; CHECK-RV32-NEXT: lh a1, 28(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a5, 14(a0) +; CHECK-RV32-NEXT: sh a6, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a4, 12(a0) +; CHECK-RV32-NEXT: sh a7, 10(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a3, 10(a0) +; CHECK-RV32-NEXT: sh t0, 12(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a2, 8(a0) +; CHECK-RV32-NEXT: sh a1, 14(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a1, 6(a0) +; CHECK-RV32-NEXT: sh a2, 0(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh t0, 4(a0) +; CHECK-RV32-NEXT: sh a3, 2(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a7, 2(a0) +; CHECK-RV32-NEXT: sh a4, 4(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sh a6, 0(a0) +; CHECK-RV32-NEXT: sh a5, 6(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: 
test_nontemporal_P1_store_v8i16: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a6, 0(a1) +; CHECK-RV64C-NEXT: lh a7, 8(a1) +; CHECK-RV64C-NEXT: lh t0, 16(a1) +; CHECK-RV64C-NEXT: lh a5, 24(a1) +; CHECK-RV64C-NEXT: lh a2, 32(a1) ; CHECK-RV64C-NEXT: lh a3, 40(a1) ; CHECK-RV64C-NEXT: lh a4, 48(a1) -; CHECK-RV64C-NEXT: lh a5, 56(a1) -; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh t0, 8(a1) -; CHECK-RV64C-NEXT: lh a2, 16(a1) -; CHECK-RV64C-NEXT: lh a1, 24(a1) +; CHECK-RV64C-NEXT: lh a1, 56(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a5, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: sh a2, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a7, 8(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: c.ntl.p1 +; CHECK-RV64C-NEXT: sh a1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a1, 6(a0) +; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a2, 4(a0) +; CHECK-RV64C-NEXT: sh a7, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh t0, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sh a6, 0(a0) +; CHECK-RV64C-NEXT: sh a5, 6(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a6, 0(a1) +; CHECK-RV32C-NEXT: lh a7, 4(a1) +; CHECK-RV32C-NEXT: lh t0, 8(a1) +; CHECK-RV32C-NEXT: lh a5, 12(a1) +; CHECK-RV32C-NEXT: lh a2, 16(a1) ; CHECK-RV32C-NEXT: lh a3, 20(a1) ; CHECK-RV32C-NEXT: lh a4, 24(a1) -; CHECK-RV32C-NEXT: lh a5, 28(a1) -; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh t0, 4(a1) -; CHECK-RV32C-NEXT: lh a2, 8(a1) -; CHECK-RV32C-NEXT: lh a1, 12(a1) +; CHECK-RV32C-NEXT: lh a1, 28(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a5, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: sh a2, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 ; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a7, 8(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: c.ntl.p1 +; CHECK-RV32C-NEXT: sh a1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a1, 6(a0) +; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a2, 4(a0) +; CHECK-RV32C-NEXT: sh a7, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh t0, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sh a6, 0(a0) +; CHECK-RV32C-NEXT: sh a5, 6(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v8i16: @@ -2707,66 +2707,66 @@ define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) { define void @test_nontemporal_P1_store_v4i32(ptr %p, <4 x i32> %v) { ; CHECK-RV64-LABEL: test_nontemporal_P1_store_v4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lw a2, 24(a1) -; CHECK-RV64-NEXT: lw a3, 16(a1) -; CHECK-RV64-NEXT: lw a4, 8(a1) -; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: lw a2, 0(a1) +; CHECK-RV64-NEXT: lw a3, 8(a1) +; CHECK-RV64-NEXT: lw a4, 16(a1) +; CHECK-RV64-NEXT: lw a1, 24(a1) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sw a2, 12(a0) +; CHECK-RV64-NEXT: sw a2, 0(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sw a3, 8(a0) +; CHECK-RV64-NEXT: sw a3, 4(a0) ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sw a4, 4(a0) +; CHECK-RV64-NEXT: sw a4, 8(a0) ; 
CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: sw a1, 12(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_store_v4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v4i32: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lw a2, 24(a1) -; CHECK-RV64C-NEXT: lw a3, 16(a1) -; CHECK-RV64C-NEXT: lw a4, 8(a1) -; CHECK-RV64C-NEXT: lw a1, 0(a1) +; CHECK-RV64C-NEXT: lw a2, 0(a1) +; CHECK-RV64C-NEXT: lw a3, 8(a1) +; CHECK-RV64C-NEXT: lw a4, 16(a1) +; CHECK-RV64C-NEXT: lw a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sw a2, 12(a0) +; CHECK-RV64C-NEXT: sw a2, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sw a3, 8(a0) +; CHECK-RV64C-NEXT: sw a3, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sw a4, 4(a0) +; CHECK-RV64C-NEXT: sw a4, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sw a1, 0(a0) +; CHECK-RV64C-NEXT: sw a1, 12(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v4i32: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v4i32: @@ -2790,49 +2790,49 @@ define void @test_nontemporal_P1_store_v2i64(ptr %p, <2 x i64> %v) { ; CHECK-RV64-LABEL: test_nontemporal_P1_store_v2i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.p1 -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: ntl.p1 ; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ntl.p1 +; CHECK-RV64-NEXT: sd a2, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_P1_store_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.p1 -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; 
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v2i64: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.p1 -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: c.ntl.p1 ; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.p1 +; CHECK-RV64C-NEXT: sd a2, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v2i64: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.p1 -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v2i64: @@ -3166,53 +3166,53 @@ define <16 x i8> @test_nontemporal_PALL_load_v16i8(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v16i8: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v16i8: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v16i8: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v16i8: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 
12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v16i8: @@ -3236,53 +3236,53 @@ define <8 x i16> @test_nontemporal_PALL_load_v8i16(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v8i16: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v8i16: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v8i16: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v8i16: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v8i16: @@ -3306,53 +3306,53 @@ define <4 x i32> @test_nontemporal_PALL_load_v4i32(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v4i32: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v4i32: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a1, 0(a1) -; 
CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v4i32: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v4i32: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v4i32: @@ -3385,17 +3385,17 @@ define <2 x i64> @test_nontemporal_PALL_load_v2i64(ptr %p) { ; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v2i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v2i64: @@ -3410,17 +3410,17 @@ define <2 x i64> @test_nontemporal_PALL_load_v2i64(ptr %p) { ; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v2i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v2i64: @@ -3450,9 +3450,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i64: ; CHECK-RV32: # 
%bb.0: ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a2, 4(a0) -; CHECK-RV32-NEXT: ntl.pall ; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ntl.pall +; CHECK-RV32-NEXT: sw a2, 4(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i64: @@ -3464,9 +3464,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a2, 4(a0) -; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.pall +; CHECK-RV32C-NEXT: sw a2, 4(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i64: @@ -3478,9 +3478,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i64: ; CHECK-RV32V: # %bb.0: ; CHECK-RV32V-NEXT: ntl.pall -; CHECK-RV32V-NEXT: sw a2, 4(a0) -; CHECK-RV32V-NEXT: ntl.pall ; CHECK-RV32V-NEXT: sw a1, 0(a0) +; CHECK-RV32V-NEXT: ntl.pall +; CHECK-RV32V-NEXT: sw a2, 4(a0) ; CHECK-RV32V-NEXT: ret store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2 ret void @@ -3743,46 +3743,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 96(a1) -; CHECK-RV64-NEXT: lbu t3, 104(a1) -; CHECK-RV64-NEXT: lbu t4, 112(a1) -; CHECK-RV64-NEXT: lbu t5, 120(a1) -; CHECK-RV64-NEXT: lbu t6, 64(a1) -; CHECK-RV64-NEXT: lbu s0, 72(a1) -; CHECK-RV64-NEXT: lbu s1, 80(a1) -; CHECK-RV64-NEXT: lbu a1, 88(a1) -; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t5, 15(a0) +; CHECK-RV64-NEXT: lbu t2, 64(a1) +; CHECK-RV64-NEXT: lbu t3, 72(a1) +; CHECK-RV64-NEXT: lbu t4, 80(a1) +; CHECK-RV64-NEXT: lbu t5, 88(a1) +; CHECK-RV64-NEXT: lbu t6, 96(a1) +; CHECK-RV64-NEXT: lbu s0, 104(a1) +; CHECK-RV64-NEXT: lbu s1, 112(a1) +; CHECK-RV64-NEXT: lbu a1, 120(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t4, 14(a0) +; CHECK-RV64-NEXT: sb t6, 12(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t3, 13(a0) +; CHECK-RV64-NEXT: sb s0, 13(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t2, 12(a0) +; CHECK-RV64-NEXT: sb s1, 14(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a1, 11(a0) +; CHECK-RV64-NEXT: sb a1, 15(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb s1, 10(a0) +; CHECK-RV64-NEXT: sb t2, 8(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb s0, 9(a0) +; CHECK-RV64-NEXT: sb t3, 9(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t6, 8(a0) +; CHECK-RV64-NEXT: sb t4, 10(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t1, 7(a0) +; CHECK-RV64-NEXT: sb t5, 11(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb t0, 6(a0) +; CHECK-RV64-NEXT: sb a6, 4(a0) ; CHECK-RV64-NEXT: ntl.pall ; CHECK-RV64-NEXT: sb a7, 5(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a6, 4(a0) +; CHECK-RV64-NEXT: sb t0, 6(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a5, 3(a0) +; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: sb a2, 0(a0) ; CHECK-RV64-NEXT: ntl.pall ; CHECK-RV64-NEXT: sb a3, 1(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sb a2, 0(a0) +; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: ntl.pall +; CHECK-RV64-NEXT: sb a5, 3(a0) ; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ 
-3804,46 +3804,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 48(a1) -; CHECK-RV32-NEXT: lbu t3, 52(a1) -; CHECK-RV32-NEXT: lbu t4, 56(a1) -; CHECK-RV32-NEXT: lbu t5, 60(a1) -; CHECK-RV32-NEXT: lbu t6, 32(a1) -; CHECK-RV32-NEXT: lbu s0, 36(a1) -; CHECK-RV32-NEXT: lbu s1, 40(a1) -; CHECK-RV32-NEXT: lbu a1, 44(a1) +; CHECK-RV32-NEXT: lbu t2, 32(a1) +; CHECK-RV32-NEXT: lbu t3, 36(a1) +; CHECK-RV32-NEXT: lbu t4, 40(a1) +; CHECK-RV32-NEXT: lbu t5, 44(a1) +; CHECK-RV32-NEXT: lbu t6, 48(a1) +; CHECK-RV32-NEXT: lbu s0, 52(a1) +; CHECK-RV32-NEXT: lbu s1, 56(a1) +; CHECK-RV32-NEXT: lbu a1, 60(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t5, 15(a0) +; CHECK-RV32-NEXT: sb t6, 12(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t4, 14(a0) +; CHECK-RV32-NEXT: sb s0, 13(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t3, 13(a0) +; CHECK-RV32-NEXT: sb s1, 14(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t2, 12(a0) +; CHECK-RV32-NEXT: sb a1, 15(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a1, 11(a0) +; CHECK-RV32-NEXT: sb t2, 8(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb s1, 10(a0) +; CHECK-RV32-NEXT: sb t3, 9(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb s0, 9(a0) +; CHECK-RV32-NEXT: sb t4, 10(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t6, 8(a0) +; CHECK-RV32-NEXT: sb t5, 11(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t1, 7(a0) -; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb t0, 6(a0) +; CHECK-RV32-NEXT: sb a6, 4(a0) ; CHECK-RV32-NEXT: ntl.pall ; CHECK-RV32-NEXT: sb a7, 5(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a6, 4(a0) +; CHECK-RV32-NEXT: sb t0, 6(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a5, 3(a0) +; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: sb a2, 0(a0) ; CHECK-RV32-NEXT: ntl.pall ; CHECK-RV32-NEXT: sb a3, 1(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sb a2, 0(a0) +; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: ntl.pall +; CHECK-RV32-NEXT: sb a5, 3(a0) ; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -3865,46 +3865,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) -; CHECK-RV64C-NEXT: lbu a2, 96(a1) -; CHECK-RV64C-NEXT: lbu a3, 104(a1) -; CHECK-RV64C-NEXT: lbu a4, 112(a1) -; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu s0, 72(a1) -; CHECK-RV64C-NEXT: lbu s1, 80(a1) -; CHECK-RV64C-NEXT: lbu a1, 88(a1) -; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a5, 15(a0) -; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a4, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a3, 13(a0) +; CHECK-RV64C-NEXT: lbu a3, 72(a1) +; CHECK-RV64C-NEXT: lbu a4, 80(a1) +; CHECK-RV64C-NEXT: lbu a5, 88(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 104(a1) +; CHECK-RV64C-NEXT: lbu s1, 112(a1) +; CHECK-RV64C-NEXT: lbu a1, 120(a1) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a1, 11(a0) +; CHECK-RV64C-NEXT: sb s0, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb s1, 
10(a0) +; CHECK-RV64C-NEXT: sb s1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb s0, 9(a0) +; CHECK-RV64C-NEXT: sb a1, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb t5, 7(a0) +; CHECK-RV64C-NEXT: sb a3, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb t4, 6(a0) +; CHECK-RV64C-NEXT: sb a4, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb t3, 5(a0) +; CHECK-RV64C-NEXT: sb a5, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sb t2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb t1, 3(a0) +; CHECK-RV64C-NEXT: sb t3, 5(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: sb t4, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: sb t5, 7(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sb a6, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.pall +; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: c.ntl.pall +; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: c.ntl.pall +; CHECK-RV64C-NEXT: sb t1, 3(a0) ; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: addi sp, sp, 16 @@ -3926,46 +3926,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) -; CHECK-RV32C-NEXT: lbu a2, 48(a1) -; CHECK-RV32C-NEXT: lbu a3, 52(a1) -; CHECK-RV32C-NEXT: lbu a4, 56(a1) -; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu s0, 36(a1) -; CHECK-RV32C-NEXT: lbu s1, 40(a1) -; CHECK-RV32C-NEXT: lbu a1, 44(a1) -; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a5, 15(a0) -; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a4, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a3, 13(a0) +; CHECK-RV32C-NEXT: lbu a3, 36(a1) +; CHECK-RV32C-NEXT: lbu a4, 40(a1) +; CHECK-RV32C-NEXT: lbu a5, 44(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 52(a1) +; CHECK-RV32C-NEXT: lbu s1, 56(a1) +; CHECK-RV32C-NEXT: lbu a1, 60(a1) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a1, 11(a0) +; CHECK-RV32C-NEXT: sb s0, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb s1, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb s0, 9(a0) +; CHECK-RV32C-NEXT: sb a1, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb t5, 7(a0) +; CHECK-RV32C-NEXT: sb a3, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb t4, 6(a0) +; CHECK-RV32C-NEXT: sb a4, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb t3, 5(a0) +; CHECK-RV32C-NEXT: sb a5, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sb t2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb t1, 3(a0) +; CHECK-RV32C-NEXT: sb t3, 5(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: sb t4, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: sb t5, 7(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sb a6, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.pall +; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: c.ntl.pall +; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: c.ntl.pall +; 
CHECK-RV32C-NEXT: sb t1, 3(a0) ; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: addi sp, sp, 16 @@ -3991,114 +3991,114 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 32(a1) -; CHECK-RV64-NEXT: lh a3, 40(a1) -; CHECK-RV64-NEXT: lh a4, 48(a1) -; CHECK-RV64-NEXT: lh a5, 56(a1) -; CHECK-RV64-NEXT: lh a6, 0(a1) -; CHECK-RV64-NEXT: lh a7, 8(a1) -; CHECK-RV64-NEXT: lh t0, 16(a1) -; CHECK-RV64-NEXT: lh a1, 24(a1) +; CHECK-RV64-NEXT: lh a2, 0(a1) +; CHECK-RV64-NEXT: lh a3, 8(a1) +; CHECK-RV64-NEXT: lh a4, 16(a1) +; CHECK-RV64-NEXT: lh a5, 24(a1) +; CHECK-RV64-NEXT: lh a6, 32(a1) +; CHECK-RV64-NEXT: lh a7, 40(a1) +; CHECK-RV64-NEXT: lh t0, 48(a1) +; CHECK-RV64-NEXT: lh a1, 56(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a5, 14(a0) +; CHECK-RV64-NEXT: sh a6, 8(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a4, 12(a0) +; CHECK-RV64-NEXT: sh a7, 10(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a3, 10(a0) +; CHECK-RV64-NEXT: sh t0, 12(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a2, 8(a0) +; CHECK-RV64-NEXT: sh a1, 14(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a1, 6(a0) +; CHECK-RV64-NEXT: sh a2, 0(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh t0, 4(a0) +; CHECK-RV64-NEXT: sh a3, 2(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a7, 2(a0) +; CHECK-RV64-NEXT: sh a4, 4(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sh a6, 0(a0) +; CHECK-RV64-NEXT: sh a5, 6(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 16(a1) -; CHECK-RV32-NEXT: lh a3, 20(a1) -; CHECK-RV32-NEXT: lh a4, 24(a1) -; CHECK-RV32-NEXT: lh a5, 28(a1) -; CHECK-RV32-NEXT: lh a6, 0(a1) -; CHECK-RV32-NEXT: lh a7, 4(a1) -; CHECK-RV32-NEXT: lh t0, 8(a1) -; CHECK-RV32-NEXT: lh a1, 12(a1) +; CHECK-RV32-NEXT: lh a2, 0(a1) +; CHECK-RV32-NEXT: lh a3, 4(a1) +; CHECK-RV32-NEXT: lh a4, 8(a1) +; CHECK-RV32-NEXT: lh a5, 12(a1) +; CHECK-RV32-NEXT: lh a6, 16(a1) +; CHECK-RV32-NEXT: lh a7, 20(a1) +; CHECK-RV32-NEXT: lh t0, 24(a1) +; CHECK-RV32-NEXT: lh a1, 28(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a5, 14(a0) +; CHECK-RV32-NEXT: sh a6, 8(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a4, 12(a0) +; CHECK-RV32-NEXT: sh a7, 10(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a3, 10(a0) +; CHECK-RV32-NEXT: sh t0, 12(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a2, 8(a0) +; CHECK-RV32-NEXT: sh a1, 14(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a1, 6(a0) +; CHECK-RV32-NEXT: sh a2, 0(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh t0, 4(a0) +; CHECK-RV32-NEXT: sh a3, 2(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a7, 2(a0) +; CHECK-RV32-NEXT: sh a4, 4(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sh a6, 0(a0) +; CHECK-RV32-NEXT: sh a5, 6(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a6, 0(a1) +; CHECK-RV64C-NEXT: lh a7, 8(a1) +; CHECK-RV64C-NEXT: lh t0, 16(a1) +; CHECK-RV64C-NEXT: lh a5, 24(a1) +; CHECK-RV64C-NEXT: lh a2, 32(a1) ; CHECK-RV64C-NEXT: lh a3, 40(a1) ; CHECK-RV64C-NEXT: lh a4, 48(a1) -; CHECK-RV64C-NEXT: lh a5, 56(a1) -; 
CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh t0, 8(a1) -; CHECK-RV64C-NEXT: lh a2, 16(a1) -; CHECK-RV64C-NEXT: lh a1, 24(a1) +; CHECK-RV64C-NEXT: lh a1, 56(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a5, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: sh a2, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a7, 8(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: c.ntl.pall +; CHECK-RV64C-NEXT: sh a1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a1, 6(a0) +; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a2, 4(a0) +; CHECK-RV64C-NEXT: sh a7, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh t0, 2(a0) +; CHECK-RV64C-NEXT: sh t0, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sh a6, 0(a0) +; CHECK-RV64C-NEXT: sh a5, 6(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a6, 0(a1) +; CHECK-RV32C-NEXT: lh a7, 4(a1) +; CHECK-RV32C-NEXT: lh t0, 8(a1) +; CHECK-RV32C-NEXT: lh a5, 12(a1) +; CHECK-RV32C-NEXT: lh a2, 16(a1) ; CHECK-RV32C-NEXT: lh a3, 20(a1) ; CHECK-RV32C-NEXT: lh a4, 24(a1) -; CHECK-RV32C-NEXT: lh a5, 28(a1) -; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh t0, 4(a1) -; CHECK-RV32C-NEXT: lh a2, 8(a1) -; CHECK-RV32C-NEXT: lh a1, 12(a1) -; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a5, 14(a0) +; CHECK-RV32C-NEXT: lh a1, 28(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: sh a2, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall ; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a7, 8(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: c.ntl.pall +; CHECK-RV32C-NEXT: sh a1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a1, 6(a0) +; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a2, 4(a0) +; CHECK-RV32C-NEXT: sh a7, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh t0, 2(a0) +; CHECK-RV32C-NEXT: sh t0, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sh a6, 0(a0) +; CHECK-RV32C-NEXT: sh a5, 6(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v8i16: @@ -4121,66 +4121,66 @@ define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) { define void @test_nontemporal_PALL_store_v4i32(ptr %p, <4 x i32> %v) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lw a2, 24(a1) -; CHECK-RV64-NEXT: lw a3, 16(a1) -; CHECK-RV64-NEXT: lw a4, 8(a1) -; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: lw a2, 0(a1) +; CHECK-RV64-NEXT: lw a3, 8(a1) +; CHECK-RV64-NEXT: lw a4, 16(a1) +; CHECK-RV64-NEXT: lw a1, 24(a1) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sw a2, 12(a0) +; CHECK-RV64-NEXT: sw a2, 0(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sw a3, 8(a0) +; CHECK-RV64-NEXT: sw a3, 4(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sw a4, 4(a0) +; CHECK-RV64-NEXT: sw a4, 8(a0) ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: sw a1, 12(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 
0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v4i32: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lw a2, 24(a1) -; CHECK-RV64C-NEXT: lw a3, 16(a1) -; CHECK-RV64C-NEXT: lw a4, 8(a1) -; CHECK-RV64C-NEXT: lw a1, 0(a1) +; CHECK-RV64C-NEXT: lw a2, 0(a1) +; CHECK-RV64C-NEXT: lw a3, 8(a1) +; CHECK-RV64C-NEXT: lw a4, 16(a1) +; CHECK-RV64C-NEXT: lw a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sw a2, 12(a0) +; CHECK-RV64C-NEXT: sw a2, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sw a3, 8(a0) +; CHECK-RV64C-NEXT: sw a3, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sw a4, 4(a0) +; CHECK-RV64C-NEXT: sw a4, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sw a1, 0(a0) +; CHECK-RV64C-NEXT: sw a1, 12(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v4i32: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v4i32: @@ -4204,49 +4204,49 @@ define void @test_nontemporal_PALL_store_v2i64(ptr %p, <2 x i64> %v) { ; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v2i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.pall -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: ntl.pall ; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ntl.pall +; CHECK-RV64-NEXT: sd a2, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.pall -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v2i64: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.pall -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: c.ntl.pall ; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.pall +; CHECK-RV64C-NEXT: sd a2, 
8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v2i64: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.pall -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v2i64: @@ -4580,53 +4580,53 @@ define <16 x i8> @test_nontemporal_S1_load_v16i8(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_S1_load_v16i8: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_load_v16i8: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v16i8: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v16i8: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v16i8: @@ -4650,53 +4650,53 @@ define <8 x i16> @test_nontemporal_S1_load_v8i16(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_S1_load_v8i16: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.s1 -; 
CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_load_v8i16: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v8i16: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v8i16: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v8i16: @@ -4720,53 +4720,53 @@ define <4 x i32> @test_nontemporal_S1_load_v4i32(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_S1_load_v4i32: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_load_v4i32: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; 
CHECK-RV64C-LABEL: test_nontemporal_S1_load_v4i32: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v4i32: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v4i32: @@ -4799,17 +4799,17 @@ define <2 x i64> @test_nontemporal_S1_load_v2i64(ptr %p) { ; CHECK-RV32-LABEL: test_nontemporal_S1_load_v2i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v2i64: @@ -4824,17 +4824,17 @@ define <2 x i64> @test_nontemporal_S1_load_v2i64(ptr %p) { ; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v2i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v2i64: @@ -4864,9 +4864,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32-LABEL: test_nontemporal_S1_store_i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a2, 4(a0) -; CHECK-RV32-NEXT: ntl.s1 ; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ntl.s1 +; CHECK-RV32-NEXT: sw a2, 4(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i64: @@ -4878,9 +4878,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32C-LABEL: 
test_nontemporal_S1_store_i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a2, 4(a0) -; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.s1 +; CHECK-RV32C-NEXT: sw a2, 4(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i64: @@ -4892,9 +4892,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) { ; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i64: ; CHECK-RV32V: # %bb.0: ; CHECK-RV32V-NEXT: ntl.s1 -; CHECK-RV32V-NEXT: sw a2, 4(a0) -; CHECK-RV32V-NEXT: ntl.s1 ; CHECK-RV32V-NEXT: sw a1, 0(a0) +; CHECK-RV32V-NEXT: ntl.s1 +; CHECK-RV32V-NEXT: sw a2, 4(a0) ; CHECK-RV32V-NEXT: ret store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3 ret void @@ -5157,46 +5157,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 96(a1) -; CHECK-RV64-NEXT: lbu t3, 104(a1) -; CHECK-RV64-NEXT: lbu t4, 112(a1) -; CHECK-RV64-NEXT: lbu t5, 120(a1) -; CHECK-RV64-NEXT: lbu t6, 64(a1) -; CHECK-RV64-NEXT: lbu s0, 72(a1) -; CHECK-RV64-NEXT: lbu s1, 80(a1) -; CHECK-RV64-NEXT: lbu a1, 88(a1) -; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t5, 15(a0) +; CHECK-RV64-NEXT: lbu t2, 64(a1) +; CHECK-RV64-NEXT: lbu t3, 72(a1) +; CHECK-RV64-NEXT: lbu t4, 80(a1) +; CHECK-RV64-NEXT: lbu t5, 88(a1) +; CHECK-RV64-NEXT: lbu t6, 96(a1) +; CHECK-RV64-NEXT: lbu s0, 104(a1) +; CHECK-RV64-NEXT: lbu s1, 112(a1) +; CHECK-RV64-NEXT: lbu a1, 120(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t4, 14(a0) +; CHECK-RV64-NEXT: sb t6, 12(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t3, 13(a0) +; CHECK-RV64-NEXT: sb s0, 13(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t2, 12(a0) +; CHECK-RV64-NEXT: sb s1, 14(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a1, 11(a0) +; CHECK-RV64-NEXT: sb a1, 15(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb s1, 10(a0) +; CHECK-RV64-NEXT: sb t2, 8(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb s0, 9(a0) +; CHECK-RV64-NEXT: sb t3, 9(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t6, 8(a0) +; CHECK-RV64-NEXT: sb t4, 10(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t1, 7(a0) +; CHECK-RV64-NEXT: sb t5, 11(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb t0, 6(a0) +; CHECK-RV64-NEXT: sb a6, 4(a0) ; CHECK-RV64-NEXT: ntl.s1 ; CHECK-RV64-NEXT: sb a7, 5(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a6, 4(a0) +; CHECK-RV64-NEXT: sb t0, 6(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a5, 3(a0) +; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: sb a2, 0(a0) ; CHECK-RV64-NEXT: ntl.s1 ; CHECK-RV64-NEXT: sb a3, 1(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sb a2, 0(a0) +; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: ntl.s1 +; CHECK-RV64-NEXT: sb a5, 3(a0) ; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -5218,46 +5218,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 48(a1) -; CHECK-RV32-NEXT: lbu t3, 52(a1) -; CHECK-RV32-NEXT: lbu t4, 56(a1) -; CHECK-RV32-NEXT: lbu t5, 60(a1) -; CHECK-RV32-NEXT: lbu t6, 32(a1) -; CHECK-RV32-NEXT: lbu s0, 36(a1) -; CHECK-RV32-NEXT: lbu s1, 
40(a1) -; CHECK-RV32-NEXT: lbu a1, 44(a1) -; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t5, 15(a0) +; CHECK-RV32-NEXT: lbu t2, 32(a1) +; CHECK-RV32-NEXT: lbu t3, 36(a1) +; CHECK-RV32-NEXT: lbu t4, 40(a1) +; CHECK-RV32-NEXT: lbu t5, 44(a1) +; CHECK-RV32-NEXT: lbu t6, 48(a1) +; CHECK-RV32-NEXT: lbu s0, 52(a1) +; CHECK-RV32-NEXT: lbu s1, 56(a1) +; CHECK-RV32-NEXT: lbu a1, 60(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t4, 14(a0) +; CHECK-RV32-NEXT: sb t6, 12(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t3, 13(a0) +; CHECK-RV32-NEXT: sb s0, 13(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t2, 12(a0) +; CHECK-RV32-NEXT: sb s1, 14(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a1, 11(a0) +; CHECK-RV32-NEXT: sb a1, 15(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb s1, 10(a0) +; CHECK-RV32-NEXT: sb t2, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb s0, 9(a0) +; CHECK-RV32-NEXT: sb t3, 9(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t6, 8(a0) +; CHECK-RV32-NEXT: sb t4, 10(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t1, 7(a0) +; CHECK-RV32-NEXT: sb t5, 11(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb t0, 6(a0) +; CHECK-RV32-NEXT: sb a6, 4(a0) ; CHECK-RV32-NEXT: ntl.s1 ; CHECK-RV32-NEXT: sb a7, 5(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a6, 4(a0) +; CHECK-RV32-NEXT: sb t0, 6(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a5, 3(a0) +; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: sb a2, 0(a0) ; CHECK-RV32-NEXT: ntl.s1 ; CHECK-RV32-NEXT: sb a3, 1(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sb a2, 0(a0) +; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: ntl.s1 +; CHECK-RV32-NEXT: sb a5, 3(a0) ; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -5279,46 +5279,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) -; CHECK-RV64C-NEXT: lbu a2, 96(a1) -; CHECK-RV64C-NEXT: lbu a3, 104(a1) -; CHECK-RV64C-NEXT: lbu a4, 112(a1) -; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu s0, 72(a1) -; CHECK-RV64C-NEXT: lbu s1, 80(a1) -; CHECK-RV64C-NEXT: lbu a1, 88(a1) -; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a5, 15(a0) -; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a4, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a3, 13(a0) +; CHECK-RV64C-NEXT: lbu a3, 72(a1) +; CHECK-RV64C-NEXT: lbu a4, 80(a1) +; CHECK-RV64C-NEXT: lbu a5, 88(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 104(a1) +; CHECK-RV64C-NEXT: lbu s1, 112(a1) +; CHECK-RV64C-NEXT: lbu a1, 120(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a1, 11(a0) +; CHECK-RV64C-NEXT: sb s0, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb s1, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb s0, 9(a0) +; CHECK-RV64C-NEXT: sb a1, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb t5, 7(a0) +; CHECK-RV64C-NEXT: sb a3, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb t4, 6(a0) +; CHECK-RV64C-NEXT: sb a4, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb t3, 5(a0) +; CHECK-RV64C-NEXT: sb 
a5, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sb t2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb t1, 3(a0) +; CHECK-RV64C-NEXT: sb t3, 5(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: sb t4, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: sb t5, 7(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sb a6, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.s1 +; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: c.ntl.s1 +; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: c.ntl.s1 +; CHECK-RV64C-NEXT: sb t1, 3(a0) ; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: addi sp, sp, 16 @@ -5340,46 +5340,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) -; CHECK-RV32C-NEXT: lbu a2, 48(a1) -; CHECK-RV32C-NEXT: lbu a3, 52(a1) -; CHECK-RV32C-NEXT: lbu a4, 56(a1) -; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu s0, 36(a1) -; CHECK-RV32C-NEXT: lbu s1, 40(a1) -; CHECK-RV32C-NEXT: lbu a1, 44(a1) -; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a5, 15(a0) -; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a4, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a3, 13(a0) +; CHECK-RV32C-NEXT: lbu a3, 36(a1) +; CHECK-RV32C-NEXT: lbu a4, 40(a1) +; CHECK-RV32C-NEXT: lbu a5, 44(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 52(a1) +; CHECK-RV32C-NEXT: lbu s1, 56(a1) +; CHECK-RV32C-NEXT: lbu a1, 60(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a1, 11(a0) +; CHECK-RV32C-NEXT: sb s0, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb s1, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb s0, 9(a0) +; CHECK-RV32C-NEXT: sb a1, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb t5, 7(a0) +; CHECK-RV32C-NEXT: sb a3, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb t4, 6(a0) +; CHECK-RV32C-NEXT: sb a4, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb t3, 5(a0) +; CHECK-RV32C-NEXT: sb a5, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sb t2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb t1, 3(a0) +; CHECK-RV32C-NEXT: sb t3, 5(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: sb t4, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: sb t5, 7(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sb a6, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.s1 +; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: c.ntl.s1 +; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: c.ntl.s1 +; CHECK-RV32C-NEXT: sb t1, 3(a0) ; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: addi sp, sp, 16 @@ -5405,114 +5405,114 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 32(a1) -; CHECK-RV64-NEXT: lh a3, 40(a1) -; CHECK-RV64-NEXT: lh a4, 48(a1) -; CHECK-RV64-NEXT: lh a5, 56(a1) -; 
CHECK-RV64-NEXT: lh a6, 0(a1) -; CHECK-RV64-NEXT: lh a7, 8(a1) -; CHECK-RV64-NEXT: lh t0, 16(a1) -; CHECK-RV64-NEXT: lh a1, 24(a1) +; CHECK-RV64-NEXT: lh a2, 0(a1) +; CHECK-RV64-NEXT: lh a3, 8(a1) +; CHECK-RV64-NEXT: lh a4, 16(a1) +; CHECK-RV64-NEXT: lh a5, 24(a1) +; CHECK-RV64-NEXT: lh a6, 32(a1) +; CHECK-RV64-NEXT: lh a7, 40(a1) +; CHECK-RV64-NEXT: lh t0, 48(a1) +; CHECK-RV64-NEXT: lh a1, 56(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a5, 14(a0) +; CHECK-RV64-NEXT: sh a6, 8(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a4, 12(a0) +; CHECK-RV64-NEXT: sh a7, 10(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a3, 10(a0) +; CHECK-RV64-NEXT: sh t0, 12(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a2, 8(a0) +; CHECK-RV64-NEXT: sh a1, 14(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a1, 6(a0) +; CHECK-RV64-NEXT: sh a2, 0(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh t0, 4(a0) +; CHECK-RV64-NEXT: sh a3, 2(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a7, 2(a0) +; CHECK-RV64-NEXT: sh a4, 4(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sh a6, 0(a0) +; CHECK-RV64-NEXT: sh a5, 6(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 16(a1) -; CHECK-RV32-NEXT: lh a3, 20(a1) -; CHECK-RV32-NEXT: lh a4, 24(a1) -; CHECK-RV32-NEXT: lh a5, 28(a1) -; CHECK-RV32-NEXT: lh a6, 0(a1) -; CHECK-RV32-NEXT: lh a7, 4(a1) -; CHECK-RV32-NEXT: lh t0, 8(a1) -; CHECK-RV32-NEXT: lh a1, 12(a1) +; CHECK-RV32-NEXT: lh a2, 0(a1) +; CHECK-RV32-NEXT: lh a3, 4(a1) +; CHECK-RV32-NEXT: lh a4, 8(a1) +; CHECK-RV32-NEXT: lh a5, 12(a1) +; CHECK-RV32-NEXT: lh a6, 16(a1) +; CHECK-RV32-NEXT: lh a7, 20(a1) +; CHECK-RV32-NEXT: lh t0, 24(a1) +; CHECK-RV32-NEXT: lh a1, 28(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a5, 14(a0) +; CHECK-RV32-NEXT: sh a6, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a4, 12(a0) +; CHECK-RV32-NEXT: sh a7, 10(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a3, 10(a0) +; CHECK-RV32-NEXT: sh t0, 12(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a2, 8(a0) +; CHECK-RV32-NEXT: sh a1, 14(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a1, 6(a0) +; CHECK-RV32-NEXT: sh a2, 0(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh t0, 4(a0) +; CHECK-RV32-NEXT: sh a3, 2(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a7, 2(a0) +; CHECK-RV32-NEXT: sh a4, 4(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sh a6, 0(a0) +; CHECK-RV32-NEXT: sh a5, 6(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a6, 0(a1) +; CHECK-RV64C-NEXT: lh a7, 8(a1) +; CHECK-RV64C-NEXT: lh t0, 16(a1) +; CHECK-RV64C-NEXT: lh a5, 24(a1) +; CHECK-RV64C-NEXT: lh a2, 32(a1) ; CHECK-RV64C-NEXT: lh a3, 40(a1) ; CHECK-RV64C-NEXT: lh a4, 48(a1) -; CHECK-RV64C-NEXT: lh a5, 56(a1) -; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh t0, 8(a1) -; CHECK-RV64C-NEXT: lh a2, 16(a1) -; CHECK-RV64C-NEXT: lh a1, 24(a1) -; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a5, 14(a0) +; CHECK-RV64C-NEXT: lh a1, 56(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: sh a2, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a7, 8(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a1, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 14(a0) ; CHECK-RV64C-NEXT: 
c.ntl.s1 -; CHECK-RV64C-NEXT: sh a2, 4(a0) +; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh t0, 2(a0) +; CHECK-RV64C-NEXT: sh a7, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sh a6, 0(a0) +; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: c.ntl.s1 +; CHECK-RV64C-NEXT: sh a5, 6(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a6, 0(a1) +; CHECK-RV32C-NEXT: lh a7, 4(a1) +; CHECK-RV32C-NEXT: lh t0, 8(a1) +; CHECK-RV32C-NEXT: lh a5, 12(a1) +; CHECK-RV32C-NEXT: lh a2, 16(a1) ; CHECK-RV32C-NEXT: lh a3, 20(a1) ; CHECK-RV32C-NEXT: lh a4, 24(a1) -; CHECK-RV32C-NEXT: lh a5, 28(a1) -; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh t0, 4(a1) -; CHECK-RV32C-NEXT: lh a2, 8(a1) -; CHECK-RV32C-NEXT: lh a1, 12(a1) +; CHECK-RV32C-NEXT: lh a1, 28(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a5, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: sh a2, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 ; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a7, 8(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a1, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a2, 4(a0) +; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh t0, 2(a0) +; CHECK-RV32C-NEXT: sh a7, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sh a6, 0(a0) +; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: c.ntl.s1 +; CHECK-RV32C-NEXT: sh a5, 6(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v8i16: @@ -5535,66 +5535,66 @@ define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) { define void @test_nontemporal_S1_store_v4i32(ptr %p, <4 x i32> %v) { ; CHECK-RV64-LABEL: test_nontemporal_S1_store_v4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lw a2, 24(a1) -; CHECK-RV64-NEXT: lw a3, 16(a1) -; CHECK-RV64-NEXT: lw a4, 8(a1) -; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: lw a2, 0(a1) +; CHECK-RV64-NEXT: lw a3, 8(a1) +; CHECK-RV64-NEXT: lw a4, 16(a1) +; CHECK-RV64-NEXT: lw a1, 24(a1) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sw a2, 12(a0) +; CHECK-RV64-NEXT: sw a2, 0(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sw a3, 8(a0) +; CHECK-RV64-NEXT: sw a3, 4(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sw a4, 4(a0) +; CHECK-RV64-NEXT: sw a4, 8(a0) ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: sw a1, 12(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_store_v4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v4i32: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lw a2, 24(a1) -; CHECK-RV64C-NEXT: lw 
a3, 16(a1) -; CHECK-RV64C-NEXT: lw a4, 8(a1) -; CHECK-RV64C-NEXT: lw a1, 0(a1) +; CHECK-RV64C-NEXT: lw a2, 0(a1) +; CHECK-RV64C-NEXT: lw a3, 8(a1) +; CHECK-RV64C-NEXT: lw a4, 16(a1) +; CHECK-RV64C-NEXT: lw a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sw a2, 12(a0) +; CHECK-RV64C-NEXT: sw a2, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sw a3, 8(a0) +; CHECK-RV64C-NEXT: sw a3, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sw a4, 4(a0) +; CHECK-RV64C-NEXT: sw a4, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sw a1, 0(a0) +; CHECK-RV64C-NEXT: sw a1, 12(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v4i32: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v4i32: @@ -5618,49 +5618,49 @@ define void @test_nontemporal_S1_store_v2i64(ptr %p, <2 x i64> %v) { ; CHECK-RV64-LABEL: test_nontemporal_S1_store_v2i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.s1 -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: ntl.s1 ; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ntl.s1 +; CHECK-RV64-NEXT: sd a2, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_S1_store_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.s1 -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v2i64: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.s1 -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: c.ntl.s1 ; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.s1 +; CHECK-RV64C-NEXT: sd a2, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v2i64: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.s1 -; CHECK-RV32C-NEXT: sw 
a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v2i64: @@ -5994,53 +5994,53 @@ define <16 x i8> @test_nontemporal_ALL_load_v16i8(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v16i8: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v16i8: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v16i8: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v16i8: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v16i8: @@ -6064,53 +6064,53 @@ define <8 x i16> @test_nontemporal_ALL_load_v8i16(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v8i16: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v8i16: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; 
CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v8i16: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v8i16: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v8i16: @@ -6134,53 +6134,53 @@ define <4 x i32> @test_nontemporal_ALL_load_v4i32(ptr %p) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v4i32: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a2, 8(a1) +; CHECK-RV64-NEXT: ld a2, 0(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: ld a1, 0(a1) -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ld a1, 8(a1) +; CHECK-RV64-NEXT: sd a2, 0(a0) +; CHECK-RV64-NEXT: sd a1, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v4i32: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v4i32: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a2, 8(a1) +; CHECK-RV64C-NEXT: ld a2, 0(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: ld a1, 0(a1) -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: ld a1, 8(a1) +; CHECK-RV64C-NEXT: sd a2, 0(a0) +; CHECK-RV64C-NEXT: sd a1, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v4i32: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; 
CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v4i32: @@ -6213,17 +6213,17 @@ define <2 x i64> @test_nontemporal_ALL_load_v2i64(ptr %p) { ; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v2i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a2, 12(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a3, 8(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a4, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: lw a1, 0(a1) -; CHECK-RV32-NEXT: sw a2, 12(a0) -; CHECK-RV32-NEXT: sw a3, 8(a0) -; CHECK-RV32-NEXT: sw a4, 4(a0) -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: lw a1, 12(a1) +; CHECK-RV32-NEXT: sw a2, 0(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v2i64: @@ -6238,17 +6238,17 @@ define <2 x i64> @test_nontemporal_ALL_load_v2i64(ptr %p) { ; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v2i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a2, 12(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a3, 8(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a4, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: lw a1, 0(a1) -; CHECK-RV32C-NEXT: sw a2, 12(a0) -; CHECK-RV32C-NEXT: sw a3, 8(a0) -; CHECK-RV32C-NEXT: sw a4, 4(a0) -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: lw a1, 12(a1) +; CHECK-RV32C-NEXT: sw a2, 0(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v2i64: @@ -6278,9 +6278,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 4(a0) -; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: ntl.all +; CHECK-RV32-NEXT: sw a2, 4(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i64: @@ -6292,9 +6292,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i64: ; CHECK-RV32C: # %bb.0: ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 4(a0) -; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sw a2, 4(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i64: @@ -6306,9 +6306,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) { ; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i64: ; CHECK-RV32V: # %bb.0: ; CHECK-RV32V-NEXT: ntl.all -; CHECK-RV32V-NEXT: sw a2, 4(a0) -; CHECK-RV32V-NEXT: ntl.all ; CHECK-RV32V-NEXT: sw a1, 0(a0) +; CHECK-RV32V-NEXT: ntl.all +; CHECK-RV32V-NEXT: sw a2, 4(a0) ; 
CHECK-RV32V-NEXT: ret store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4 ret void @@ -6571,46 +6571,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64-NEXT: lbu a7, 40(a1) ; CHECK-RV64-NEXT: lbu t0, 48(a1) ; CHECK-RV64-NEXT: lbu t1, 56(a1) -; CHECK-RV64-NEXT: lbu t2, 96(a1) -; CHECK-RV64-NEXT: lbu t3, 104(a1) -; CHECK-RV64-NEXT: lbu t4, 112(a1) -; CHECK-RV64-NEXT: lbu t5, 120(a1) -; CHECK-RV64-NEXT: lbu t6, 64(a1) -; CHECK-RV64-NEXT: lbu s0, 72(a1) -; CHECK-RV64-NEXT: lbu s1, 80(a1) -; CHECK-RV64-NEXT: lbu a1, 88(a1) +; CHECK-RV64-NEXT: lbu t2, 64(a1) +; CHECK-RV64-NEXT: lbu t3, 72(a1) +; CHECK-RV64-NEXT: lbu t4, 80(a1) +; CHECK-RV64-NEXT: lbu t5, 88(a1) +; CHECK-RV64-NEXT: lbu t6, 96(a1) +; CHECK-RV64-NEXT: lbu s0, 104(a1) +; CHECK-RV64-NEXT: lbu s1, 112(a1) +; CHECK-RV64-NEXT: lbu a1, 120(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t5, 15(a0) +; CHECK-RV64-NEXT: sb t6, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t4, 14(a0) +; CHECK-RV64-NEXT: sb s0, 13(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t3, 13(a0) +; CHECK-RV64-NEXT: sb s1, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t2, 12(a0) +; CHECK-RV64-NEXT: sb a1, 15(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a1, 11(a0) +; CHECK-RV64-NEXT: sb t2, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s1, 10(a0) +; CHECK-RV64-NEXT: sb t3, 9(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb s0, 9(a0) +; CHECK-RV64-NEXT: sb t4, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t6, 8(a0) +; CHECK-RV64-NEXT: sb t5, 11(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t1, 7(a0) -; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb t0, 6(a0) +; CHECK-RV64-NEXT: sb a6, 4(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb a7, 5(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a6, 4(a0) +; CHECK-RV64-NEXT: sb t0, 6(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a5, 3(a0) +; CHECK-RV64-NEXT: sb t1, 7(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: sb a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sb a3, 1(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sb a2, 0(a0) +; CHECK-RV64-NEXT: sb a4, 2(a0) +; CHECK-RV64-NEXT: ntl.all +; CHECK-RV64-NEXT: sb a5, 3(a0) ; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -6632,46 +6632,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32-NEXT: lbu a7, 20(a1) ; CHECK-RV32-NEXT: lbu t0, 24(a1) ; CHECK-RV32-NEXT: lbu t1, 28(a1) -; CHECK-RV32-NEXT: lbu t2, 48(a1) -; CHECK-RV32-NEXT: lbu t3, 52(a1) -; CHECK-RV32-NEXT: lbu t4, 56(a1) -; CHECK-RV32-NEXT: lbu t5, 60(a1) -; CHECK-RV32-NEXT: lbu t6, 32(a1) -; CHECK-RV32-NEXT: lbu s0, 36(a1) -; CHECK-RV32-NEXT: lbu s1, 40(a1) -; CHECK-RV32-NEXT: lbu a1, 44(a1) -; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t5, 15(a0) +; CHECK-RV32-NEXT: lbu t2, 32(a1) +; CHECK-RV32-NEXT: lbu t3, 36(a1) +; CHECK-RV32-NEXT: lbu t4, 40(a1) +; CHECK-RV32-NEXT: lbu t5, 44(a1) +; CHECK-RV32-NEXT: lbu t6, 48(a1) +; CHECK-RV32-NEXT: lbu s0, 52(a1) +; CHECK-RV32-NEXT: lbu s1, 56(a1) +; CHECK-RV32-NEXT: lbu a1, 60(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t4, 14(a0) +; CHECK-RV32-NEXT: sb t6, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t3, 13(a0) +; CHECK-RV32-NEXT: sb s0, 13(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t2, 12(a0) +; 
CHECK-RV32-NEXT: sb s1, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a1, 11(a0) +; CHECK-RV32-NEXT: sb a1, 15(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s1, 10(a0) +; CHECK-RV32-NEXT: sb t2, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb s0, 9(a0) +; CHECK-RV32-NEXT: sb t3, 9(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t6, 8(a0) +; CHECK-RV32-NEXT: sb t4, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t1, 7(a0) +; CHECK-RV32-NEXT: sb t5, 11(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb t0, 6(a0) +; CHECK-RV32-NEXT: sb a6, 4(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb a7, 5(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a6, 4(a0) +; CHECK-RV32-NEXT: sb t0, 6(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a5, 3(a0) +; CHECK-RV32-NEXT: sb t1, 7(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: sb a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all ; CHECK-RV32-NEXT: sb a3, 1(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sb a2, 0(a0) +; CHECK-RV32-NEXT: sb a4, 2(a0) +; CHECK-RV32-NEXT: ntl.all +; CHECK-RV32-NEXT: sb a5, 3(a0) ; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -6693,46 +6693,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV64C-NEXT: lbu t3, 40(a1) ; CHECK-RV64C-NEXT: lbu t4, 48(a1) ; CHECK-RV64C-NEXT: lbu t5, 56(a1) -; CHECK-RV64C-NEXT: lbu a2, 96(a1) -; CHECK-RV64C-NEXT: lbu a3, 104(a1) -; CHECK-RV64C-NEXT: lbu a4, 112(a1) -; CHECK-RV64C-NEXT: lbu a5, 120(a1) ; CHECK-RV64C-NEXT: lbu t6, 64(a1) -; CHECK-RV64C-NEXT: lbu s0, 72(a1) -; CHECK-RV64C-NEXT: lbu s1, 80(a1) -; CHECK-RV64C-NEXT: lbu a1, 88(a1) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a5, 15(a0) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a4, 14(a0) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a3, 13(a0) +; CHECK-RV64C-NEXT: lbu a3, 72(a1) +; CHECK-RV64C-NEXT: lbu a4, 80(a1) +; CHECK-RV64C-NEXT: lbu a5, 88(a1) +; CHECK-RV64C-NEXT: lbu a2, 96(a1) +; CHECK-RV64C-NEXT: lbu s0, 104(a1) +; CHECK-RV64C-NEXT: lbu s1, 112(a1) +; CHECK-RV64C-NEXT: lbu a1, 120(a1) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb a2, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a1, 11(a0) +; CHECK-RV64C-NEXT: sb s0, 13(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s1, 10(a0) +; CHECK-RV64C-NEXT: sb s1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb s0, 9(a0) +; CHECK-RV64C-NEXT: sb a1, 15(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t6, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t5, 7(a0) +; CHECK-RV64C-NEXT: sb a3, 9(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t4, 6(a0) +; CHECK-RV64C-NEXT: sb a4, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t3, 5(a0) +; CHECK-RV64C-NEXT: sb a5, 11(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb t2, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t1, 3(a0) +; CHECK-RV64C-NEXT: sb t3, 5(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: sb t4, 6(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: sb t5, 7(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sb a6, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sb a7, 1(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sb t0, 2(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; 
CHECK-RV64C-NEXT: sb t1, 3(a0) ; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload ; CHECK-RV64C-NEXT: addi sp, sp, 16 @@ -6754,46 +6754,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { ; CHECK-RV32C-NEXT: lbu t3, 20(a1) ; CHECK-RV32C-NEXT: lbu t4, 24(a1) ; CHECK-RV32C-NEXT: lbu t5, 28(a1) -; CHECK-RV32C-NEXT: lbu a2, 48(a1) -; CHECK-RV32C-NEXT: lbu a3, 52(a1) -; CHECK-RV32C-NEXT: lbu a4, 56(a1) -; CHECK-RV32C-NEXT: lbu a5, 60(a1) ; CHECK-RV32C-NEXT: lbu t6, 32(a1) -; CHECK-RV32C-NEXT: lbu s0, 36(a1) -; CHECK-RV32C-NEXT: lbu s1, 40(a1) -; CHECK-RV32C-NEXT: lbu a1, 44(a1) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a5, 15(a0) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a4, 14(a0) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a3, 13(a0) +; CHECK-RV32C-NEXT: lbu a3, 36(a1) +; CHECK-RV32C-NEXT: lbu a4, 40(a1) +; CHECK-RV32C-NEXT: lbu a5, 44(a1) +; CHECK-RV32C-NEXT: lbu a2, 48(a1) +; CHECK-RV32C-NEXT: lbu s0, 52(a1) +; CHECK-RV32C-NEXT: lbu s1, 56(a1) +; CHECK-RV32C-NEXT: lbu a1, 60(a1) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb a2, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a1, 11(a0) +; CHECK-RV32C-NEXT: sb s0, 13(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s1, 10(a0) +; CHECK-RV32C-NEXT: sb s1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb s0, 9(a0) +; CHECK-RV32C-NEXT: sb a1, 15(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t6, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t5, 7(a0) +; CHECK-RV32C-NEXT: sb a3, 9(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t4, 6(a0) +; CHECK-RV32C-NEXT: sb a4, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t3, 5(a0) +; CHECK-RV32C-NEXT: sb a5, 11(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb t2, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t1, 3(a0) +; CHECK-RV32C-NEXT: sb t3, 5(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: sb t4, 6(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: sb t5, 7(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sb a6, 0(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb a7, 1(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb t0, 2(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sb t1, 3(a0) ; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; CHECK-RV32C-NEXT: addi sp, sp, 16 @@ -6819,114 +6819,114 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) { define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lh a2, 32(a1) -; CHECK-RV64-NEXT: lh a3, 40(a1) -; CHECK-RV64-NEXT: lh a4, 48(a1) -; CHECK-RV64-NEXT: lh a5, 56(a1) -; CHECK-RV64-NEXT: lh a6, 0(a1) -; CHECK-RV64-NEXT: lh a7, 8(a1) -; CHECK-RV64-NEXT: lh t0, 16(a1) -; CHECK-RV64-NEXT: lh a1, 24(a1) +; CHECK-RV64-NEXT: lh a2, 0(a1) +; CHECK-RV64-NEXT: lh a3, 8(a1) +; CHECK-RV64-NEXT: lh a4, 16(a1) +; CHECK-RV64-NEXT: lh a5, 24(a1) +; CHECK-RV64-NEXT: lh a6, 32(a1) +; CHECK-RV64-NEXT: lh a7, 40(a1) +; CHECK-RV64-NEXT: lh t0, 48(a1) +; CHECK-RV64-NEXT: lh a1, 56(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a5, 14(a0) +; CHECK-RV64-NEXT: sh a6, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a4, 12(a0) +; 
CHECK-RV64-NEXT: sh a7, 10(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a3, 10(a0) +; CHECK-RV64-NEXT: sh t0, 12(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a2, 8(a0) +; CHECK-RV64-NEXT: sh a1, 14(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a1, 6(a0) +; CHECK-RV64-NEXT: sh a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh t0, 4(a0) +; CHECK-RV64-NEXT: sh a3, 2(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a7, 2(a0) +; CHECK-RV64-NEXT: sh a4, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sh a6, 0(a0) +; CHECK-RV64-NEXT: sh a5, 6(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lh a2, 16(a1) -; CHECK-RV32-NEXT: lh a3, 20(a1) -; CHECK-RV32-NEXT: lh a4, 24(a1) -; CHECK-RV32-NEXT: lh a5, 28(a1) -; CHECK-RV32-NEXT: lh a6, 0(a1) -; CHECK-RV32-NEXT: lh a7, 4(a1) -; CHECK-RV32-NEXT: lh t0, 8(a1) -; CHECK-RV32-NEXT: lh a1, 12(a1) +; CHECK-RV32-NEXT: lh a2, 0(a1) +; CHECK-RV32-NEXT: lh a3, 4(a1) +; CHECK-RV32-NEXT: lh a4, 8(a1) +; CHECK-RV32-NEXT: lh a5, 12(a1) +; CHECK-RV32-NEXT: lh a6, 16(a1) +; CHECK-RV32-NEXT: lh a7, 20(a1) +; CHECK-RV32-NEXT: lh t0, 24(a1) +; CHECK-RV32-NEXT: lh a1, 28(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a5, 14(a0) +; CHECK-RV32-NEXT: sh a6, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a4, 12(a0) +; CHECK-RV32-NEXT: sh a7, 10(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a3, 10(a0) +; CHECK-RV32-NEXT: sh t0, 12(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a2, 8(a0) +; CHECK-RV32-NEXT: sh a1, 14(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a1, 6(a0) +; CHECK-RV32-NEXT: sh a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh t0, 4(a0) +; CHECK-RV32-NEXT: sh a3, 2(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a7, 2(a0) +; CHECK-RV32-NEXT: sh a4, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sh a6, 0(a0) +; CHECK-RV32-NEXT: sh a5, 6(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lh a7, 32(a1) +; CHECK-RV64C-NEXT: lh a6, 0(a1) +; CHECK-RV64C-NEXT: lh a7, 8(a1) +; CHECK-RV64C-NEXT: lh t0, 16(a1) +; CHECK-RV64C-NEXT: lh a5, 24(a1) +; CHECK-RV64C-NEXT: lh a2, 32(a1) ; CHECK-RV64C-NEXT: lh a3, 40(a1) ; CHECK-RV64C-NEXT: lh a4, 48(a1) -; CHECK-RV64C-NEXT: lh a5, 56(a1) -; CHECK-RV64C-NEXT: lh a6, 0(a1) -; CHECK-RV64C-NEXT: lh t0, 8(a1) -; CHECK-RV64C-NEXT: lh a2, 16(a1) -; CHECK-RV64C-NEXT: lh a1, 24(a1) -; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a5, 14(a0) +; CHECK-RV64C-NEXT: lh a1, 56(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a4, 12(a0) +; CHECK-RV64C-NEXT: sh a2, 8(a0) ; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sh a3, 10(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a7, 8(a0) +; CHECK-RV64C-NEXT: sh a4, 12(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a1, 6(a0) +; CHECK-RV64C-NEXT: sh a1, 14(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a2, 4(a0) +; CHECK-RV64C-NEXT: sh a6, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh t0, 2(a0) +; CHECK-RV64C-NEXT: sh a7, 2(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sh a6, 0(a0) +; CHECK-RV64C-NEXT: sh t0, 4(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sh a5, 6(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lh a7, 16(a1) +; CHECK-RV32C-NEXT: lh a6, 0(a1) +; CHECK-RV32C-NEXT: lh 
a7, 4(a1) +; CHECK-RV32C-NEXT: lh t0, 8(a1) +; CHECK-RV32C-NEXT: lh a5, 12(a1) +; CHECK-RV32C-NEXT: lh a2, 16(a1) ; CHECK-RV32C-NEXT: lh a3, 20(a1) ; CHECK-RV32C-NEXT: lh a4, 24(a1) -; CHECK-RV32C-NEXT: lh a5, 28(a1) -; CHECK-RV32C-NEXT: lh a6, 0(a1) -; CHECK-RV32C-NEXT: lh t0, 4(a1) -; CHECK-RV32C-NEXT: lh a2, 8(a1) -; CHECK-RV32C-NEXT: lh a1, 12(a1) -; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a5, 14(a0) +; CHECK-RV32C-NEXT: lh a1, 28(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a4, 12(a0) +; CHECK-RV32C-NEXT: sh a2, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all ; CHECK-RV32C-NEXT: sh a3, 10(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a7, 8(a0) +; CHECK-RV32C-NEXT: sh a4, 12(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a1, 6(a0) +; CHECK-RV32C-NEXT: sh a1, 14(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a2, 4(a0) +; CHECK-RV32C-NEXT: sh a6, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh t0, 2(a0) +; CHECK-RV32C-NEXT: sh a7, 2(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sh a6, 0(a0) +; CHECK-RV32C-NEXT: sh t0, 4(a0) +; CHECK-RV32C-NEXT: c.ntl.all +; CHECK-RV32C-NEXT: sh a5, 6(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v8i16: @@ -6949,66 +6949,66 @@ define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) { define void @test_nontemporal_ALL_store_v4i32(ptr %p, <4 x i32> %v) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lw a2, 24(a1) -; CHECK-RV64-NEXT: lw a3, 16(a1) -; CHECK-RV64-NEXT: lw a4, 8(a1) -; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: lw a2, 0(a1) +; CHECK-RV64-NEXT: lw a3, 8(a1) +; CHECK-RV64-NEXT: lw a4, 16(a1) +; CHECK-RV64-NEXT: lw a1, 24(a1) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a2, 12(a0) +; CHECK-RV64-NEXT: sw a2, 0(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a3, 8(a0) +; CHECK-RV64-NEXT: sw a3, 4(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a4, 4(a0) +; CHECK-RV64-NEXT: sw a4, 8(a0) ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sw a1, 0(a0) +; CHECK-RV64-NEXT: sw a1, 12(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v4i32: ; CHECK-RV64C: # %bb.0: -; CHECK-RV64C-NEXT: lw a2, 24(a1) -; CHECK-RV64C-NEXT: lw a3, 16(a1) -; CHECK-RV64C-NEXT: lw a4, 8(a1) -; CHECK-RV64C-NEXT: lw a1, 0(a1) +; CHECK-RV64C-NEXT: lw a2, 0(a1) +; CHECK-RV64C-NEXT: lw a3, 8(a1) +; CHECK-RV64C-NEXT: lw a4, 16(a1) +; CHECK-RV64C-NEXT: lw a1, 24(a1) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a2, 12(a0) +; CHECK-RV64C-NEXT: sw a2, 0(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a3, 8(a0) +; CHECK-RV64C-NEXT: sw a3, 4(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a4, 4(a0) +; CHECK-RV64C-NEXT: sw a4, 
8(a0) ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sw a1, 0(a0) +; CHECK-RV64C-NEXT: sw a1, 12(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v4i32: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v4i32: @@ -7032,49 +7032,49 @@ define void @test_nontemporal_ALL_store_v2i64(ptr %p, <2 x i64> %v) { ; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v2i64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: ntl.all -; CHECK-RV64-NEXT: sd a2, 8(a0) -; CHECK-RV64-NEXT: ntl.all ; CHECK-RV64-NEXT: sd a1, 0(a0) +; CHECK-RV64-NEXT: ntl.all +; CHECK-RV64-NEXT: sd a2, 8(a0) ; CHECK-RV64-NEXT: ret ; ; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lw a2, 12(a1) -; CHECK-RV32-NEXT: lw a3, 8(a1) -; CHECK-RV32-NEXT: lw a4, 4(a1) -; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: lw a2, 0(a1) +; CHECK-RV32-NEXT: lw a3, 4(a1) +; CHECK-RV32-NEXT: lw a4, 8(a1) +; CHECK-RV32-NEXT: lw a1, 12(a1) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a2, 12(a0) +; CHECK-RV32-NEXT: sw a2, 0(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a3, 8(a0) +; CHECK-RV32-NEXT: sw a3, 4(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a4, 4(a0) +; CHECK-RV32-NEXT: sw a4, 8(a0) ; CHECK-RV32-NEXT: ntl.all -; CHECK-RV32-NEXT: sw a1, 0(a0) +; CHECK-RV32-NEXT: sw a1, 12(a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v2i64: ; CHECK-RV64C: # %bb.0: ; CHECK-RV64C-NEXT: c.ntl.all -; CHECK-RV64C-NEXT: sd a2, 8(a0) -; CHECK-RV64C-NEXT: c.ntl.all ; CHECK-RV64C-NEXT: sd a1, 0(a0) +; CHECK-RV64C-NEXT: c.ntl.all +; CHECK-RV64C-NEXT: sd a2, 8(a0) ; CHECK-RV64C-NEXT: ret ; ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v2i64: ; CHECK-RV32C: # %bb.0: -; CHECK-RV32C-NEXT: lw a2, 12(a1) -; CHECK-RV32C-NEXT: lw a3, 8(a1) -; CHECK-RV32C-NEXT: lw a4, 4(a1) -; CHECK-RV32C-NEXT: lw a1, 0(a1) +; CHECK-RV32C-NEXT: lw a2, 0(a1) +; CHECK-RV32C-NEXT: lw a3, 4(a1) +; CHECK-RV32C-NEXT: lw a4, 8(a1) +; CHECK-RV32C-NEXT: lw a1, 12(a1) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a2, 12(a0) +; CHECK-RV32C-NEXT: sw a2, 0(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a3, 8(a0) +; CHECK-RV32C-NEXT: sw a3, 4(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a4, 4(a0) +; CHECK-RV32C-NEXT: sw a4, 8(a0) ; CHECK-RV32C-NEXT: c.ntl.all -; CHECK-RV32C-NEXT: sw a1, 0(a0) +; CHECK-RV32C-NEXT: sw a1, 12(a0) ; CHECK-RV32C-NEXT: ret ; ; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v2i64: diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index fe602b5b8fc2bcd..0d571600a2d8130 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -757,8 +757,8 @@ define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) { ; 
RV64-NEXT: slli a0, a2, 22 ; RV64-NEXT: srli a3, a0, 22 ; RV64-NEXT: seqz a0, a3 -; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: srli a3, a3, 32 +; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: sh a3, 4(a1) ; RV64-NEXT: ret %a = add i42 %x, 1 @@ -1247,8 +1247,8 @@ define void @PR41129(ptr %p64) { ; RV32-NEXT: beqz a3, .LBB37_2 ; RV32-NEXT: # %bb.1: # %false ; RV32-NEXT: andi a1, a1, 7 -; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; RV32-NEXT: .LBB37_2: # %true ; RV32-NEXT: seqz a3, a1 diff --git a/llvm/test/CodeGen/RISCV/pr63816.ll b/llvm/test/CodeGen/RISCV/pr63816.ll index 2e33a0e994996af..75ddeda3de5071f 100644 --- a/llvm/test/CodeGen/RISCV/pr63816.ll +++ b/llvm/test/CodeGen/RISCV/pr63816.ll @@ -55,14 +55,14 @@ define void @test(ptr %0, ptr %1) nounwind { ; CHECK-NEXT: fcvt.d.s fs0, fs0 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: fcvt.d.s fa5, fa0 -; CHECK-NEXT: fsd fa5, 56(s0) -; CHECK-NEXT: fsd fs0, 48(s0) -; CHECK-NEXT: fsd fs1, 40(s0) ; CHECK-NEXT: fsd fs2, 32(s0) -; CHECK-NEXT: fsd fs3, 24(s0) -; CHECK-NEXT: fsd fs4, 16(s0) -; CHECK-NEXT: fsd fs5, 8(s0) +; CHECK-NEXT: fsd fs1, 40(s0) +; CHECK-NEXT: fsd fs0, 48(s0) +; CHECK-NEXT: fsd fa5, 56(s0) ; CHECK-NEXT: fsd fs6, 0(s0) +; CHECK-NEXT: fsd fs5, 8(s0) +; CHECK-NEXT: fsd fs4, 16(s0) +; CHECK-NEXT: fsd fs3, 24(s0) ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 56(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll index 85c2997e268a947..5cb00e617273a6f 100644 --- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -1000,13 +1000,13 @@ define i32 @varargs(ptr %fmt, ...) nounwind { ; RV32IZCMP: # %bb.0: ; RV32IZCMP-NEXT: addi sp, sp, -48 ; RV32IZCMP-NEXT: mv a0, a1 -; RV32IZCMP-NEXT: sw a7, 44(sp) -; RV32IZCMP-NEXT: sw a6, 40(sp) ; RV32IZCMP-NEXT: sw a5, 36(sp) -; RV32IZCMP-NEXT: sw a4, 32(sp) -; RV32IZCMP-NEXT: sw a3, 28(sp) -; RV32IZCMP-NEXT: sw a2, 24(sp) +; RV32IZCMP-NEXT: sw a6, 40(sp) +; RV32IZCMP-NEXT: sw a7, 44(sp) ; RV32IZCMP-NEXT: sw a1, 20(sp) +; RV32IZCMP-NEXT: sw a2, 24(sp) +; RV32IZCMP-NEXT: sw a3, 28(sp) +; RV32IZCMP-NEXT: sw a4, 32(sp) ; RV32IZCMP-NEXT: addi a1, sp, 24 ; RV32IZCMP-NEXT: sw a1, 12(sp) ; RV32IZCMP-NEXT: addi sp, sp, 48 @@ -1016,15 +1016,15 @@ define i32 @varargs(ptr %fmt, ...) nounwind { ; RV64IZCMP: # %bb.0: ; RV64IZCMP-NEXT: addi sp, sp, -80 ; RV64IZCMP-NEXT: sd a1, 24(sp) -; RV64IZCMP-NEXT: sd a7, 72(sp) -; RV64IZCMP-NEXT: sd a6, 64(sp) ; RV64IZCMP-NEXT: addi a0, sp, 28 ; RV64IZCMP-NEXT: sd a0, 8(sp) ; RV64IZCMP-NEXT: lw a0, 24(sp) ; RV64IZCMP-NEXT: sd a5, 56(sp) -; RV64IZCMP-NEXT: sd a4, 48(sp) -; RV64IZCMP-NEXT: sd a3, 40(sp) +; RV64IZCMP-NEXT: sd a6, 64(sp) +; RV64IZCMP-NEXT: sd a7, 72(sp) ; RV64IZCMP-NEXT: sd a2, 32(sp) +; RV64IZCMP-NEXT: sd a3, 40(sp) +; RV64IZCMP-NEXT: sd a4, 48(sp) ; RV64IZCMP-NEXT: addi sp, sp, 80 ; RV64IZCMP-NEXT: ret ; @@ -1032,13 +1032,13 @@ define i32 @varargs(ptr %fmt, ...) 
nounwind { ; RV32IZCMP-SR: # %bb.0: ; RV32IZCMP-SR-NEXT: addi sp, sp, -48 ; RV32IZCMP-SR-NEXT: mv a0, a1 -; RV32IZCMP-SR-NEXT: sw a7, 44(sp) -; RV32IZCMP-SR-NEXT: sw a6, 40(sp) ; RV32IZCMP-SR-NEXT: sw a5, 36(sp) -; RV32IZCMP-SR-NEXT: sw a4, 32(sp) -; RV32IZCMP-SR-NEXT: sw a3, 28(sp) -; RV32IZCMP-SR-NEXT: sw a2, 24(sp) +; RV32IZCMP-SR-NEXT: sw a6, 40(sp) +; RV32IZCMP-SR-NEXT: sw a7, 44(sp) ; RV32IZCMP-SR-NEXT: sw a1, 20(sp) +; RV32IZCMP-SR-NEXT: sw a2, 24(sp) +; RV32IZCMP-SR-NEXT: sw a3, 28(sp) +; RV32IZCMP-SR-NEXT: sw a4, 32(sp) ; RV32IZCMP-SR-NEXT: addi a1, sp, 24 ; RV32IZCMP-SR-NEXT: sw a1, 12(sp) ; RV32IZCMP-SR-NEXT: addi sp, sp, 48 @@ -1048,15 +1048,15 @@ define i32 @varargs(ptr %fmt, ...) nounwind { ; RV64IZCMP-SR: # %bb.0: ; RV64IZCMP-SR-NEXT: addi sp, sp, -80 ; RV64IZCMP-SR-NEXT: sd a1, 24(sp) -; RV64IZCMP-SR-NEXT: sd a7, 72(sp) -; RV64IZCMP-SR-NEXT: sd a6, 64(sp) ; RV64IZCMP-SR-NEXT: addi a0, sp, 28 ; RV64IZCMP-SR-NEXT: sd a0, 8(sp) ; RV64IZCMP-SR-NEXT: lw a0, 24(sp) ; RV64IZCMP-SR-NEXT: sd a5, 56(sp) -; RV64IZCMP-SR-NEXT: sd a4, 48(sp) -; RV64IZCMP-SR-NEXT: sd a3, 40(sp) +; RV64IZCMP-SR-NEXT: sd a6, 64(sp) +; RV64IZCMP-SR-NEXT: sd a7, 72(sp) ; RV64IZCMP-SR-NEXT: sd a2, 32(sp) +; RV64IZCMP-SR-NEXT: sd a3, 40(sp) +; RV64IZCMP-SR-NEXT: sd a4, 48(sp) ; RV64IZCMP-SR-NEXT: addi sp, sp, 80 ; RV64IZCMP-SR-NEXT: ret ; @@ -1064,13 +1064,13 @@ define i32 @varargs(ptr %fmt, ...) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: sw a7, 44(sp) -; RV32I-NEXT: sw a6, 40(sp) ; RV32I-NEXT: sw a5, 36(sp) -; RV32I-NEXT: sw a4, 32(sp) -; RV32I-NEXT: sw a3, 28(sp) -; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a7, 44(sp) ; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a4, 32(sp) ; RV32I-NEXT: addi a1, sp, 24 ; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi sp, sp, 48 @@ -1080,15 +1080,15 @@ define i32 @varargs(ptr %fmt, ...) 
nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd a1, 24(sp) -; RV64I-NEXT: sd a7, 72(sp) -; RV64I-NEXT: sd a6, 64(sp) ; RV64I-NEXT: addi a0, sp, 28 ; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: lw a0, 24(sp) ; RV64I-NEXT: sd a5, 56(sp) -; RV64I-NEXT: sd a4, 48(sp) -; RV64I-NEXT: sd a3, 40(sp) +; RV64I-NEXT: sd a6, 64(sp) +; RV64I-NEXT: sd a7, 72(sp) ; RV64I-NEXT: sd a2, 32(sp) +; RV64I-NEXT: sd a3, 40(sp) +; RV64I-NEXT: sd a4, 48(sp) ; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret %va = alloca ptr diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll index 31ca8eab33508c7..352184c2d85ada0 100644 --- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll +++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll @@ -65,8 +65,8 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: lw a5, -4(a4) ; CHECK-NEXT: lw a6, 0(a4) ; CHECK-NEXT: addi a5, a5, 4 -; CHECK-NEXT: sw a5, -4(a4) ; CHECK-NEXT: addi a6, a6, 4 +; CHECK-NEXT: sw a5, -4(a4) ; CHECK-NEXT: sw a6, 0(a4) ; CHECK-NEXT: addi a3, a3, 2 ; CHECK-NEXT: addi a4, a4, 8 diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll index 957f44f9f669dea..4901e268ec11a0c 100644 --- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll +++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll @@ -8,9 +8,9 @@ define void @test(ptr nocapture noundef writeonly %array1, i32 noundef signext % ; RV64-NEXT: addiw a3, a1, 5 ; RV64-NEXT: slli a4, a3, 2 ; RV64-NEXT: add a4, a0, a4 -; RV64-NEXT: sw a2, 0(a4) ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a2, 0(a4) ; RV64-NEXT: sw a2, 24(a0) ; RV64-NEXT: sw a3, 140(a0) ; RV64-NEXT: ret @@ -42,9 +42,9 @@ define void @test1(ptr nocapture noundef %array1, i32 noundef signext %a, i32 no ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a6, a2 ; RV64-NEXT: .LBB1_2: # %entry -; RV64-NEXT: sw a6, 0(a5) ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a6, 0(a5) ; RV64-NEXT: sw a6, 24(a0) ; RV64-NEXT: sw a4, 140(a0) ; RV64-NEXT: ret @@ -72,9 +72,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64 ; RV64-NEXT: addi a3, a1, 5 ; RV64-NEXT: slli a4, a3, 3 ; RV64-NEXT: add a4, a0, a4 -; RV64-NEXT: sd a2, 0(a4) ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sd a2, 0(a4) ; RV64-NEXT: sd a2, 48(a0) ; RV64-NEXT: sd a3, 280(a0) ; RV64-NEXT: ret @@ -102,9 +102,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b ; RV64-NEXT: .LBB3_2: # %entry ; RV64-NEXT: slli a2, a4, 3 ; RV64-NEXT: add a2, a0, a2 -; RV64-NEXT: sd a5, 0(a2) ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sd a5, 0(a2) ; RV64-NEXT: sd a5, 48(a0) ; RV64-NEXT: sd a4, 280(a0) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index e24b1b41645cdf9..35cf65182a6c654 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -754,10 +754,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-NEXT: add a2, a2, a4 ; RV32I-NEXT: srli a2, a2, 24 ; RV32I-NEXT: add a1, a2, a1 -; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: sw zero, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw zero, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i64: @@ -772,10 +772,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) 
nounwind { ; RV32ZBB-NEXT: cpop a1, a1 ; RV32ZBB-NEXT: cpop a3, a4 ; RV32ZBB-NEXT: add a1, a3, a1 -; RV32ZBB-NEXT: sw zero, 12(a0) +; RV32ZBB-NEXT: sw a2, 0(a0) ; RV32ZBB-NEXT: sw zero, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw zero, 12(a0) ; RV32ZBB-NEXT: ret %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) ret <2 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll index f26e57b5a0b7330..0e426ee309ababb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll @@ -397,10 +397,10 @@ define @nxv1i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -428,10 +428,10 @@ define @nxv2i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -459,10 +459,10 @@ define @nxv4i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma @@ -490,10 +490,10 @@ define @nxv8i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma @@ -580,13 +580,13 @@ define @uaddsatnxv1i64(i64 %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: sw a3, 4(sp) ; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: sw a3, 4(sp) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsaddu.vv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index 94e945f8032059d..a34f06948a762cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -706,9 +706,9 @@ define @bitreverse_nxv1i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v9, v8, a0 @@ -846,9 +846,9 @@ define @bitreverse_nxv2i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; 
RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vsrl.vx v10, v8, a0 @@ -986,9 +986,9 @@ define @bitreverse_nxv4i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a0 @@ -1130,9 +1130,9 @@ define @bitreverse_nxv8i64( %va) { ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 5709de567c18dc8..afce04d107e728f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1437,9 +1437,9 @@ define @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv1i64_unmasked( %va ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1 @@ -1721,9 +1721,9 @@ define @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv2i64_unmasked( %va ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1 @@ -2005,9 +2005,9 @@ define @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv4i64_unmasked( %va ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1 @@ -2294,9 +2294,9 @@ define @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64_unmasked( %va ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -2655,9 +2655,9 @@ define @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64_unmasked( %va ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll index d3fce3caf8aef95..e8e362b1f042dd9 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -258,9 +258,9 @@ define @bswap_nxv1i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v9, v8, a0 @@ -341,9 +341,9 @@ define @bswap_nxv2i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vsrl.vx v10, v8, a0 @@ -424,9 +424,9 @@ define @bswap_nxv4i64( %va) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a0 @@ -511,9 +511,9 @@ define @bswap_nxv8i64( %va) { ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 6917d7e44a8e6a9..171de6c2fddf179 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -513,9 +513,9 @@ define @vp_bswap_nxv1i64( %va, @vp_bswap_nxv1i64_unmasked( %va, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1 @@ -683,9 +683,9 @@ define @vp_bswap_nxv2i64( %va, @vp_bswap_nxv2i64_unmasked( %va, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1 @@ -853,9 +853,9 @@ define @vp_bswap_nxv4i64( %va, @vp_bswap_nxv4i64_unmasked( %va, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1 @@ -1028,9 +1028,9 @@ define @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64_unmasked( %va, i32 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -1274,9 +1274,9 @@ define 
@vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64_unmasked( %va, i32 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -1640,9 +1640,9 @@ define @vp_bswap_nxv1i48( %va, @v1i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -493,10 +493,10 @@ define <2 x i64> @v2i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -525,10 +525,10 @@ define <4 x i64> @v4i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -557,10 +557,10 @@ define <8 x i64> @v8i64(i64 %x, i64 %y) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 90bedf87e04d30f..54265193b09f6eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -835,9 +835,9 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1, v0.t @@ -970,9 +970,9 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1 @@ -1107,9 +1107,9 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, 
e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1, v0.t @@ -1242,9 +1242,9 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1 @@ -1379,9 +1379,9 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1, v0.t @@ -1514,9 +1514,9 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1 @@ -1656,21 +1656,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -1862,21 +1862,21 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -2023,21 +2023,21 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) +; 
RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -2229,21 +2229,21 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 7f211d0f8f9bade..37c22d328a4b1a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -113,9 +113,9 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v9, v8, a1 ; RV32-NEXT: li a2, 40 @@ -364,9 +364,9 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: li a2, 40 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 6f2e86097d6ff92..b8ddf74c30dbdce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -283,9 +283,9 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1, v0.t @@ -361,9 +361,9 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li 
a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsll.vx v9, v8, a1 @@ -441,9 +441,9 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1, v0.t @@ -519,9 +519,9 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsll.vx v10, v8, a1 @@ -599,9 +599,9 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1, v0.t @@ -677,9 +677,9 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsll.vx v12, v8, a1 @@ -762,9 +762,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -902,9 +902,9 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -996,9 +996,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -1136,9 +1136,9 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw 
zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index d5338f9b6c6fc09..217597f6d57d697 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -71,9 +71,9 @@ define void @bswap_v2i64(ptr %x, ptr %y) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v9, v8, a1 ; RV32-NEXT: li a2, 40 @@ -223,9 +223,9 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: li a2, 40 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 9f48fdb3608a0bd..d8e294d32cd0805 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -308,8 +308,7 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: sd a0, 144(sp) ; CHECK-NEXT: li a0, 13 -; CHECK-NEXT: sd a0, 8(sp) -; CHECK-NEXT: li a0, 12 +; CHECK-NEXT: li t0, 12 ; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: li a2, 2 ; CHECK-NEXT: li a3, 3 @@ -321,7 +320,8 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> ; CHECK-NEXT: li t4, 9 ; CHECK-NEXT: li t5, 10 ; CHECK-NEXT: li t6, 11 -; CHECK-NEXT: sd a0, 0(sp) +; CHECK-NEXT: sd t0, 0(sp) +; CHECK-NEXT: sd a0, 8(sp) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: call vector_arg_direct_stack diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index f5e6b9290519398..c59b45a1d4f8336 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1511,20 +1511,20 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1653,20 +1653,20 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: 
.cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -1774,20 +1774,20 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1916,20 +1916,20 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -2045,21 +2045,21 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a3, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a3, .LBB34_2 ; RV32-NEXT: # %bb.1: @@ -2472,21 +2472,21 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: 
lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: @@ -4147,20 +4147,20 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4289,20 +4289,20 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -4408,20 +4408,20 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4550,20 +4550,20 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; 
RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -4677,21 +4677,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a3, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a3, .LBB70_2 ; RV32-NEXT: # %bb.1: @@ -5104,21 +5104,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB71_2 ; RV32-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index e90e52fba642b12..ffc1bfd240804ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1128,20 +1128,20 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero @@ -1252,20 +1252,20 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: 
sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero @@ -1345,20 +1345,20 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero @@ -1469,20 +1469,20 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero @@ -1561,7 +1561,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1575,21 +1576,21 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, 
.LBB34_2 ; RV32-NEXT: # %bb.1: @@ -1606,8 +1607,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a2, sp, 32 ; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -1618,47 +1618,40 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v8, v24, v0.t -; RV32-NEXT: vand.vv v8, v24, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v16, v24, v8, v0.t ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 40 @@ -1666,15 +1659,11 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv8r.v v8, v16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v16, 4, v0.t +; RV32-NEXT: 
vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 @@ -1695,44 +1684,28 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v24, v8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v24, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t @@ -1862,21 +1835,21 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index dfad7881066a27a..5b0022077297358 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1271,20 +1271,20 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; 
RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t @@ -1393,20 +1393,20 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 @@ -1494,20 +1494,20 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t @@ -1616,20 +1616,20 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 @@ -1725,21 +1725,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; 
RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a3, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a3, .LBB34_2 ; RV32-NEXT: # %bb.1: @@ -2112,21 +2112,21 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: @@ -3507,20 +3507,20 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t @@ -3629,20 +3629,20 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 @@ -3728,20 +3728,20 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 
28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t @@ -3850,20 +3850,20 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 @@ -3957,21 +3957,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a3, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a3, .LBB70_2 ; RV32-NEXT: # %bb.1: @@ -4344,21 +4344,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: sw a1, 44(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: sw a1, 32(sp) +; RV32-NEXT: sw a1, 36(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB71_2 ; RV32-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index 81e20a298816305..04ebc7ca6b2b898 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -42,10 +42,10 @@ define void @add_v2i64(ptr %x, ptr %y) { ; RV32-NEXT: add t0, a6, t0 ; RV32-NEXT: sltu a4, t0, a6 ; RV32-NEXT: add a1, a1, a4 -; RV32-NEXT: sw t0, 8(a0) ; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a1, 12(a0) ; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw t0, 8(a0) +; RV32-NEXT: sw a1, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v2i64: @@ -56,8 +56,8 @@ define void @add_v2i64(ptr %x, ptr %y) { ; RV64-NEXT: ld a1, 8(a1) ; RV64-NEXT: add a2, a2, a4 ; RV64-NEXT: add a1, a3, a1 -; RV64-NEXT: sd a1, 8(a0) ; RV64-NEXT: 
sd a2, 0(a0) +; RV64-NEXT: sd a1, 8(a0) ; RV64-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y @@ -140,8 +140,8 @@ define void @fadd_v2f64(ptr %x, ptr %y) { ; CHECK-NEXT: fld fa2, 8(a1) ; CHECK-NEXT: fadd.d fa5, fa5, fa3 ; CHECK-NEXT: fadd.d fa4, fa4, fa2 -; CHECK-NEXT: fsd fa4, 8(a0) ; CHECK-NEXT: fsd fa5, 0(a0) +; CHECK-NEXT: fsd fa4, 8(a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll index 5bf8b07efc1dafd..d34235127e83897 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll @@ -603,10 +603,10 @@ define <1 x i64> @expandload_v1i64(ptr %base, <1 x i64> %src0, <1 x i1> %mask) { ; RV32-NEXT: # %bb.1: # %cond.load ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a0, 4(a0) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 53059a4f28d42bc..96b9b2bac2993ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -502,22 +502,22 @@ define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3, ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: sw a7, 60(sp) -; RV32-NEXT: sw a6, 56(sp) -; RV32-NEXT: sw a5, 52(sp) ; RV32-NEXT: sw a4, 48(sp) -; RV32-NEXT: sw a3, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a5, 52(sp) +; RV32-NEXT: sw a6, 56(sp) +; RV32-NEXT: sw a7, 60(sp) ; RV32-NEXT: sw a0, 32(sp) -; RV32-NEXT: fsw fa7, 28(sp) -; RV32-NEXT: fsw fa6, 24(sp) -; RV32-NEXT: fsw fa5, 20(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a2, 40(sp) +; RV32-NEXT: sw a3, 44(sp) ; RV32-NEXT: fsw fa4, 16(sp) -; RV32-NEXT: fsw fa3, 12(sp) -; RV32-NEXT: fsw fa2, 8(sp) -; RV32-NEXT: fsw fa1, 4(sp) +; RV32-NEXT: fsw fa5, 20(sp) +; RV32-NEXT: fsw fa6, 24(sp) +; RV32-NEXT: fsw fa7, 28(sp) ; RV32-NEXT: fsw fa0, 0(sp) +; RV32-NEXT: fsw fa1, 4(sp) +; RV32-NEXT: fsw fa2, 8(sp) +; RV32-NEXT: fsw fa3, 12(sp) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) @@ -546,22 +546,22 @@ define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3, ; RV64-NEXT: fmv.w.x ft5, a5 ; RV64-NEXT: fmv.w.x ft6, a6 ; RV64-NEXT: fmv.w.x ft7, a7 -; RV64-NEXT: fsw fa7, 28(sp) -; RV64-NEXT: fsw fa6, 24(sp) -; RV64-NEXT: fsw fa5, 20(sp) ; RV64-NEXT: fsw fa4, 16(sp) -; RV64-NEXT: fsw fa3, 12(sp) -; RV64-NEXT: fsw fa2, 8(sp) -; RV64-NEXT: fsw fa1, 4(sp) +; RV64-NEXT: fsw fa5, 20(sp) +; RV64-NEXT: fsw fa6, 24(sp) +; RV64-NEXT: fsw fa7, 28(sp) ; RV64-NEXT: fsw fa0, 0(sp) -; RV64-NEXT: fsw ft7, 60(sp) -; RV64-NEXT: fsw ft6, 56(sp) -; RV64-NEXT: fsw ft5, 52(sp) +; RV64-NEXT: fsw fa1, 4(sp) +; RV64-NEXT: fsw fa2, 8(sp) +; RV64-NEXT: fsw fa3, 12(sp) ; RV64-NEXT: fsw ft4, 48(sp) -; RV64-NEXT: fsw ft3, 44(sp) -; RV64-NEXT: fsw ft2, 40(sp) -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: fsw ft5, 52(sp) +; RV64-NEXT: fsw ft6, 
56(sp) +; RV64-NEXT: fsw ft7, 60(sp) ; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: fsw ft2, 40(sp) +; RV64-NEXT: fsw ft3, 44(sp) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) @@ -621,42 +621,42 @@ define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3, ; RV32-NEXT: flw ft9, 36(s0) ; RV32-NEXT: flw ft10, 40(s0) ; RV32-NEXT: flw ft11, 44(s0) -; RV32-NEXT: flw fs0, 60(s0) -; RV32-NEXT: flw fs1, 56(s0) -; RV32-NEXT: flw fs2, 52(s0) -; RV32-NEXT: flw fs3, 48(s0) -; RV32-NEXT: fsw fs0, 124(sp) -; RV32-NEXT: fsw fs1, 120(sp) -; RV32-NEXT: fsw fs2, 116(sp) -; RV32-NEXT: fsw fs3, 112(sp) -; RV32-NEXT: fsw ft11, 108(sp) -; RV32-NEXT: fsw ft10, 104(sp) -; RV32-NEXT: fsw ft9, 100(sp) +; RV32-NEXT: flw fs0, 48(s0) +; RV32-NEXT: flw fs1, 52(s0) +; RV32-NEXT: flw fs2, 56(s0) +; RV32-NEXT: flw fs3, 60(s0) +; RV32-NEXT: fsw fs0, 112(sp) +; RV32-NEXT: fsw fs1, 116(sp) +; RV32-NEXT: fsw fs2, 120(sp) +; RV32-NEXT: fsw fs3, 124(sp) ; RV32-NEXT: fsw ft8, 96(sp) -; RV32-NEXT: fsw ft7, 92(sp) -; RV32-NEXT: fsw ft6, 88(sp) -; RV32-NEXT: fsw ft5, 84(sp) +; RV32-NEXT: fsw ft9, 100(sp) +; RV32-NEXT: fsw ft10, 104(sp) +; RV32-NEXT: fsw ft11, 108(sp) ; RV32-NEXT: fsw ft4, 80(sp) -; RV32-NEXT: fsw ft3, 76(sp) -; RV32-NEXT: fsw ft2, 72(sp) -; RV32-NEXT: fsw ft1, 68(sp) +; RV32-NEXT: fsw ft5, 84(sp) +; RV32-NEXT: fsw ft6, 88(sp) +; RV32-NEXT: fsw ft7, 92(sp) ; RV32-NEXT: fsw ft0, 64(sp) -; RV32-NEXT: sw a7, 60(sp) -; RV32-NEXT: sw a6, 56(sp) -; RV32-NEXT: sw a5, 52(sp) +; RV32-NEXT: fsw ft1, 68(sp) +; RV32-NEXT: fsw ft2, 72(sp) +; RV32-NEXT: fsw ft3, 76(sp) ; RV32-NEXT: sw a4, 48(sp) -; RV32-NEXT: sw a3, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a5, 52(sp) +; RV32-NEXT: sw a6, 56(sp) +; RV32-NEXT: sw a7, 60(sp) ; RV32-NEXT: sw a0, 32(sp) -; RV32-NEXT: fsw fa7, 28(sp) -; RV32-NEXT: fsw fa6, 24(sp) -; RV32-NEXT: fsw fa5, 20(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a2, 40(sp) +; RV32-NEXT: sw a3, 44(sp) ; RV32-NEXT: fsw fa4, 16(sp) -; RV32-NEXT: fsw fa3, 12(sp) -; RV32-NEXT: fsw fa2, 8(sp) -; RV32-NEXT: fsw fa1, 4(sp) +; RV32-NEXT: fsw fa5, 20(sp) +; RV32-NEXT: fsw fa6, 24(sp) +; RV32-NEXT: fsw fa7, 28(sp) ; RV32-NEXT: fsw fa0, 0(sp) +; RV32-NEXT: fsw fa1, 4(sp) +; RV32-NEXT: fsw fa2, 8(sp) +; RV32-NEXT: fsw fa3, 12(sp) ; RV32-NEXT: li a0, 32 ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -726,42 +726,42 @@ define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3, ; RV64-NEXT: flw fs5, 72(s0) ; RV64-NEXT: flw fs6, 80(s0) ; RV64-NEXT: flw fs7, 88(s0) -; RV64-NEXT: flw fs8, 120(s0) -; RV64-NEXT: flw fs9, 112(s0) -; RV64-NEXT: flw fs10, 104(s0) -; RV64-NEXT: flw fs11, 96(s0) -; RV64-NEXT: fsw fs8, 124(sp) -; RV64-NEXT: fsw fs9, 120(sp) -; RV64-NEXT: fsw fs10, 116(sp) -; RV64-NEXT: fsw fs11, 112(sp) -; RV64-NEXT: fsw fs7, 108(sp) -; RV64-NEXT: fsw fs6, 104(sp) -; RV64-NEXT: fsw fs5, 100(sp) +; RV64-NEXT: flw fs8, 96(s0) +; RV64-NEXT: flw fs9, 104(s0) +; RV64-NEXT: flw fs10, 112(s0) +; RV64-NEXT: flw fs11, 120(s0) +; RV64-NEXT: fsw fs8, 112(sp) +; RV64-NEXT: fsw fs9, 116(sp) +; RV64-NEXT: fsw fs10, 120(sp) +; RV64-NEXT: fsw fs11, 124(sp) ; RV64-NEXT: fsw fs4, 96(sp) -; RV64-NEXT: fsw fs3, 92(sp) -; RV64-NEXT: fsw fs2, 88(sp) -; RV64-NEXT: fsw fs1, 84(sp) +; RV64-NEXT: fsw fs5, 100(sp) +; RV64-NEXT: fsw fs6, 104(sp) +; RV64-NEXT: fsw fs7, 108(sp) ; RV64-NEXT: fsw fs0, 80(sp) -; RV64-NEXT: fsw ft11, 76(sp) -; RV64-NEXT: fsw ft10, 
72(sp) -; RV64-NEXT: fsw ft9, 68(sp) +; RV64-NEXT: fsw fs1, 84(sp) +; RV64-NEXT: fsw fs2, 88(sp) +; RV64-NEXT: fsw fs3, 92(sp) ; RV64-NEXT: fsw ft8, 64(sp) -; RV64-NEXT: fsw fa7, 28(sp) -; RV64-NEXT: fsw fa6, 24(sp) -; RV64-NEXT: fsw fa5, 20(sp) +; RV64-NEXT: fsw ft9, 68(sp) +; RV64-NEXT: fsw ft10, 72(sp) +; RV64-NEXT: fsw ft11, 76(sp) ; RV64-NEXT: fsw fa4, 16(sp) -; RV64-NEXT: fsw fa3, 12(sp) -; RV64-NEXT: fsw fa2, 8(sp) -; RV64-NEXT: fsw fa1, 4(sp) +; RV64-NEXT: fsw fa5, 20(sp) +; RV64-NEXT: fsw fa6, 24(sp) +; RV64-NEXT: fsw fa7, 28(sp) ; RV64-NEXT: fsw fa0, 0(sp) -; RV64-NEXT: fsw ft7, 60(sp) -; RV64-NEXT: fsw ft6, 56(sp) -; RV64-NEXT: fsw ft5, 52(sp) +; RV64-NEXT: fsw fa1, 4(sp) +; RV64-NEXT: fsw fa2, 8(sp) +; RV64-NEXT: fsw fa3, 12(sp) ; RV64-NEXT: fsw ft4, 48(sp) -; RV64-NEXT: fsw ft3, 44(sp) -; RV64-NEXT: fsw ft2, 40(sp) -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: fsw ft5, 52(sp) +; RV64-NEXT: fsw ft6, 56(sp) +; RV64-NEXT: fsw ft7, 60(sp) ; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: fsw ft2, 40(sp) +; RV64-NEXT: fsw ft3, 44(sp) ; RV64-NEXT: li a0, 32 ; RV64-NEXT: mv a1, sp ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -830,14 +830,14 @@ define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double % ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: fsd fa7, 56(sp) -; RV32-NEXT: fsd fa6, 48(sp) -; RV32-NEXT: fsd fa5, 40(sp) ; RV32-NEXT: fsd fa4, 32(sp) -; RV32-NEXT: fsd fa3, 24(sp) -; RV32-NEXT: fsd fa2, 16(sp) -; RV32-NEXT: fsd fa1, 8(sp) +; RV32-NEXT: fsd fa5, 40(sp) +; RV32-NEXT: fsd fa6, 48(sp) +; RV32-NEXT: fsd fa7, 56(sp) ; RV32-NEXT: fsd fa0, 0(sp) +; RV32-NEXT: fsd fa1, 8(sp) +; RV32-NEXT: fsd fa2, 16(sp) +; RV32-NEXT: fsd fa3, 24(sp) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) @@ -858,14 +858,14 @@ define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double % ; RV64-NEXT: addi s0, sp, 128 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: fsd fa7, 56(sp) -; RV64-NEXT: fsd fa6, 48(sp) -; RV64-NEXT: fsd fa5, 40(sp) ; RV64-NEXT: fsd fa4, 32(sp) -; RV64-NEXT: fsd fa3, 24(sp) -; RV64-NEXT: fsd fa2, 16(sp) -; RV64-NEXT: fsd fa1, 8(sp) +; RV64-NEXT: fsd fa5, 40(sp) +; RV64-NEXT: fsd fa6, 48(sp) +; RV64-NEXT: fsd fa7, 56(sp) ; RV64-NEXT: fsd fa0, 0(sp) +; RV64-NEXT: fsd fa1, 8(sp) +; RV64-NEXT: fsd fa2, 16(sp) +; RV64-NEXT: fsd fa3, 24(sp) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) @@ -909,26 +909,26 @@ define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double ; RV32-NEXT: sw a6, 120(sp) ; RV32-NEXT: sw a7, 124(sp) ; RV32-NEXT: fld ft3, 120(sp) -; RV32-NEXT: fld ft4, 24(s0) -; RV32-NEXT: fld ft5, 16(s0) -; RV32-NEXT: fld ft6, 8(s0) -; RV32-NEXT: fld ft7, 0(s0) -; RV32-NEXT: fsd ft4, 248(sp) -; RV32-NEXT: fsd ft5, 240(sp) -; RV32-NEXT: fsd ft6, 232(sp) -; RV32-NEXT: fsd ft7, 224(sp) -; RV32-NEXT: fsd fa7, 184(sp) -; RV32-NEXT: fsd fa6, 176(sp) -; RV32-NEXT: fsd fa5, 168(sp) +; RV32-NEXT: fld ft4, 0(s0) +; RV32-NEXT: fld ft5, 8(s0) +; RV32-NEXT: fld ft6, 16(s0) +; RV32-NEXT: fld ft7, 24(s0) +; RV32-NEXT: fsd ft4, 224(sp) +; RV32-NEXT: fsd ft5, 232(sp) +; RV32-NEXT: fsd ft6, 240(sp) +; RV32-NEXT: fsd ft7, 248(sp) ; RV32-NEXT: fsd fa4, 160(sp) -; RV32-NEXT: fsd fa3, 152(sp) -; RV32-NEXT: fsd fa2, 144(sp) -; RV32-NEXT: fsd fa1, 136(sp) +; RV32-NEXT: fsd fa5, 168(sp) +; RV32-NEXT: fsd fa6, 176(sp) +; 
RV32-NEXT: fsd fa7, 184(sp) ; RV32-NEXT: fsd fa0, 128(sp) -; RV32-NEXT: fsd ft3, 216(sp) -; RV32-NEXT: fsd ft2, 208(sp) -; RV32-NEXT: fsd ft1, 200(sp) +; RV32-NEXT: fsd fa1, 136(sp) +; RV32-NEXT: fsd fa2, 144(sp) +; RV32-NEXT: fsd fa3, 152(sp) ; RV32-NEXT: fsd ft0, 192(sp) +; RV32-NEXT: fsd ft1, 200(sp) +; RV32-NEXT: fsd ft2, 208(sp) +; RV32-NEXT: fsd ft3, 216(sp) ; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) @@ -949,22 +949,22 @@ define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double ; RV64-NEXT: addi s0, sp, 256 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: sd a7, 120(sp) -; RV64-NEXT: sd a6, 112(sp) -; RV64-NEXT: sd a5, 104(sp) ; RV64-NEXT: sd a4, 96(sp) -; RV64-NEXT: sd a3, 88(sp) -; RV64-NEXT: sd a2, 80(sp) -; RV64-NEXT: sd a1, 72(sp) +; RV64-NEXT: sd a5, 104(sp) +; RV64-NEXT: sd a6, 112(sp) +; RV64-NEXT: sd a7, 120(sp) ; RV64-NEXT: sd a0, 64(sp) -; RV64-NEXT: fsd fa7, 56(sp) -; RV64-NEXT: fsd fa6, 48(sp) -; RV64-NEXT: fsd fa5, 40(sp) +; RV64-NEXT: sd a1, 72(sp) +; RV64-NEXT: sd a2, 80(sp) +; RV64-NEXT: sd a3, 88(sp) ; RV64-NEXT: fsd fa4, 32(sp) -; RV64-NEXT: fsd fa3, 24(sp) -; RV64-NEXT: fsd fa2, 16(sp) -; RV64-NEXT: fsd fa1, 8(sp) +; RV64-NEXT: fsd fa5, 40(sp) +; RV64-NEXT: fsd fa6, 48(sp) +; RV64-NEXT: fsd fa7, 56(sp) ; RV64-NEXT: fsd fa0, 0(sp) +; RV64-NEXT: fsd fa1, 8(sp) +; RV64-NEXT: fsd fa2, 16(sp) +; RV64-NEXT: fsd fa3, 24(sp) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) @@ -1056,42 +1056,42 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double ; RV32-NEXT: fld fs5, 104(s0) ; RV32-NEXT: fld fs6, 112(s0) ; RV32-NEXT: fld fs7, 120(s0) -; RV32-NEXT: fld fs8, 152(s0) -; RV32-NEXT: fld fs9, 144(s0) -; RV32-NEXT: fld fs10, 136(s0) -; RV32-NEXT: fld fs11, 128(s0) -; RV32-NEXT: fsd fs8, 248(sp) -; RV32-NEXT: fsd fs9, 240(sp) -; RV32-NEXT: fsd fs10, 232(sp) -; RV32-NEXT: fsd fs11, 224(sp) -; RV32-NEXT: fsd fs7, 216(sp) -; RV32-NEXT: fsd fs6, 208(sp) -; RV32-NEXT: fsd fs5, 200(sp) +; RV32-NEXT: fld fs8, 128(s0) +; RV32-NEXT: fld fs9, 136(s0) +; RV32-NEXT: fld fs10, 144(s0) +; RV32-NEXT: fld fs11, 152(s0) +; RV32-NEXT: fsd fs8, 224(sp) +; RV32-NEXT: fsd fs9, 232(sp) +; RV32-NEXT: fsd fs10, 240(sp) +; RV32-NEXT: fsd fs11, 248(sp) ; RV32-NEXT: fsd fs4, 192(sp) -; RV32-NEXT: fsd fs3, 184(sp) -; RV32-NEXT: fsd fs2, 176(sp) -; RV32-NEXT: fsd fs1, 168(sp) +; RV32-NEXT: fsd fs5, 200(sp) +; RV32-NEXT: fsd fs6, 208(sp) +; RV32-NEXT: fsd fs7, 216(sp) ; RV32-NEXT: fsd fs0, 160(sp) -; RV32-NEXT: fsd ft11, 152(sp) -; RV32-NEXT: fsd ft10, 144(sp) -; RV32-NEXT: fsd ft9, 136(sp) +; RV32-NEXT: fsd fs1, 168(sp) +; RV32-NEXT: fsd fs2, 176(sp) +; RV32-NEXT: fsd fs3, 184(sp) ; RV32-NEXT: fsd ft8, 128(sp) -; RV32-NEXT: fsd ft7, 376(sp) -; RV32-NEXT: fsd ft6, 368(sp) -; RV32-NEXT: fsd ft5, 360(sp) +; RV32-NEXT: fsd ft9, 136(sp) +; RV32-NEXT: fsd ft10, 144(sp) +; RV32-NEXT: fsd ft11, 152(sp) ; RV32-NEXT: fsd ft4, 352(sp) -; RV32-NEXT: fsd fa7, 312(sp) -; RV32-NEXT: fsd fa6, 304(sp) -; RV32-NEXT: fsd fa5, 296(sp) +; RV32-NEXT: fsd ft5, 360(sp) +; RV32-NEXT: fsd ft6, 368(sp) +; RV32-NEXT: fsd ft7, 376(sp) ; RV32-NEXT: fsd fa4, 288(sp) -; RV32-NEXT: fsd fa3, 280(sp) -; RV32-NEXT: fsd fa2, 272(sp) -; RV32-NEXT: fsd fa1, 264(sp) +; RV32-NEXT: fsd fa5, 296(sp) +; RV32-NEXT: fsd fa6, 304(sp) +; RV32-NEXT: fsd fa7, 312(sp) ; RV32-NEXT: fsd fa0, 256(sp) -; RV32-NEXT: fsd ft3, 344(sp) -; RV32-NEXT: fsd ft2, 336(sp) -; 
RV32-NEXT: fsd ft1, 328(sp) +; RV32-NEXT: fsd fa1, 264(sp) +; RV32-NEXT: fsd fa2, 272(sp) +; RV32-NEXT: fsd fa3, 280(sp) ; RV32-NEXT: fsd ft0, 320(sp) +; RV32-NEXT: fsd ft1, 328(sp) +; RV32-NEXT: fsd ft2, 336(sp) +; RV32-NEXT: fsd ft3, 344(sp) ; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v16, (a0) @@ -1150,38 +1150,38 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double ; RV64-NEXT: fld fs1, 104(s0) ; RV64-NEXT: fld fs2, 112(s0) ; RV64-NEXT: fld fs3, 120(s0) -; RV64-NEXT: sd a7, 248(sp) -; RV64-NEXT: sd a6, 240(sp) -; RV64-NEXT: sd a5, 232(sp) ; RV64-NEXT: sd a4, 224(sp) -; RV64-NEXT: sd a3, 216(sp) -; RV64-NEXT: sd a2, 208(sp) -; RV64-NEXT: sd a1, 200(sp) +; RV64-NEXT: sd a5, 232(sp) +; RV64-NEXT: sd a6, 240(sp) +; RV64-NEXT: sd a7, 248(sp) ; RV64-NEXT: sd a0, 192(sp) -; RV64-NEXT: fsd fa7, 184(sp) -; RV64-NEXT: fsd fa6, 176(sp) -; RV64-NEXT: fsd fa5, 168(sp) +; RV64-NEXT: sd a1, 200(sp) +; RV64-NEXT: sd a2, 208(sp) +; RV64-NEXT: sd a3, 216(sp) ; RV64-NEXT: fsd fa4, 160(sp) -; RV64-NEXT: fsd fa3, 152(sp) -; RV64-NEXT: fsd fa2, 144(sp) -; RV64-NEXT: fsd fa1, 136(sp) +; RV64-NEXT: fsd fa5, 168(sp) +; RV64-NEXT: fsd fa6, 176(sp) +; RV64-NEXT: fsd fa7, 184(sp) ; RV64-NEXT: fsd fa0, 128(sp) -; RV64-NEXT: fsd fs3, 120(sp) -; RV64-NEXT: fsd fs2, 112(sp) -; RV64-NEXT: fsd fs1, 104(sp) +; RV64-NEXT: fsd fa1, 136(sp) +; RV64-NEXT: fsd fa2, 144(sp) +; RV64-NEXT: fsd fa3, 152(sp) ; RV64-NEXT: fsd fs0, 96(sp) -; RV64-NEXT: fsd ft11, 88(sp) -; RV64-NEXT: fsd ft10, 80(sp) -; RV64-NEXT: fsd ft9, 72(sp) +; RV64-NEXT: fsd fs1, 104(sp) +; RV64-NEXT: fsd fs2, 112(sp) +; RV64-NEXT: fsd fs3, 120(sp) ; RV64-NEXT: fsd ft8, 64(sp) -; RV64-NEXT: fsd ft7, 56(sp) -; RV64-NEXT: fsd ft6, 48(sp) -; RV64-NEXT: fsd ft5, 40(sp) +; RV64-NEXT: fsd ft9, 72(sp) +; RV64-NEXT: fsd ft10, 80(sp) +; RV64-NEXT: fsd ft11, 88(sp) ; RV64-NEXT: fsd ft4, 32(sp) -; RV64-NEXT: fsd ft3, 24(sp) -; RV64-NEXT: fsd ft2, 16(sp) -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: fsd ft5, 40(sp) +; RV64-NEXT: fsd ft6, 48(sp) +; RV64-NEXT: fsd ft7, 56(sp) ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: fsd ft2, 16(sp) +; RV64-NEXT: fsd ft3, 24(sp) ; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index a4a491989c7f021..9d92018db2e88ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -126,9 +126,6 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFH32-NEXT: vmv.x.s a1, v8 -; ZVFH32-NEXT: slli a2, a1, 17 -; ZVFH32-NEXT: srli a2, a2, 19 -; ZVFH32-NEXT: sh a2, 4(a0) ; ZVFH32-NEXT: vmv.x.s a2, v9 ; ZVFH32-NEXT: lui a3, 8 ; ZVFH32-NEXT: addi a3, a3, -1 @@ -136,11 +133,14 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 ; ZVFH32-NEXT: vmv.x.s a4, v8 ; ZVFH32-NEXT: and a3, a4, a3 +; ZVFH32-NEXT: slli a4, a1, 17 +; ZVFH32-NEXT: srli a4, a4, 19 ; ZVFH32-NEXT: slli a3, a3, 15 ; ZVFH32-NEXT: slli a1, a1, 30 ; ZVFH32-NEXT: or a1, a2, a1 ; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) +; ZVFH32-NEXT: sh a4, 4(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2si_v3f32_v3i15: @@ -160,10 +160,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH64-NEXT: slli a3, a3, 30 ; 
ZVFH64-NEXT: or a1, a1, a3 ; ZVFH64-NEXT: or a1, a1, a2 +; ZVFH64-NEXT: slli a2, a1, 19 +; ZVFH64-NEXT: srli a2, a2, 51 ; ZVFH64-NEXT: sw a1, 0(a0) -; ZVFH64-NEXT: slli a1, a1, 19 -; ZVFH64-NEXT: srli a1, a1, 51 -; ZVFH64-NEXT: sh a1, 4(a0) +; ZVFH64-NEXT: sh a2, 4(a0) ; ZVFH64-NEXT: ret ; ; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15: @@ -172,9 +172,6 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: slli a2, a1, 17 -; ZVFHMIN32-NEXT: srli a2, a2, 19 -; ZVFHMIN32-NEXT: sh a2, 4(a0) ; ZVFHMIN32-NEXT: vmv.x.s a2, v9 ; ZVFHMIN32-NEXT: lui a3, 8 ; ZVFHMIN32-NEXT: addi a3, a3, -1 @@ -182,11 +179,14 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 ; ZVFHMIN32-NEXT: vmv.x.s a4, v8 ; ZVFHMIN32-NEXT: and a3, a4, a3 +; ZVFHMIN32-NEXT: slli a4, a1, 17 +; ZVFHMIN32-NEXT: srli a4, a4, 19 ; ZVFHMIN32-NEXT: slli a3, a3, 15 ; ZVFHMIN32-NEXT: slli a1, a1, 30 ; ZVFHMIN32-NEXT: or a1, a2, a1 ; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) +; ZVFHMIN32-NEXT: sh a4, 4(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15: @@ -206,10 +206,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN64-NEXT: slli a3, a3, 30 ; ZVFHMIN64-NEXT: or a1, a1, a3 ; ZVFHMIN64-NEXT: or a1, a1, a2 +; ZVFHMIN64-NEXT: slli a2, a1, 19 +; ZVFHMIN64-NEXT: srli a2, a2, 51 ; ZVFHMIN64-NEXT: sw a1, 0(a0) -; ZVFHMIN64-NEXT: slli a1, a1, 19 -; ZVFHMIN64-NEXT: srli a1, a1, 51 -; ZVFHMIN64-NEXT: sh a1, 4(a0) +; ZVFHMIN64-NEXT: sh a2, 4(a0) ; ZVFHMIN64-NEXT: ret %z = fptosi <3 x float> %x to <3 x i15> ret <3 x i15> %z @@ -223,9 +223,6 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFH32-NEXT: vmv.x.s a1, v8 -; ZVFH32-NEXT: slli a2, a1, 17 -; ZVFH32-NEXT: srli a2, a2, 19 -; ZVFH32-NEXT: sh a2, 4(a0) ; ZVFH32-NEXT: vmv.x.s a2, v9 ; ZVFH32-NEXT: lui a3, 16 ; ZVFH32-NEXT: addi a3, a3, -1 @@ -233,11 +230,14 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 ; ZVFH32-NEXT: vmv.x.s a4, v8 ; ZVFH32-NEXT: and a3, a4, a3 +; ZVFH32-NEXT: slli a4, a1, 17 +; ZVFH32-NEXT: srli a4, a4, 19 ; ZVFH32-NEXT: slli a3, a3, 15 ; ZVFH32-NEXT: slli a1, a1, 30 ; ZVFH32-NEXT: or a1, a2, a1 ; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) +; ZVFH32-NEXT: sh a4, 4(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2ui_v3f32_v3i15: @@ -257,10 +257,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH64-NEXT: slli a3, a3, 30 ; ZVFH64-NEXT: or a1, a1, a3 ; ZVFH64-NEXT: or a1, a1, a2 +; ZVFH64-NEXT: slli a2, a1, 19 +; ZVFH64-NEXT: srli a2, a2, 51 ; ZVFH64-NEXT: sw a1, 0(a0) -; ZVFH64-NEXT: slli a1, a1, 19 -; ZVFH64-NEXT: srli a1, a1, 51 -; ZVFH64-NEXT: sh a1, 4(a0) +; ZVFH64-NEXT: sh a2, 4(a0) ; ZVFH64-NEXT: ret ; ; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15: @@ -269,9 +269,6 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: slli a2, a1, 17 -; ZVFHMIN32-NEXT: srli a2, a2, 19 -; ZVFHMIN32-NEXT: sh a2, 4(a0) ; ZVFHMIN32-NEXT: vmv.x.s a2, v9 ; ZVFHMIN32-NEXT: lui a3, 16 ; ZVFHMIN32-NEXT: addi a3, a3, -1 @@ -279,11 +276,14 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 ; ZVFHMIN32-NEXT: vmv.x.s a4, v8 ; ZVFHMIN32-NEXT: and a3, 
a4, a3 +; ZVFHMIN32-NEXT: slli a4, a1, 17 +; ZVFHMIN32-NEXT: srli a4, a4, 19 ; ZVFHMIN32-NEXT: slli a3, a3, 15 ; ZVFHMIN32-NEXT: slli a1, a1, 30 ; ZVFHMIN32-NEXT: or a1, a2, a1 ; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) +; ZVFHMIN32-NEXT: sh a4, 4(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15: @@ -303,10 +303,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN64-NEXT: slli a3, a3, 30 ; ZVFHMIN64-NEXT: or a1, a1, a3 ; ZVFHMIN64-NEXT: or a1, a1, a2 +; ZVFHMIN64-NEXT: slli a2, a1, 19 +; ZVFHMIN64-NEXT: srli a2, a2, 51 ; ZVFHMIN64-NEXT: sw a1, 0(a0) -; ZVFHMIN64-NEXT: slli a1, a1, 19 -; ZVFHMIN64-NEXT: srli a1, a1, 51 -; ZVFHMIN64-NEXT: sh a1, 4(a0) +; ZVFHMIN64-NEXT: sh a2, 4(a0) ; ZVFHMIN64-NEXT: ret %z = fptoui <3 x float> %x to <3 x i15> ret <3 x i15> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 81fb86cd81cd35d..7853e91ca53a391 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -195,8 +195,8 @@ define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) { define void @insertelt_v4i64_store(ptr %x, i64 %y) { ; RV32-LABEL: insertelt_v4i64_store: ; RV32: # %bb.0: -; RV32-NEXT: sw a2, 28(a0) ; RV32-NEXT: sw a1, 24(a0) +; RV32-NEXT: sw a2, 28(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i64_store: @@ -253,8 +253,8 @@ define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) { define void @insertelt_v3i64_store(ptr %x, i64 %y) { ; RV32-LABEL: insertelt_v3i64_store: ; RV32: # %bb.0: -; RV32-NEXT: sw a2, 20(a0) ; RV32-NEXT: sw a1, 16(a0) +; RV32-NEXT: sw a2, 20(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v3i64_store: @@ -382,8 +382,8 @@ define void @insertelt_v8i64_0_store(ptr %x) { ; RV32-LABEL: insertelt_v8i64_0_store: ; RV32: # %bb.0: ; RV32-NEXT: li a1, -1 -; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v8i64_0_store: @@ -428,8 +428,8 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) { ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: li a1, -1 -; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v8i64_store: @@ -460,9 +460,9 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) { define void @insertelt_c6_v8i64_0_store(ptr %x) { ; RV32-LABEL: insertelt_c6_v8i64_0_store: ; RV32: # %bb.0: -; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: li a1, 6 ; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_c6_v8i64_0_store: @@ -506,9 +506,9 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) { ; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: li a1, 6 ; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_c6_v8i64_store: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 43cee6610e7872f..7bf47d42de3b951 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -310,12 +310,12 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() { ; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a1, 3 -; RV64ZVE32-NEXT: sd a1, 24(a0) -; RV64ZVE32-NEXT: li a1, 2 -; 
RV64ZVE32-NEXT: sd a1, 16(a0) -; RV64ZVE32-NEXT: li a1, 1 -; RV64ZVE32-NEXT: sd a1, 8(a0) +; RV64ZVE32-NEXT: li a2, 2 +; RV64ZVE32-NEXT: li a3, 1 ; RV64ZVE32-NEXT: sd zero, 0(a0) +; RV64ZVE32-NEXT: sd a3, 8(a0) +; RV64ZVE32-NEXT: sd a2, 16(a0) +; RV64ZVE32-NEXT: sd a1, 24(a0) ; RV64ZVE32-NEXT: ret ret <4 x i64> } @@ -340,12 +340,12 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() { ; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a1, 6 -; RV64ZVE32-NEXT: sd a1, 24(a0) -; RV64ZVE32-NEXT: li a1, 4 -; RV64ZVE32-NEXT: sd a1, 16(a0) -; RV64ZVE32-NEXT: li a1, 2 -; RV64ZVE32-NEXT: sd a1, 8(a0) +; RV64ZVE32-NEXT: li a2, 4 +; RV64ZVE32-NEXT: li a3, 2 ; RV64ZVE32-NEXT: sd zero, 0(a0) +; RV64ZVE32-NEXT: sd a3, 8(a0) +; RV64ZVE32-NEXT: sd a2, 16(a0) +; RV64ZVE32-NEXT: sd a1, 24(a0) ; RV64ZVE32-NEXT: ret ret <4 x i64> } @@ -517,8 +517,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) { ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0) ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 -; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void @@ -548,8 +548,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0) ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 -; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void @@ -1130,10 +1130,10 @@ define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) ; ; RV64ZVE32-LABEL: v4xi64_exact: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: sd a4, 24(a0) -; RV64ZVE32-NEXT: sd a3, 16(a0) -; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <4 x i64> poison, i64 %a, i32 0 %v2 = insertelement <4 x i64> %v1, i64 %b, i32 1 @@ -1194,14 +1194,14 @@ define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i ; RV64ZVE32-LABEL: v8xi64_exact: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: ld t0, 0(sp) -; RV64ZVE32-NEXT: sd t0, 56(a0) -; RV64ZVE32-NEXT: sd a7, 48(a0) -; RV64ZVE32-NEXT: sd a6, 40(a0) ; RV64ZVE32-NEXT: sd a5, 32(a0) -; RV64ZVE32-NEXT: sd a4, 24(a0) -; RV64ZVE32-NEXT: sd a3, 16(a0) -; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a6, 40(a0) +; RV64ZVE32-NEXT: sd a7, 48(a0) +; RV64ZVE32-NEXT: sd t0, 56(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 @@ -1243,14 +1243,14 @@ define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; ; RV64ZVE32-LABEL: v8xi64_exact_equal_halves: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: sd a4, 56(a0) -; RV64ZVE32-NEXT: sd a3, 48(a0) -; RV64ZVE32-NEXT: sd a2, 40(a0) ; RV64ZVE32-NEXT: sd a1, 32(a0) -; RV64ZVE32-NEXT: sd a4, 24(a0) -; RV64ZVE32-NEXT: sd a3, 16(a0) -; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a2, 40(a0) +; RV64ZVE32-NEXT: sd a3, 48(a0) +; RV64ZVE32-NEXT: sd a4, 56(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = 
insertelement <8 x i64> %v1, i64 %b, i32 1 @@ -1288,10 +1288,10 @@ define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; ; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: sd a4, 24(a0) -; RV64ZVE32-NEXT: sd a3, 16(a0) -; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) +; RV64ZVE32-NEXT: sd a2, 8(a0) +; RV64ZVE32-NEXT: sd a3, 16(a0) +; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 @@ -1325,10 +1325,10 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca ; ; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: sd a4, 56(a0) -; RV64ZVE32-NEXT: sd a3, 48(a0) -; RV64ZVE32-NEXT: sd a2, 40(a0) ; RV64ZVE32-NEXT: sd a1, 32(a0) +; RV64ZVE32-NEXT: sd a2, 40(a0) +; RV64ZVE32-NEXT: sd a3, 48(a0) +; RV64ZVE32-NEXT: sd a4, 56(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 4 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index 336a64b1b89ca83..f6354d3e2f619a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -46,8 +46,8 @@ define void @splat_v2i64(ptr %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero @@ -112,8 +112,8 @@ define void @splat_v4i64(ptr %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero @@ -405,8 +405,8 @@ define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vadd.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 70bda8c2da0f27e..f9992a5de793d07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -5527,9 +5527,9 @@ define void @mulhu_vx_v2i64(ptr %x) { ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a1, 699051 ; RV32-NEXT: addi a2, a1, -1366 -; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, a1, -1365 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vmulhu.vv v8, v8, v9 @@ -5632,9 +5632,9 @@ define void @mulhs_vx_v2i64(ptr %x) { ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, a1, 1366 ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vmulh.vv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index 
805b371f1e3d56b..7f1493544eabcfe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -14,8 +14,8 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -332,64 +332,64 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 68(sp) ; RV32-NEXT: sw a0, 64(sp) +; RV32-NEXT: sw a1, 68(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 124(sp) ; RV32-NEXT: sw a0, 120(sp) +; RV32-NEXT: sw a1, 124(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 116(sp) ; RV32-NEXT: sw a0, 112(sp) +; RV32-NEXT: sw a1, 116(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 108(sp) ; RV32-NEXT: sw a0, 104(sp) +; RV32-NEXT: sw a1, 108(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 100(sp) ; RV32-NEXT: sw a0, 96(sp) +; RV32-NEXT: sw a1, 100(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 92(sp) ; RV32-NEXT: sw a0, 88(sp) +; RV32-NEXT: sw a1, 92(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 84(sp) ; RV32-NEXT: sw a0, 80(sp) +; RV32-NEXT: sw a1, 84(sp) ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 76(sp) ; RV32-NEXT: sw a0, 72(sp) +; RV32-NEXT: sw a1, 76(sp) ; RV32-NEXT: addi a0, sp, 64 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) @@ -413,36 +413,36 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 0(sp) ; RV64-NEXT: vslidedown.vi v10, v8, 7 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 56(sp) +; RV64-NEXT: fcvt.l.s a1, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 6 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; 
RV64-NEXT: sd a0, 48(sp) +; RV64-NEXT: fcvt.l.s a2, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 5 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 40(sp) +; RV64-NEXT: fcvt.l.s a3, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 4 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 32(sp) +; RV64-NEXT: fcvt.l.s a4, fa5 +; RV64-NEXT: sd a4, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a2, 48(sp) +; RV64-NEXT: sd a1, 56(sp) ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v9, v8, 3 ; RV64-NEXT: vfmv.f.s fa5, v9 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 24(sp) +; RV64-NEXT: fcvt.l.s a1, fa5 ; RV64-NEXT: vslidedown.vi v9, v8, 2 ; RV64-NEXT: vfmv.f.s fa5, v9 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: fcvt.l.s a2, fa5 ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: vfmv.f.s fa5, v8 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: fcvt.l.s a3, fa5 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a3, 8(sp) +; RV64-NEXT: sd a2, 16(sp) +; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) @@ -478,99 +478,99 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: flw fa0, 124(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 252(sp) ; RV32-NEXT: sw a0, 248(sp) +; RV32-NEXT: sw a1, 252(sp) ; RV32-NEXT: flw fa0, 120(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 244(sp) ; RV32-NEXT: sw a0, 240(sp) +; RV32-NEXT: sw a1, 244(sp) ; RV32-NEXT: flw fa0, 116(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 236(sp) ; RV32-NEXT: sw a0, 232(sp) +; RV32-NEXT: sw a1, 236(sp) ; RV32-NEXT: flw fa0, 112(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 228(sp) ; RV32-NEXT: sw a0, 224(sp) +; RV32-NEXT: sw a1, 228(sp) ; RV32-NEXT: flw fa0, 108(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 220(sp) ; RV32-NEXT: sw a0, 216(sp) +; RV32-NEXT: sw a1, 220(sp) ; RV32-NEXT: flw fa0, 104(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 212(sp) ; RV32-NEXT: sw a0, 208(sp) +; RV32-NEXT: sw a1, 212(sp) ; RV32-NEXT: flw fa0, 100(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 204(sp) ; RV32-NEXT: sw a0, 200(sp) +; RV32-NEXT: sw a1, 204(sp) ; RV32-NEXT: flw fa0, 96(sp) ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 196(sp) ; RV32-NEXT: sw a0, 192(sp) +; RV32-NEXT: sw a1, 196(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) +; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) +; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: sw a0, 144(sp) +; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: 
vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) +; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: sw a0, 184(sp) +; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: sw a0, 176(sp) +; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: sw a0, 168(sp) +; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: sw a0, 160(sp) +; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: li a0, 32 ; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -621,37 +621,37 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; RV64-NEXT: sd a0, 192(sp) ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 128(sp) ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v10, v8, 3 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 152(sp) +; RV64-NEXT: fcvt.l.s a1, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 2 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 144(sp) +; RV64-NEXT: fcvt.l.s a2, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 1 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 136(sp) +; RV64-NEXT: fcvt.l.s a3, fa5 +; RV64-NEXT: sd a0, 128(sp) +; RV64-NEXT: sd a3, 136(sp) +; RV64-NEXT: sd a2, 144(sp) +; RV64-NEXT: sd a1, 152(sp) ; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-NEXT: vslidedown.vi v10, v8, 7 ; RV64-NEXT: vfmv.f.s fa5, v10 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 184(sp) ; RV64-NEXT: vslidedown.vi v10, v8, 6 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 176(sp) +; RV64-NEXT: fcvt.l.s a1, fa5 ; RV64-NEXT: vslidedown.vi v10, v8, 5 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 168(sp) +; RV64-NEXT: fcvt.l.s a2, fa5 ; RV64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-NEXT: vfmv.f.s fa5, v8 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 160(sp) +; RV64-NEXT: fcvt.l.s a3, fa5 +; RV64-NEXT: sd a3, 160(sp) +; RV64-NEXT: sd a2, 168(sp) +; RV64-NEXT: sd a1, 176(sp) +; RV64-NEXT: sd a0, 184(sp) ; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) @@ -675,8 +675,8 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v 
v8, (a0), zero @@ -879,51 +879,51 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: fld fa0, 120(sp) ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: sw a0, 184(sp) +; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: fld fa0, 112(sp) ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: sw a0, 176(sp) +; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: fld fa0, 104(sp) ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: sw a0, 168(sp) +; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: fld fa0, 96(sp) ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: sw a0, 160(sp) +; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) +; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) +; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) +; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: sw a0, 144(sp) +; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) @@ -961,21 +961,21 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; RV64-NEXT: sd a0, 96(sp) ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.d a0, fa5 -; RV64-NEXT: sd a0, 64(sp) ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v10, v8, 1 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.d a0, fa5 -; RV64-NEXT: sd a0, 72(sp) +; RV64-NEXT: fcvt.l.d a1, fa5 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v10, v8, 3 ; RV64-NEXT: vfmv.f.s fa5, v10 -; RV64-NEXT: fcvt.l.d a0, fa5 -; RV64-NEXT: sd a0, 88(sp) +; RV64-NEXT: fcvt.l.d a2, fa5 ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vfmv.f.s fa5, v8 -; RV64-NEXT: fcvt.l.d a0, fa5 -; RV64-NEXT: sd a0, 80(sp) +; RV64-NEXT: fcvt.l.d a3, fa5 +; RV64-NEXT: sd a0, 64(sp) +; RV64-NEXT: sd a1, 72(sp) +; RV64-NEXT: sd a3, 80(sp) +; RV64-NEXT: sd a2, 88(sp) ; RV64-NEXT: addi a0, sp, 64 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 43184a28ba3238c..0b1c5ea56cec8e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -315,36 +315,36 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 0(sp) ; RV64-i64-NEXT: vslidedown.vi v10, v8, 7 ; RV64-i64-NEXT: 
vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 56(sp) +; RV64-i64-NEXT: fcvt.l.s a1, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 6 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 48(sp) +; RV64-i64-NEXT: fcvt.l.s a2, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 5 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 40(sp) +; RV64-i64-NEXT: fcvt.l.s a3, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 4 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 32(sp) +; RV64-i64-NEXT: fcvt.l.s a4, fa5 +; RV64-i64-NEXT: sd a4, 32(sp) +; RV64-i64-NEXT: sd a3, 40(sp) +; RV64-i64-NEXT: sd a2, 48(sp) +; RV64-i64-NEXT: sd a1, 56(sp) ; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-i64-NEXT: vslidedown.vi v9, v8, 3 ; RV64-i64-NEXT: vfmv.f.s fa5, v9 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 24(sp) +; RV64-i64-NEXT: fcvt.l.s a1, fa5 ; RV64-i64-NEXT: vslidedown.vi v9, v8, 2 ; RV64-i64-NEXT: vfmv.f.s fa5, v9 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 16(sp) +; RV64-i64-NEXT: fcvt.l.s a2, fa5 ; RV64-i64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 8(sp) +; RV64-i64-NEXT: fcvt.l.s a3, fa5 +; RV64-i64-NEXT: sd a0, 0(sp) +; RV64-i64-NEXT: sd a3, 8(sp) +; RV64-i64-NEXT: sd a2, 16(sp) +; RV64-i64-NEXT: sd a1, 24(sp) ; RV64-i64-NEXT: mv a0, sp ; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-i64-NEXT: vle64.v v8, (a0) @@ -399,37 +399,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV32-NEXT: sw a0, 96(sp) ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 64(sp) ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 3 ; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 76(sp) +; RV32-NEXT: fcvt.w.s a1, fa5 ; RV32-NEXT: vslidedown.vi v10, v8, 2 ; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 72(sp) +; RV32-NEXT: fcvt.w.s a2, fa5 ; RV32-NEXT: vslidedown.vi v10, v8, 1 ; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 68(sp) +; RV32-NEXT: fcvt.w.s a3, fa5 +; RV32-NEXT: sw a0, 64(sp) +; RV32-NEXT: sw a3, 68(sp) +; RV32-NEXT: sw a2, 72(sp) +; RV32-NEXT: sw a1, 76(sp) ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 7 ; RV32-NEXT: vfmv.f.s fa5, v10 ; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 92(sp) ; RV32-NEXT: vslidedown.vi v10, v8, 6 ; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 88(sp) +; RV32-NEXT: fcvt.w.s a1, fa5 ; RV32-NEXT: vslidedown.vi v10, v8, 5 ; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 84(sp) +; RV32-NEXT: fcvt.w.s a2, fa5 ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa5, v8 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 80(sp) +; RV32-NEXT: fcvt.w.s a3, fa5 +; RV32-NEXT: sw a3, 80(sp) +; RV32-NEXT: sw a2, 84(sp) +; RV32-NEXT: sw a1, 88(sp) +; RV32-NEXT: sw a0, 92(sp) ; RV32-NEXT: addi a0, sp, 64 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) @@ -479,37 +479,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV64-i32-NEXT: sw a0, 96(sp) ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 64(sp) ; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma 
; RV64-i32-NEXT: vslidedown.vi v10, v8, 3 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 76(sp) +; RV64-i32-NEXT: fcvt.l.s a1, fa5 ; RV64-i32-NEXT: vslidedown.vi v10, v8, 2 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 72(sp) +; RV64-i32-NEXT: fcvt.l.s a2, fa5 ; RV64-i32-NEXT: vslidedown.vi v10, v8, 1 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 68(sp) +; RV64-i32-NEXT: fcvt.l.s a3, fa5 +; RV64-i32-NEXT: sw a0, 64(sp) +; RV64-i32-NEXT: sw a3, 68(sp) +; RV64-i32-NEXT: sw a2, 72(sp) +; RV64-i32-NEXT: sw a1, 76(sp) ; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-i32-NEXT: vslidedown.vi v10, v8, 7 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 92(sp) ; RV64-i32-NEXT: vslidedown.vi v10, v8, 6 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 88(sp) +; RV64-i32-NEXT: fcvt.l.s a1, fa5 ; RV64-i32-NEXT: vslidedown.vi v10, v8, 5 ; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 84(sp) +; RV64-i32-NEXT: fcvt.l.s a2, fa5 ; RV64-i32-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i32-NEXT: vfmv.f.s fa5, v8 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 80(sp) +; RV64-i32-NEXT: fcvt.l.s a3, fa5 +; RV64-i32-NEXT: sw a3, 80(sp) +; RV64-i32-NEXT: sw a2, 84(sp) +; RV64-i32-NEXT: sw a1, 88(sp) +; RV64-i32-NEXT: sw a0, 92(sp) ; RV64-i32-NEXT: addi a0, sp, 64 ; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-i32-NEXT: vle32.v v8, (a0) @@ -559,37 +559,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV64-i64-NEXT: sd a0, 192(sp) ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 128(sp) ; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-i64-NEXT: vslidedown.vi v10, v8, 3 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 152(sp) +; RV64-i64-NEXT: fcvt.l.s a1, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 2 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 144(sp) +; RV64-i64-NEXT: fcvt.l.s a2, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 1 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 136(sp) +; RV64-i64-NEXT: fcvt.l.s a3, fa5 +; RV64-i64-NEXT: sd a0, 128(sp) +; RV64-i64-NEXT: sd a3, 136(sp) +; RV64-i64-NEXT: sd a2, 144(sp) +; RV64-i64-NEXT: sd a1, 152(sp) ; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-i64-NEXT: vslidedown.vi v10, v8, 7 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 184(sp) ; RV64-i64-NEXT: vslidedown.vi v10, v8, 6 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 176(sp) +; RV64-i64-NEXT: fcvt.l.s a1, fa5 ; RV64-i64-NEXT: vslidedown.vi v10, v8, 5 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 168(sp) +; RV64-i64-NEXT: fcvt.l.s a2, fa5 ; RV64-i64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 160(sp) +; RV64-i64-NEXT: fcvt.l.s a3, fa5 +; RV64-i64-NEXT: sd a3, 160(sp) +; RV64-i64-NEXT: sd a2, 168(sp) +; RV64-i64-NEXT: sd a1, 176(sp) +; RV64-i64-NEXT: sd a0, 184(sp) ; RV64-i64-NEXT: addi a0, sp, 128 ; RV64-i64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-i64-NEXT: vle64.v v8, 
(a0) @@ -880,21 +880,21 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV64-i64-NEXT: sd a0, 96(sp) ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.d a0, fa5 -; RV64-i64-NEXT: sd a0, 64(sp) ; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-i64-NEXT: vslidedown.vi v10, v8, 1 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.d a0, fa5 -; RV64-i64-NEXT: sd a0, 72(sp) +; RV64-i64-NEXT: fcvt.l.d a1, fa5 ; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-i64-NEXT: vslidedown.vi v10, v8, 3 ; RV64-i64-NEXT: vfmv.f.s fa5, v10 -; RV64-i64-NEXT: fcvt.l.d a0, fa5 -; RV64-i64-NEXT: sd a0, 88(sp) +; RV64-i64-NEXT: fcvt.l.d a2, fa5 ; RV64-i64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 -; RV64-i64-NEXT: fcvt.l.d a0, fa5 -; RV64-i64-NEXT: sd a0, 80(sp) +; RV64-i64-NEXT: fcvt.l.d a3, fa5 +; RV64-i64-NEXT: sd a0, 64(sp) +; RV64-i64-NEXT: sd a1, 72(sp) +; RV64-i64-NEXT: sd a3, 80(sp) +; RV64-i64-NEXT: sd a2, 88(sp) ; RV64-i64-NEXT: addi a0, sp, 64 ; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-i64-NEXT: vle64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 9cd380563644941..9c6ec6aef603476 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -341,8 +341,8 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F-NEXT: vmv.x.s a3, v9 ; RV32ZVE32F-NEXT: srai a4, a3, 31 ; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 8(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a1, 8(a0) ; RV32ZVE32F-NEXT: sw a2, 12(a0) ; RV32ZVE32F-NEXT: ret ; @@ -402,10 +402,10 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F-NEXT: andi a1, a1, 255 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9 ; RV32ZVE32F-NEXT: andi a2, a2, 255 -; RV32ZVE32F-NEXT: sw zero, 12(a0) -; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw a1, 8(a0) +; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64: @@ -1034,8 +1034,8 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F-NEXT: vmv.x.s a3, v9 ; RV32ZVE32F-NEXT: srai a4, a3, 31 ; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 8(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) +; RV32ZVE32F-NEXT: sw a1, 8(a0) ; RV32ZVE32F-NEXT: sw a2, 12(a0) ; RV32ZVE32F-NEXT: ret ; @@ -1097,10 +1097,10 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F-NEXT: and a1, a1, a2 ; RV32ZVE32F-NEXT: vmv.x.s a3, v9 ; RV32ZVE32F-NEXT: and a2, a3, a2 -; RV32ZVE32F-NEXT: sw zero, 12(a0) -; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw a1, 8(a0) +; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64: @@ -2154,8 +2154,8 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: sw zero, 4(a0) +; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vse32.v v9, (a0) ; 
RV32ZVE32F-NEXT: addi a0, a0, 8 @@ -3775,36 +3775,36 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { ; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a6, v8 -; RV32ZVE32F-NEXT: lw a7, 0(a6) -; RV32ZVE32F-NEXT: lw a6, 4(a6) -; RV32ZVE32F-NEXT: lw t0, 0(a5) +; RV32ZVE32F-NEXT: lw a6, 0(a5) ; RV32ZVE32F-NEXT: lw a5, 4(a5) -; RV32ZVE32F-NEXT: sw a1, 4(a0) -; RV32ZVE32F-NEXT: sw a2, 0(a0) -; RV32ZVE32F-NEXT: sw a6, 28(a0) -; RV32ZVE32F-NEXT: sw a7, 24(a0) +; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 +; RV32ZVE32F-NEXT: vmv.x.s a7, v8 +; RV32ZVE32F-NEXT: lw t0, 0(a7) +; RV32ZVE32F-NEXT: lw a7, 4(a7) +; RV32ZVE32F-NEXT: sw a6, 16(a0) ; RV32ZVE32F-NEXT: sw a5, 20(a0) -; RV32ZVE32F-NEXT: sw t0, 16(a0) -; RV32ZVE32F-NEXT: sw a3, 12(a0) +; RV32ZVE32F-NEXT: sw t0, 24(a0) +; RV32ZVE32F-NEXT: sw a7, 28(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a1, 4(a0) ; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a3, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_truemask_v4i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 24(a1) -; RV64ZVE32F-NEXT: ld a3, 16(a1) -; RV64ZVE32F-NEXT: ld a4, 8(a1) -; RV64ZVE32F-NEXT: ld a1, 0(a1) +; RV64ZVE32F-NEXT: ld a2, 0(a1) +; RV64ZVE32F-NEXT: ld a3, 8(a1) +; RV64ZVE32F-NEXT: ld a4, 16(a1) +; RV64ZVE32F-NEXT: ld a1, 24(a1) ; RV64ZVE32F-NEXT: ld a2, 0(a2) ; RV64ZVE32F-NEXT: ld a3, 0(a3) ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: ld a1, 0(a1) -; RV64ZVE32F-NEXT: sd a2, 24(a0) -; RV64ZVE32F-NEXT: sd a3, 16(a0) -; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a1, 0(a0) +; RV64ZVE32F-NEXT: sd a2, 0(a0) +; RV64ZVE32F-NEXT: sd a3, 8(a0) +; RV64ZVE32F-NEXT: sd a4, 16(a0) +; RV64ZVE32F-NEXT: sd a1, 24(a0) ; RV64ZVE32F-NEXT: ret %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru) ret <4 x i64> %v @@ -3823,34 +3823,34 @@ define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) ; ; RV32ZVE32F-LABEL: mgather_falsemask_v4i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a2, 16(a1) -; RV32ZVE32F-NEXT: lw a3, 20(a1) -; RV32ZVE32F-NEXT: lw a4, 24(a1) -; RV32ZVE32F-NEXT: lw a5, 28(a1) -; RV32ZVE32F-NEXT: lw a6, 0(a1) -; RV32ZVE32F-NEXT: lw a7, 4(a1) -; RV32ZVE32F-NEXT: lw t0, 8(a1) -; RV32ZVE32F-NEXT: lw a1, 12(a1) -; RV32ZVE32F-NEXT: sw a5, 28(a0) -; RV32ZVE32F-NEXT: sw a4, 24(a0) -; RV32ZVE32F-NEXT: sw a3, 20(a0) -; RV32ZVE32F-NEXT: sw a2, 16(a0) -; RV32ZVE32F-NEXT: sw a1, 12(a0) -; RV32ZVE32F-NEXT: sw t0, 8(a0) -; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a3, 4(a1) +; RV32ZVE32F-NEXT: lw a4, 8(a1) +; RV32ZVE32F-NEXT: lw a5, 12(a1) +; RV32ZVE32F-NEXT: lw a6, 16(a1) +; RV32ZVE32F-NEXT: lw a7, 20(a1) +; RV32ZVE32F-NEXT: lw t0, 24(a1) +; RV32ZVE32F-NEXT: lw a1, 28(a1) +; RV32ZVE32F-NEXT: sw a6, 16(a0) +; RV32ZVE32F-NEXT: sw a7, 20(a0) +; RV32ZVE32F-NEXT: sw t0, 24(a0) +; RV32ZVE32F-NEXT: sw a1, 28(a0) +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a3, 4(a0) +; RV32ZVE32F-NEXT: sw a4, 8(a0) +; RV32ZVE32F-NEXT: sw a5, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_falsemask_v4i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a2) -; RV64ZVE32F-NEXT: ld a3, 16(a2) -; RV64ZVE32F-NEXT: ld a4, 8(a2) -; RV64ZVE32F-NEXT: ld a2, 0(a2) -; RV64ZVE32F-NEXT: sd a1, 24(a0) -; RV64ZVE32F-NEXT: sd 
a3, 16(a0) -; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a2, 0(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a2) +; RV64ZVE32F-NEXT: ld a3, 8(a2) +; RV64ZVE32F-NEXT: ld a4, 16(a2) +; RV64ZVE32F-NEXT: ld a2, 24(a2) +; RV64ZVE32F-NEXT: sd a1, 0(a0) +; RV64ZVE32F-NEXT: sd a3, 8(a0) +; RV64ZVE32F-NEXT: sd a4, 16(a0) +; RV64ZVE32F-NEXT: sd a2, 24(a0) ; RV64ZVE32F-NEXT: ret %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru) ret <4 x i64> %v @@ -9433,35 +9433,35 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa5, 0(a1) -; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v9 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: fld fa2, 0(a1) ; RV32ZVE32F-NEXT: fsd fa5, 0(a0) -; RV32ZVE32F-NEXT: fsd fa3, 24(a0) -; RV32ZVE32F-NEXT: fsd fa4, 16(a0) -; RV32ZVE32F-NEXT: fsd fa2, 8(a0) +; RV32ZVE32F-NEXT: fsd fa4, 8(a0) +; RV32ZVE32F-NEXT: fsd fa3, 16(a0) +; RV32ZVE32F-NEXT: fsd fa2, 24(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_truemask_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a2, 24(a1) -; RV64ZVE32F-NEXT: ld a3, 16(a1) -; RV64ZVE32F-NEXT: ld a4, 8(a1) -; RV64ZVE32F-NEXT: ld a1, 0(a1) +; RV64ZVE32F-NEXT: ld a2, 0(a1) +; RV64ZVE32F-NEXT: ld a3, 8(a1) +; RV64ZVE32F-NEXT: ld a4, 16(a1) +; RV64ZVE32F-NEXT: ld a1, 24(a1) ; RV64ZVE32F-NEXT: fld fa5, 0(a2) ; RV64ZVE32F-NEXT: fld fa4, 0(a3) ; RV64ZVE32F-NEXT: fld fa3, 0(a4) ; RV64ZVE32F-NEXT: fld fa2, 0(a1) -; RV64ZVE32F-NEXT: fsd fa5, 24(a0) -; RV64ZVE32F-NEXT: fsd fa4, 16(a0) -; RV64ZVE32F-NEXT: fsd fa3, 8(a0) -; RV64ZVE32F-NEXT: fsd fa2, 0(a0) +; RV64ZVE32F-NEXT: fsd fa5, 0(a0) +; RV64ZVE32F-NEXT: fsd fa4, 8(a0) +; RV64ZVE32F-NEXT: fsd fa3, 16(a0) +; RV64ZVE32F-NEXT: fsd fa2, 24(a0) ; RV64ZVE32F-NEXT: ret %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru) ret <4 x double> %v @@ -9480,18 +9480,18 @@ define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %pass ; ; RV32ZVE32F-LABEL: mgather_falsemask_v4f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: fsd fa3, 24(a0) -; RV32ZVE32F-NEXT: fsd fa2, 16(a0) -; RV32ZVE32F-NEXT: fsd fa1, 8(a0) ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: fsd fa1, 8(a0) +; RV32ZVE32F-NEXT: fsd fa2, 16(a0) +; RV32ZVE32F-NEXT: fsd fa3, 24(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_falsemask_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: fsd fa3, 24(a0) -; RV64ZVE32F-NEXT: fsd fa2, 16(a0) -; RV64ZVE32F-NEXT: fsd fa1, 8(a0) ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) +; RV64ZVE32F-NEXT: fsd fa1, 8(a0) +; RV64ZVE32F-NEXT: fsd fa2, 16(a0) +; RV64ZVE32F-NEXT: fsd fa3, 24(a0) ; RV64ZVE32F-NEXT: ret %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru) ret <4 x double> %v @@ -13489,41 +13489,41 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: 
sw a3, 216(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 240(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 220(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 236(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 216(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 212(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 240(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 236(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) ; RV32ZVE32F-NEXT: sw a1, 232(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) -; RV32ZVE32F-NEXT: sw a3, 224(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw a1, 224(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16 ; RV32ZVE32F-NEXT: lw a3, 0(a1) @@ -13583,160 +13583,160 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV32ZVE32F-NEXT: lw a2, 324(sp) ; RV32ZVE32F-NEXT: lw a3, 328(sp) ; RV32ZVE32F-NEXT: lw a4, 332(sp) -; RV32ZVE32F-NEXT: lw a5, 0(a1) -; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 0(a2) -; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 4(a2) -; RV32ZVE32F-NEXT: sw a1, 112(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a1, 0(a3) -; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw ra, 4(a3) -; RV32ZVE32F-NEXT: lw s10, 0(a4) -; RV32ZVE32F-NEXT: lw s8, 4(a4) +; RV32ZVE32F-NEXT: lw s8, 0(a1) +; RV32ZVE32F-NEXT: lw s9, 4(a1) +; RV32ZVE32F-NEXT: lw s10, 0(a2) +; RV32ZVE32F-NEXT: lw s11, 4(a2) +; RV32ZVE32F-NEXT: lw t5, 0(a3) +; RV32ZVE32F-NEXT: lw t6, 4(a3) +; RV32ZVE32F-NEXT: lw s2, 0(a4) +; RV32ZVE32F-NEXT: lw s3, 4(a4) ; RV32ZVE32F-NEXT: lw a2, 336(sp) ; RV32ZVE32F-NEXT: lw a4, 340(sp) -; RV32ZVE32F-NEXT: lw a6, 344(sp) -; RV32ZVE32F-NEXT: lw t0, 348(sp) -; RV32ZVE32F-NEXT: lw s5, 0(a2) -; RV32ZVE32F-NEXT: lw s4, 4(a2) -; RV32ZVE32F-NEXT: lw t6, 0(a4) -; RV32ZVE32F-NEXT: lw t5, 4(a4) -; RV32ZVE32F-NEXT: lw t3, 
0(a6) -; RV32ZVE32F-NEXT: lw t2, 4(a6) -; RV32ZVE32F-NEXT: lw t1, 0(t0) -; RV32ZVE32F-NEXT: lw a7, 4(t0) -; RV32ZVE32F-NEXT: lw a6, 352(sp) -; RV32ZVE32F-NEXT: lw t0, 356(sp) -; RV32ZVE32F-NEXT: lw t4, 360(sp) -; RV32ZVE32F-NEXT: lw a1, 364(sp) +; RV32ZVE32F-NEXT: lw a5, 344(sp) +; RV32ZVE32F-NEXT: lw a6, 348(sp) +; RV32ZVE32F-NEXT: lw a7, 0(a2) +; RV32ZVE32F-NEXT: lw t0, 4(a2) +; RV32ZVE32F-NEXT: lw t1, 0(a4) +; RV32ZVE32F-NEXT: lw t2, 4(a4) +; RV32ZVE32F-NEXT: lw a1, 0(a5) +; RV32ZVE32F-NEXT: lw a2, 4(a5) +; RV32ZVE32F-NEXT: lw a3, 0(a6) +; RV32ZVE32F-NEXT: lw a4, 4(a6) +; RV32ZVE32F-NEXT: lw a5, 352(sp) +; RV32ZVE32F-NEXT: lw a6, 356(sp) +; RV32ZVE32F-NEXT: lw t3, 360(sp) +; RV32ZVE32F-NEXT: lw t4, 364(sp) +; RV32ZVE32F-NEXT: lw s4, 0(a5) +; RV32ZVE32F-NEXT: sw s4, 116(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a5, 4(a5) +; RV32ZVE32F-NEXT: sw a5, 112(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: lw a5, 0(a6) -; RV32ZVE32F-NEXT: lw a6, 4(a6) -; RV32ZVE32F-NEXT: lw a2, 0(t0) -; RV32ZVE32F-NEXT: sw a2, 108(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw a2, 4(t0) -; RV32ZVE32F-NEXT: sw a2, 100(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: lw s11, 0(t4) -; RV32ZVE32F-NEXT: lw s9, 4(t4) -; RV32ZVE32F-NEXT: lw s7, 0(a1) -; RV32ZVE32F-NEXT: lw s6, 4(a1) -; RV32ZVE32F-NEXT: lw a4, 368(sp) -; RV32ZVE32F-NEXT: lw a3, 372(sp) -; RV32ZVE32F-NEXT: lw a2, 376(sp) -; RV32ZVE32F-NEXT: lw a1, 380(sp) -; RV32ZVE32F-NEXT: lw s3, 0(a4) -; RV32ZVE32F-NEXT: lw s2, 4(a4) -; RV32ZVE32F-NEXT: lw t4, 0(a3) -; RV32ZVE32F-NEXT: lw t0, 4(a3) -; RV32ZVE32F-NEXT: lw a4, 0(a2) -; RV32ZVE32F-NEXT: lw a3, 4(a2) -; RV32ZVE32F-NEXT: lw a2, 0(a1) -; RV32ZVE32F-NEXT: lw a1, 4(a1) -; RV32ZVE32F-NEXT: sw a6, 196(a0) -; RV32ZVE32F-NEXT: sw a5, 192(a0) -; RV32ZVE32F-NEXT: sw a7, 188(a0) -; RV32ZVE32F-NEXT: sw t1, 184(a0) -; RV32ZVE32F-NEXT: sw t2, 180(a0) -; RV32ZVE32F-NEXT: sw t3, 176(a0) -; RV32ZVE32F-NEXT: sw t5, 172(a0) -; RV32ZVE32F-NEXT: sw t6, 168(a0) -; RV32ZVE32F-NEXT: sw s4, 164(a0) -; RV32ZVE32F-NEXT: sw s5, 160(a0) -; RV32ZVE32F-NEXT: sw s8, 156(a0) -; RV32ZVE32F-NEXT: sw s10, 152(a0) -; RV32ZVE32F-NEXT: sw ra, 148(a0) -; RV32ZVE32F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 144(a0) -; RV32ZVE32F-NEXT: lw a5, 112(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 140(a0) -; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 136(a0) -; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 132(a0) -; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 128(a0) -; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 124(a0) -; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 120(a0) -; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 116(a0) -; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 112(a0) -; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 108(a0) -; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 104(a0) -; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 100(a0) -; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 96(a0) -; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 92(a0) -; RV32ZVE32F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 88(a0) -; RV32ZVE32F-NEXT: lw 
a5, 168(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 84(a0) -; RV32ZVE32F-NEXT: lw a5, 172(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 80(a0) -; RV32ZVE32F-NEXT: lw a5, 176(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 76(a0) -; RV32ZVE32F-NEXT: lw a5, 180(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 72(a0) -; RV32ZVE32F-NEXT: lw a5, 184(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 68(a0) -; RV32ZVE32F-NEXT: lw a5, 188(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 64(a0) -; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a5, 0(a0) -; RV32ZVE32F-NEXT: sw a1, 252(a0) -; RV32ZVE32F-NEXT: sw a2, 248(a0) -; RV32ZVE32F-NEXT: sw a3, 244(a0) -; RV32ZVE32F-NEXT: sw a4, 240(a0) -; RV32ZVE32F-NEXT: sw t0, 236(a0) -; RV32ZVE32F-NEXT: sw t4, 232(a0) -; RV32ZVE32F-NEXT: sw s2, 228(a0) -; RV32ZVE32F-NEXT: sw s3, 224(a0) -; RV32ZVE32F-NEXT: sw s6, 220(a0) -; RV32ZVE32F-NEXT: sw s7, 216(a0) -; RV32ZVE32F-NEXT: sw s9, 212(a0) -; RV32ZVE32F-NEXT: sw s11, 208(a0) -; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 204(a0) +; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a5, 4(a6) +; RV32ZVE32F-NEXT: sw a5, 120(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw ra, 0(t3) +; RV32ZVE32F-NEXT: lw a5, 4(t3) +; RV32ZVE32F-NEXT: sw a5, 108(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a5, 0(t4) +; RV32ZVE32F-NEXT: sw a5, 104(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a5, 4(t4) +; RV32ZVE32F-NEXT: sw a5, 100(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a5, 368(sp) +; RV32ZVE32F-NEXT: lw a6, 372(sp) +; RV32ZVE32F-NEXT: lw t3, 376(sp) +; RV32ZVE32F-NEXT: lw t4, 380(sp) +; RV32ZVE32F-NEXT: lw s4, 0(a5) +; RV32ZVE32F-NEXT: lw s5, 4(a5) +; RV32ZVE32F-NEXT: lw s6, 0(a6) +; RV32ZVE32F-NEXT: lw s7, 4(a6) +; RV32ZVE32F-NEXT: lw a5, 0(t3) +; RV32ZVE32F-NEXT: lw a6, 4(t3) +; RV32ZVE32F-NEXT: lw t3, 0(t4) +; RV32ZVE32F-NEXT: lw t4, 4(t4) +; RV32ZVE32F-NEXT: sw a1, 176(a0) +; RV32ZVE32F-NEXT: sw a2, 180(a0) +; RV32ZVE32F-NEXT: sw a3, 184(a0) +; RV32ZVE32F-NEXT: sw a4, 188(a0) +; RV32ZVE32F-NEXT: sw a7, 160(a0) +; RV32ZVE32F-NEXT: sw t0, 164(a0) +; RV32ZVE32F-NEXT: sw t1, 168(a0) +; RV32ZVE32F-NEXT: sw t2, 172(a0) +; RV32ZVE32F-NEXT: sw t5, 144(a0) +; RV32ZVE32F-NEXT: sw t6, 148(a0) +; RV32ZVE32F-NEXT: sw s2, 152(a0) +; RV32ZVE32F-NEXT: sw s3, 156(a0) +; RV32ZVE32F-NEXT: sw s8, 128(a0) +; RV32ZVE32F-NEXT: sw s9, 132(a0) +; RV32ZVE32F-NEXT: sw s10, 136(a0) +; RV32ZVE32F-NEXT: sw s11, 140(a0) +; RV32ZVE32F-NEXT: lw a1, 140(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 112(a0) +; RV32ZVE32F-NEXT: lw a1, 136(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 116(a0) +; RV32ZVE32F-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 120(a0) +; RV32ZVE32F-NEXT: lw a1, 128(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 124(a0) +; RV32ZVE32F-NEXT: lw a1, 156(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 96(a0) +; RV32ZVE32F-NEXT: lw a1, 152(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 100(a0) +; RV32ZVE32F-NEXT: lw a1, 148(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 104(a0) +; RV32ZVE32F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 108(a0) +; RV32ZVE32F-NEXT: lw a1, 172(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 80(a0) +; RV32ZVE32F-NEXT: lw a1, 168(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: 
sw a1, 84(a0) +; RV32ZVE32F-NEXT: lw a1, 164(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 88(a0) +; RV32ZVE32F-NEXT: lw a1, 160(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 92(a0) +; RV32ZVE32F-NEXT: lw a1, 188(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 64(a0) +; RV32ZVE32F-NEXT: lw a1, 184(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 68(a0) +; RV32ZVE32F-NEXT: lw a1, 180(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 72(a0) +; RV32ZVE32F-NEXT: lw a1, 176(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 76(a0) +; RV32ZVE32F-NEXT: sw a5, 240(a0) +; RV32ZVE32F-NEXT: sw a6, 244(a0) +; RV32ZVE32F-NEXT: sw t3, 248(a0) +; RV32ZVE32F-NEXT: sw t4, 252(a0) +; RV32ZVE32F-NEXT: sw s4, 224(a0) +; RV32ZVE32F-NEXT: sw s5, 228(a0) +; RV32ZVE32F-NEXT: sw s6, 232(a0) +; RV32ZVE32F-NEXT: sw s7, 236(a0) +; RV32ZVE32F-NEXT: sw ra, 208(a0) ; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 212(a0) +; RV32ZVE32F-NEXT: lw a1, 104(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 216(a0) +; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 220(a0) +; RV32ZVE32F-NEXT: lw a1, 116(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 192(a0) +; RV32ZVE32F-NEXT: lw a1, 112(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 196(a0) +; RV32ZVE32F-NEXT: lw a1, 124(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: sw a1, 200(a0) +; RV32ZVE32F-NEXT: lw a1, 120(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 204(a0) ; RV32ZVE32F-NEXT: lw a1, 220(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 28(a0) -; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 24(a0) -; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 20(a0) -; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: sw a1, 16(a0) -; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 12(a0) +; RV32ZVE32F-NEXT: lw a1, 216(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 20(a0) +; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 24(a0) +; RV32ZVE32F-NEXT: lw a1, 208(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 28(a0) ; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: sw a1, 8(a0) -; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 60(a0) -; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 56(a0) -; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 52(a0) +; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 12(a0) ; RV32ZVE32F-NEXT: lw a1, 204(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: sw a1, 48(a0) -; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 44(a0) -; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 40(a0) +; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 52(a0) +; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 56(a0) +; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 60(a0) +; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 32(a0) ; 
RV32ZVE32F-NEXT: lw a1, 232(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: sw a1, 36(a0) -; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: sw a1, 32(a0) +; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 40(a0) +; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 44(a0) ; RV32ZVE32F-NEXT: addi sp, s0, -512 ; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload ; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload @@ -13812,51 +13812,51 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV64ZVE32F-NEXT: ld s8, 336(a1) ; RV64ZVE32F-NEXT: ld s9, 352(a1) ; RV64ZVE32F-NEXT: ld s10, 368(a1) -; RV64ZVE32F-NEXT: ld s11, 448(a1) -; RV64ZVE32F-NEXT: ld ra, 464(a1) -; RV64ZVE32F-NEXT: ld a3, 480(a1) -; RV64ZVE32F-NEXT: ld a2, 496(a1) -; RV64ZVE32F-NEXT: ld a6, 384(a1) -; RV64ZVE32F-NEXT: ld a5, 400(a1) -; RV64ZVE32F-NEXT: ld a4, 416(a1) -; RV64ZVE32F-NEXT: ld a1, 432(a1) -; RV64ZVE32F-NEXT: sd a2, 248(a0) -; RV64ZVE32F-NEXT: sd a3, 240(a0) -; RV64ZVE32F-NEXT: sd ra, 232(a0) -; RV64ZVE32F-NEXT: sd s11, 224(a0) -; RV64ZVE32F-NEXT: sd a1, 216(a0) -; RV64ZVE32F-NEXT: sd a4, 208(a0) -; RV64ZVE32F-NEXT: sd a5, 200(a0) -; RV64ZVE32F-NEXT: sd a6, 192(a0) -; RV64ZVE32F-NEXT: sd s10, 184(a0) -; RV64ZVE32F-NEXT: sd s9, 176(a0) -; RV64ZVE32F-NEXT: sd s8, 168(a0) +; RV64ZVE32F-NEXT: ld s11, 384(a1) +; RV64ZVE32F-NEXT: ld ra, 400(a1) +; RV64ZVE32F-NEXT: ld a6, 416(a1) +; RV64ZVE32F-NEXT: ld a5, 432(a1) +; RV64ZVE32F-NEXT: ld a2, 448(a1) +; RV64ZVE32F-NEXT: ld a3, 464(a1) +; RV64ZVE32F-NEXT: ld a4, 480(a1) +; RV64ZVE32F-NEXT: ld a1, 496(a1) +; RV64ZVE32F-NEXT: sd a2, 224(a0) +; RV64ZVE32F-NEXT: sd a3, 232(a0) +; RV64ZVE32F-NEXT: sd a4, 240(a0) +; RV64ZVE32F-NEXT: sd a1, 248(a0) +; RV64ZVE32F-NEXT: sd s11, 192(a0) +; RV64ZVE32F-NEXT: sd ra, 200(a0) +; RV64ZVE32F-NEXT: sd a6, 208(a0) +; RV64ZVE32F-NEXT: sd a5, 216(a0) ; RV64ZVE32F-NEXT: sd s7, 160(a0) -; RV64ZVE32F-NEXT: sd s6, 152(a0) -; RV64ZVE32F-NEXT: sd s5, 144(a0) -; RV64ZVE32F-NEXT: sd s4, 136(a0) +; RV64ZVE32F-NEXT: sd s8, 168(a0) +; RV64ZVE32F-NEXT: sd s9, 176(a0) +; RV64ZVE32F-NEXT: sd s10, 184(a0) ; RV64ZVE32F-NEXT: sd s3, 128(a0) -; RV64ZVE32F-NEXT: sd s2, 120(a0) -; RV64ZVE32F-NEXT: sd s1, 112(a0) -; RV64ZVE32F-NEXT: sd s0, 104(a0) +; RV64ZVE32F-NEXT: sd s4, 136(a0) +; RV64ZVE32F-NEXT: sd s5, 144(a0) +; RV64ZVE32F-NEXT: sd s6, 152(a0) ; RV64ZVE32F-NEXT: sd t6, 96(a0) -; RV64ZVE32F-NEXT: sd t5, 88(a0) -; RV64ZVE32F-NEXT: sd t4, 80(a0) -; RV64ZVE32F-NEXT: sd t3, 72(a0) +; RV64ZVE32F-NEXT: sd s0, 104(a0) +; RV64ZVE32F-NEXT: sd s1, 112(a0) +; RV64ZVE32F-NEXT: sd s2, 120(a0) ; RV64ZVE32F-NEXT: sd t2, 64(a0) -; RV64ZVE32F-NEXT: sd t1, 56(a0) -; RV64ZVE32F-NEXT: sd t0, 48(a0) -; RV64ZVE32F-NEXT: sd a7, 40(a0) +; RV64ZVE32F-NEXT: sd t3, 72(a0) +; RV64ZVE32F-NEXT: sd t4, 80(a0) +; RV64ZVE32F-NEXT: sd t5, 88(a0) ; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload ; RV64ZVE32F-NEXT: sd a1, 32(a0) -; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload -; RV64ZVE32F-NEXT: sd a1, 24(a0) -; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload -; RV64ZVE32F-NEXT: sd a1, 16(a0) -; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload -; RV64ZVE32F-NEXT: sd a1, 8(a0) +; RV64ZVE32F-NEXT: sd a7, 40(a0) +; RV64ZVE32F-NEXT: sd t0, 48(a0) +; RV64ZVE32F-NEXT: sd t1, 56(a0) ; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload ; RV64ZVE32F-NEXT: sd a1, 0(a0) +; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 8(a0) +; RV64ZVE32F-NEXT: 
ld a1, 16(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 24(a0) ; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload ; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload ; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index bc7758717c1c152..323f08acac28e36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -2863,8 +2863,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) { ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(a2) ; RV32ZVE32F-NEXT: sw a0, 0(a2) +; RV32ZVE32F-NEXT: sw a1, 4(a2) ; RV32ZVE32F-NEXT: .LBB36_2: # %else ; RV32ZVE32F-NEXT: ret ; @@ -2910,20 +2910,20 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) { ; RV32ZVE32F-NEXT: .LBB37_2: # %else2 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store -; RV32ZVE32F-NEXT: lw a4, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a4, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a5, v8 -; RV32ZVE32F-NEXT: sw a4, 4(a5) -; RV32ZVE32F-NEXT: sw a0, 0(a5) +; RV32ZVE32F-NEXT: sw a4, 0(a5) +; RV32ZVE32F-NEXT: sw a0, 4(a5) ; RV32ZVE32F-NEXT: andi a3, a3, 2 ; RV32ZVE32F-NEXT: beqz a3, .LBB37_2 ; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: sw a1, 0(a0) +; RV32ZVE32F-NEXT: sw a2, 4(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v2i64: @@ -2987,20 +2987,20 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { ; RV32ZVE32F-NEXT: .LBB38_4: # %else6 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store -; RV32ZVE32F-NEXT: lw t0, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw t0, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t1, v8 -; RV32ZVE32F-NEXT: sw t0, 4(t1) -; RV32ZVE32F-NEXT: sw a0, 0(t1) +; RV32ZVE32F-NEXT: sw t0, 0(t1) +; RV32ZVE32F-NEXT: sw a0, 4(t1) ; RV32ZVE32F-NEXT: andi a0, a5, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_2 ; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a5, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_3 ; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3 @@ -3208,20 +3208,20 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw s1, 0(s2) +; RV32ZVE32F-NEXT: sw 
a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_2 ; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) ; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_3 ; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3 @@ -3443,19 +3443,19 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2 ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3 ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3 @@ -3687,19 +3687,19 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2 ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3 ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3 @@ -3933,19 +3933,19 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3 @@ -4186,19 +4186,19 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: 
.LBB45_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 @@ -4431,19 +4431,19 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3 @@ -4678,19 +4678,19 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3 @@ -4932,19 +4932,19 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; 
RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 @@ -5175,19 +5175,19 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 @@ -5419,19 +5419,19 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 @@ -5705,19 +5705,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: addi sp, sp, 48 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store -; RV32ZVE32F-NEXT: lw a1, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 4(s2) -; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw a0, 4(s2) ; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: andi a0, a2, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 016f95bfef7e719..080ffe1a78b511f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -833,8 +833,8 @@ define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; 
RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -867,8 +867,8 @@ define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -901,8 +901,8 @@ define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -935,8 +935,8 @@ define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -969,8 +969,8 @@ define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1003,8 +1003,8 @@ define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1037,8 +1037,8 @@ define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1071,8 +1071,8 @@ define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1105,8 +1105,8 @@ define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1139,8 +1139,8 @@ define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, 
e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1173,8 +1173,8 @@ define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1207,8 +1207,8 @@ define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1241,8 +1241,8 @@ define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1275,8 +1275,8 @@ define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1309,8 +1309,8 @@ define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1343,8 +1343,8 @@ define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 5f3847e085055b3..0cecec31e2bda38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -1376,8 +1376,8 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1404,8 +1404,8 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1465,8 +1465,8 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, 
-16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1493,8 +1493,8 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1554,8 +1554,8 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1582,8 +1582,8 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1643,8 +1643,8 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1673,8 +1673,8 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1734,8 +1734,8 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1762,8 +1762,8 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1823,8 +1823,8 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1851,8 +1851,8 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; 
RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1912,8 +1912,8 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1942,8 +1942,8 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2003,8 +2003,8 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2031,8 +2031,8 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2092,8 +2092,8 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2120,8 +2120,8 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll index 0d8a9692442353a..ff5f6960ed4e4d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll @@ -86,8 +86,8 @@ define <2 x i64> @vslide1down_2xi64(<2 x i64> %v, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -111,8 +111,8 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll index d1fb30c7daa3ea0..e0b2dd1af918357 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll @@ -93,8 +93,8 @@ define <2 x i64> @vslide1up_2xi64(<2 x i64> %v, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -119,8 +119,8 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 072c88f6353eaf1..4ef65032469e418 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -259,10 +259,10 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) { ; ; ZVE32F-LABEL: vnsrl_0_i64: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: ld a2, 16(a0) -; ZVE32F-NEXT: ld a0, 0(a0) -; ZVE32F-NEXT: sd a2, 8(a1) -; ZVE32F-NEXT: sd a0, 0(a1) +; ZVE32F-NEXT: ld a2, 0(a0) +; ZVE32F-NEXT: ld a0, 16(a0) +; ZVE32F-NEXT: sd a2, 0(a1) +; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 @@ -285,10 +285,10 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) { ; ; ZVE32F-LABEL: vnsrl_64_i64: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: ld a2, 24(a0) -; ZVE32F-NEXT: ld a0, 8(a0) -; ZVE32F-NEXT: sd a2, 8(a1) -; ZVE32F-NEXT: sd a0, 0(a1) +; ZVE32F-NEXT: ld a2, 8(a0) +; ZVE32F-NEXT: ld a0, 24(a0) +; ZVE32F-NEXT: sd a2, 0(a1) +; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 @@ -310,10 +310,10 @@ define void @vnsrl_0_double(ptr %in, ptr %out) { ; ; ZVE32F-LABEL: vnsrl_0_double: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: ld a2, 16(a0) -; ZVE32F-NEXT: ld a0, 0(a0) -; ZVE32F-NEXT: sd a2, 8(a1) -; ZVE32F-NEXT: sd a0, 0(a1) +; ZVE32F-NEXT: ld a2, 0(a0) +; ZVE32F-NEXT: ld a0, 16(a0) +; ZVE32F-NEXT: sd a2, 0(a1) +; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret entry: %0 = load <4 x double>, ptr %in, align 8 @@ -336,10 +336,10 @@ define void @vnsrl_64_double(ptr %in, ptr %out) { ; ; ZVE32F-LABEL: vnsrl_64_double: ; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: ld a2, 24(a0) -; ZVE32F-NEXT: ld a0, 8(a0) -; ZVE32F-NEXT: sd a2, 8(a1) -; ZVE32F-NEXT: sd a0, 0(a1) +; ZVE32F-NEXT: ld a2, 8(a0) +; ZVE32F-NEXT: ld a0, 24(a0) +; ZVE32F-NEXT: sd a2, 0(a1) +; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret entry: %0 = load <4 x double>, ptr %in, align 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 9385fa69b2f0490..28202dc07f9564c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -663,14 +663,14 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F-NEXT: add a6, a1, a6 ; ZVE32F-NEXT: mul a7, a2, a5 ; ZVE32F-NEXT: add a7, a1, a7 -; 
ZVE32F-NEXT: ld t0, 0(a6) -; ZVE32F-NEXT: ld t1, 0(a7) -; ZVE32F-NEXT: ld a6, 80(a6) +; ZVE32F-NEXT: ld t0, 0(a7) +; ZVE32F-NEXT: ld t1, 0(a6) ; ZVE32F-NEXT: ld a7, 80(a7) -; ZVE32F-NEXT: sd t0, 8(a0) -; ZVE32F-NEXT: sd t1, 0(a0) -; ZVE32F-NEXT: sd a6, 24(a0) +; ZVE32F-NEXT: ld a6, 80(a6) +; ZVE32F-NEXT: sd t0, 0(a0) +; ZVE32F-NEXT: sd t1, 8(a0) ; ZVE32F-NEXT: sd a7, 16(a0) +; ZVE32F-NEXT: sd a6, 24(a0) ; ZVE32F-NEXT: addi a2, a2, 4 ; ZVE32F-NEXT: addi a0, a0, 32 ; ZVE32F-NEXT: addi a4, a4, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index eba3bd9d86dbb39..293b75dc207c86e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -281,9 +281,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV32-SLOW-NEXT: vmv.x.s a1, v8 ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-SLOW-NEXT: vmv.x.s a2, v9 +; RV32-SLOW-NEXT: srli a3, a1, 8 ; RV32-SLOW-NEXT: sb a1, 0(a2) -; RV32-SLOW-NEXT: srli a1, a1, 8 -; RV32-SLOW-NEXT: sb a1, 1(a2) +; RV32-SLOW-NEXT: sb a3, 1(a2) ; RV32-SLOW-NEXT: andi a1, a0, 2 ; RV32-SLOW-NEXT: beqz a1, .LBB6_2 ; RV32-SLOW-NEXT: .LBB6_6: # %cond.store1 @@ -293,9 +293,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 1 ; RV32-SLOW-NEXT: vmv.x.s a2, v10 +; RV32-SLOW-NEXT: srli a3, a1, 8 ; RV32-SLOW-NEXT: sb a1, 0(a2) -; RV32-SLOW-NEXT: srli a1, a1, 8 -; RV32-SLOW-NEXT: sb a1, 1(a2) +; RV32-SLOW-NEXT: sb a3, 1(a2) ; RV32-SLOW-NEXT: andi a1, a0, 4 ; RV32-SLOW-NEXT: beqz a1, .LBB6_3 ; RV32-SLOW-NEXT: .LBB6_7: # %cond.store3 @@ -305,9 +305,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 2 ; RV32-SLOW-NEXT: vmv.x.s a2, v10 +; RV32-SLOW-NEXT: srli a3, a1, 8 ; RV32-SLOW-NEXT: sb a1, 0(a2) -; RV32-SLOW-NEXT: srli a1, a1, 8 -; RV32-SLOW-NEXT: sb a1, 1(a2) +; RV32-SLOW-NEXT: sb a3, 1(a2) ; RV32-SLOW-NEXT: andi a0, a0, 8 ; RV32-SLOW-NEXT: beqz a0, .LBB6_4 ; RV32-SLOW-NEXT: .LBB6_8: # %cond.store5 @@ -317,9 +317,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 3 ; RV32-SLOW-NEXT: vmv.x.s a1, v8 +; RV32-SLOW-NEXT: srli a2, a0, 8 ; RV32-SLOW-NEXT: sb a0, 0(a1) -; RV32-SLOW-NEXT: srli a0, a0, 8 -; RV32-SLOW-NEXT: sb a0, 1(a1) +; RV32-SLOW-NEXT: sb a2, 1(a1) ; RV32-SLOW-NEXT: ret ; ; RV64-SLOW-LABEL: mscatter_v4i16_align1: @@ -345,8 +345,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-SLOW-NEXT: vmv.x.s a2, v10 ; RV64-SLOW-NEXT: srli a3, a1, 8 -; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: sb a1, 0(a2) +; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: andi a1, a0, 2 ; RV64-SLOW-NEXT: beqz a1, .LBB6_2 ; RV64-SLOW-NEXT: .LBB6_6: # %cond.store1 @@ -357,8 +357,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV64-SLOW-NEXT: vslidedown.vi v9, v10, 1 ; RV64-SLOW-NEXT: vmv.x.s a2, v9 ; RV64-SLOW-NEXT: srli a3, a1, 8 -; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: sb a1, 0(a2) +; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: andi a1, a0, 4 ; RV64-SLOW-NEXT: beqz a1, .LBB6_3 
; RV64-SLOW-NEXT: .LBB6_7: # %cond.store3 @@ -369,8 +369,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV64-SLOW-NEXT: vslidedown.vi v12, v10, 2 ; RV64-SLOW-NEXT: vmv.x.s a2, v12 ; RV64-SLOW-NEXT: srli a3, a1, 8 -; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: sb a1, 0(a2) +; RV64-SLOW-NEXT: sb a3, 1(a2) ; RV64-SLOW-NEXT: andi a0, a0, 8 ; RV64-SLOW-NEXT: beqz a0, .LBB6_4 ; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5 @@ -381,8 +381,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) ; RV64-SLOW-NEXT: vslidedown.vi v8, v10, 3 ; RV64-SLOW-NEXT: vmv.x.s a1, v8 ; RV64-SLOW-NEXT: srli a2, a0, 8 -; RV64-SLOW-NEXT: sb a2, 1(a1) ; RV64-SLOW-NEXT: sb a0, 0(a1) +; RV64-SLOW-NEXT: sb a2, 1(a1) ; RV64-SLOW-NEXT: ret ; ; RV32-FAST-LABEL: mscatter_v4i16_align1: @@ -418,9 +418,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-SLOW-NEXT: vmv.x.s a1, v8 ; RV32-SLOW-NEXT: vmv.x.s a2, v9 +; RV32-SLOW-NEXT: srli a3, a1, 16 ; RV32-SLOW-NEXT: sh a1, 0(a2) -; RV32-SLOW-NEXT: srli a1, a1, 16 -; RV32-SLOW-NEXT: sh a1, 2(a2) +; RV32-SLOW-NEXT: sh a3, 2(a2) ; RV32-SLOW-NEXT: andi a0, a0, 2 ; RV32-SLOW-NEXT: beqz a0, .LBB7_2 ; RV32-SLOW-NEXT: .LBB7_4: # %cond.store1 @@ -429,9 +429,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) ; RV32-SLOW-NEXT: vmv.x.s a0, v8 ; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 1 ; RV32-SLOW-NEXT: vmv.x.s a1, v8 +; RV32-SLOW-NEXT: srli a2, a0, 16 ; RV32-SLOW-NEXT: sh a0, 0(a1) -; RV32-SLOW-NEXT: srli a0, a0, 16 -; RV32-SLOW-NEXT: sh a0, 2(a1) +; RV32-SLOW-NEXT: sh a2, 2(a1) ; RV32-SLOW-NEXT: ret ; ; RV64-SLOW-LABEL: mscatter_v2i32_align2: @@ -450,9 +450,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) ; RV64-SLOW-NEXT: vmv.x.s a1, v8 ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-SLOW-NEXT: vmv.x.s a2, v9 +; RV64-SLOW-NEXT: srli a3, a1, 16 ; RV64-SLOW-NEXT: sh a1, 0(a2) -; RV64-SLOW-NEXT: srli a1, a1, 16 -; RV64-SLOW-NEXT: sh a1, 2(a2) +; RV64-SLOW-NEXT: sh a3, 2(a2) ; RV64-SLOW-NEXT: andi a0, a0, 2 ; RV64-SLOW-NEXT: beqz a0, .LBB7_2 ; RV64-SLOW-NEXT: .LBB7_4: # %cond.store1 @@ -462,9 +462,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-SLOW-NEXT: vslidedown.vi v8, v9, 1 ; RV64-SLOW-NEXT: vmv.x.s a1, v8 +; RV64-SLOW-NEXT: srli a2, a0, 16 ; RV64-SLOW-NEXT: sh a0, 0(a1) -; RV64-SLOW-NEXT: srli a0, a0, 16 -; RV64-SLOW-NEXT: sh a0, 2(a1) +; RV64-SLOW-NEXT: sh a2, 2(a1) ; RV64-SLOW-NEXT: ret ; ; RV32-FAST-LABEL: mscatter_v2i32_align2: @@ -605,18 +605,18 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou ; SLOW-NEXT: .LBB9_3: # %cond.store ; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; SLOW-NEXT: vmv.x.s a2, v8 +; SLOW-NEXT: srli a3, a2, 16 ; SLOW-NEXT: sh a2, 0(a0) -; SLOW-NEXT: srli a2, a2, 16 -; SLOW-NEXT: sh a2, 2(a0) +; SLOW-NEXT: sh a3, 2(a0) ; SLOW-NEXT: andi a1, a1, 2 ; SLOW-NEXT: beqz a1, .LBB9_2 ; SLOW-NEXT: .LBB9_4: # %cond.store1 ; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; SLOW-NEXT: vslidedown.vi v8, v8, 1 ; SLOW-NEXT: vmv.x.s a1, v8 +; SLOW-NEXT: srli a2, a1, 16 ; SLOW-NEXT: sh a1, 4(a0) -; SLOW-NEXT: srli a1, a1, 16 -; SLOW-NEXT: sh a1, 6(a0) +; SLOW-NEXT: sh a2, 6(a0) ; SLOW-NEXT: ret ; ; FAST-LABEL: masked_store_v2i32_align2: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll index ea7f6beb22a7cb1..da34396d72a7947 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll @@ -185,8 +185,8 @@ define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -428,8 +428,8 @@ define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 6246ef7db0cb339..2f7b7b6807ce212 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -992,8 +992,8 @@ define <2 x i64> @vadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1018,8 +1018,8 @@ define <2 x i64> @vadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1086,8 +1086,8 @@ define <4 x i64> @vadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1112,8 +1112,8 @@ define <4 x i64> @vadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1180,8 +1180,8 @@ define <8 x i64> @vadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1206,8 +1206,8 @@ define <8 x i64> @vadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1274,8 +1274,8 @@ define <16 x i64> 
@vadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1300,8 +1300,8 @@ define <16 x i64> @vadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index c413dd86f37128c..fb7816ce254583e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -859,8 +859,8 @@ define <2 x i64> @vand_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -885,8 +885,8 @@ define <2 x i64> @vand_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -953,8 +953,8 @@ define <4 x i64> @vand_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -979,8 +979,8 @@ define <4 x i64> @vand_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1047,8 +1047,8 @@ define <8 x i64> @vand_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1073,8 +1073,8 @@ define <8 x i64> @vand_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1141,8 +1141,8 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; 
RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1167,8 +1167,8 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1235,8 +1235,8 @@ define <16 x i64> @vand_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1261,8 +1261,8 @@ define <16 x i64> @vand_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index e626727ffb8b4cc..d1f77bf275ab5c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -611,8 +611,8 @@ define <2 x i64> @vdiv_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -637,8 +637,8 @@ define <2 x i64> @vdiv_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -685,8 +685,8 @@ define <4 x i64> @vdiv_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -711,8 +711,8 @@ define <4 x i64> @vdiv_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -759,8 +759,8 @@ define <8 x i64> @vdiv_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -785,8 +785,8 @@ define <8 x i64> @vdiv_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; 
RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -833,8 +833,8 @@ define <16 x i64> @vdiv_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -859,8 +859,8 @@ define <16 x i64> @vdiv_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll index 3715449ef27f064..0219ebf503aa599 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll @@ -610,8 +610,8 @@ define <2 x i64> @vdivu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -636,8 +636,8 @@ define <2 x i64> @vdivu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -684,8 +684,8 @@ define <4 x i64> @vdivu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -710,8 +710,8 @@ define <4 x i64> @vdivu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -758,8 +758,8 @@ define <8 x i64> @vdivu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -784,8 +784,8 @@ define <8 x i64> @vdivu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: 
vlse64.v v12, (a0), zero @@ -832,8 +832,8 @@ define <16 x i64> @vdivu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -858,8 +858,8 @@ define <16 x i64> @vdivu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll index 4805d6782a3b934..382101cbdffb33c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll @@ -1375,8 +1375,8 @@ define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1405,8 +1405,8 @@ define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1448,8 +1448,8 @@ define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1509,8 +1509,8 @@ define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1539,8 +1539,8 @@ define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1582,8 +1582,8 @@ define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1643,8 +1643,8 @@ define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw 
a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1673,8 +1673,8 @@ define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1716,8 +1716,8 @@ define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 6adc6ba9621a8fe..9a3a513167d8795 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -744,8 +744,8 @@ define <2 x i64> @vmax_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -770,8 +770,8 @@ define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -818,8 +818,8 @@ define <4 x i64> @vmax_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -844,8 +844,8 @@ define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -892,8 +892,8 @@ define <8 x i64> @vmax_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -918,8 +918,8 @@ define <8 x i64> @vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -966,8 +966,8 @@ define <16 x i64> @vmax_vx_v16i64(<16 x 
i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -992,8 +992,8 @@ define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index baeb372c017e2e4..5090cb2899eb703 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -743,8 +743,8 @@ define <2 x i64> @vmaxu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -769,8 +769,8 @@ define <2 x i64> @vmaxu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -817,8 +817,8 @@ define <4 x i64> @vmaxu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -843,8 +843,8 @@ define <4 x i64> @vmaxu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -891,8 +891,8 @@ define <8 x i64> @vmaxu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -917,8 +917,8 @@ define <8 x i64> @vmaxu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -965,8 +965,8 @@ define <16 x i64> @vmaxu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -991,8 +991,8 @@ define <16 x i64> @vmaxu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index d0c21ce05c02545..a25eddc8d973fcc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -744,8 +744,8 @@ define <2 x i64> @vmin_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -770,8 +770,8 @@ define <2 x i64> @vmin_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -818,8 +818,8 @@ define <4 x i64> @vmin_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -844,8 +844,8 @@ define <4 x i64> @vmin_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -892,8 +892,8 @@ define <8 x i64> @vmin_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -918,8 +918,8 @@ define <8 x i64> @vmin_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -966,8 +966,8 @@ define <16 x i64> @vmin_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -992,8 +992,8 @@ define <16 x i64> @vmin_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; 
RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index a730ba4729d252a..70df4d000ba925d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -743,8 +743,8 @@ define <2 x i64> @vminu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -769,8 +769,8 @@ define <2 x i64> @vminu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -817,8 +817,8 @@ define <4 x i64> @vminu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -843,8 +843,8 @@ define <4 x i64> @vminu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -891,8 +891,8 @@ define <8 x i64> @vminu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -917,8 +917,8 @@ define <8 x i64> @vminu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -965,8 +965,8 @@ define <16 x i64> @vminu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -991,8 +991,8 @@ define <16 x i64> @vminu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff 
--git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll index 8970fbf740d2356..143ba263267fa24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll @@ -653,8 +653,8 @@ define <2 x i64> @vmul_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -679,8 +679,8 @@ define <2 x i64> @vmul_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -727,8 +727,8 @@ define <4 x i64> @vmul_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -753,8 +753,8 @@ define <4 x i64> @vmul_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -801,8 +801,8 @@ define <8 x i64> @vmul_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -827,8 +827,8 @@ define <8 x i64> @vmul_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -875,8 +875,8 @@ define <16 x i64> @vmul_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -901,8 +901,8 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll index 805e2e2e6bd35d1..7540b493aeded05 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll @@ -1375,8 +1375,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1405,8 +1405,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1448,8 +1448,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1509,8 +1509,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1539,8 +1539,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1582,8 +1582,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1643,8 +1643,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1673,8 +1673,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1716,8 +1716,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll index 09c281b525a6439..e8a10fb88642491 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll @@ -913,8 +913,8 @@ define <2 x i64> @vor_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -939,8 +939,8 @@ define <2 x i64> @vor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1007,8 +1007,8 @@ define <4 x i64> @vor_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1033,8 +1033,8 @@ define <4 x i64> @vor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1101,8 +1101,8 @@ define <8 x i64> @vor_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1127,8 +1127,8 @@ define <8 x i64> @vor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1195,8 +1195,8 @@ define <16 x i64> @vor_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1221,8 +1221,8 @@ define <16 x i64> @vor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll index 2913cbdf0fffd22..a31405f75e8a4e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll @@ 
-187,8 +187,8 @@ define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -209,8 +209,8 @@ define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -231,8 +231,8 @@ define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -253,8 +253,8 @@ define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index bdf76dc63ddd856..df1c84a9e05d816 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -566,8 +566,8 @@ define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -586,8 +586,8 @@ define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroe ; RV32ZVFHMIN: # %bb.0: ; RV32ZVFHMIN-NEXT: addi sp, sp, -16 ; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: sw a0, 8(sp) +; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: addi a0, sp, 8 ; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32ZVFHMIN-NEXT: vlse64.v v9, (a0), zero @@ -635,8 +635,8 @@ define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -655,8 +655,8 @@ define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroe ; RV32ZVFHMIN: # %bb.0: ; RV32ZVFHMIN-NEXT: addi sp, sp, -16 ; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: sw a0, 8(sp) +; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: addi a0, sp, 8 ; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32ZVFHMIN-NEXT: vlse64.v v10, (a0), zero @@ -704,8 +704,8 @@ define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: addi sp, 
sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -724,8 +724,8 @@ define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroe ; RV32ZVFHMIN: # %bb.0: ; RV32ZVFHMIN-NEXT: addi sp, sp, -16 ; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: sw a0, 8(sp) +; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: addi a0, sp, 8 ; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32ZVFHMIN-NEXT: vlse64.v v12, (a0), zero @@ -773,8 +773,8 @@ define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 z ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -793,8 +793,8 @@ define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 z ; RV32ZVFHMIN: # %bb.0: ; RV32ZVFHMIN-NEXT: addi sp, sp, -16 ; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: sw a0, 8(sp) +; RV32ZVFHMIN-NEXT: sw a1, 12(sp) ; RV32ZVFHMIN-NEXT: addi a0, sp, 8 ; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32ZVFHMIN-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll index aa76324f3804f1b..2ce6df5ce197ea6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll @@ -611,8 +611,8 @@ define <2 x i64> @vrem_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -637,8 +637,8 @@ define <2 x i64> @vrem_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -685,8 +685,8 @@ define <4 x i64> @vrem_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -711,8 +711,8 @@ define <4 x i64> @vrem_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -759,8 +759,8 @@ define <8 x i64> @vrem_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw 
a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -785,8 +785,8 @@ define <8 x i64> @vrem_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -833,8 +833,8 @@ define <16 x i64> @vrem_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -859,8 +859,8 @@ define <16 x i64> @vrem_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll index 24fa9357f91660c..08ae37e13caaea9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll @@ -610,8 +610,8 @@ define <2 x i64> @vremu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -636,8 +636,8 @@ define <2 x i64> @vremu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -684,8 +684,8 @@ define <4 x i64> @vremu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -710,8 +710,8 @@ define <4 x i64> @vremu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -758,8 +758,8 @@ define <8 x i64> @vremu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -784,8 +784,8 @@ define <8 x i64> @vremu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: 
# %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -832,8 +832,8 @@ define <16 x i64> @vremu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -858,8 +858,8 @@ define <16 x i64> @vremu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll index 563482b88e8bdbc..bc2e0e5833f5cb6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll @@ -563,8 +563,8 @@ define <2 x i64> @vrsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -589,8 +589,8 @@ define <2 x i64> @vrsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -637,8 +637,8 @@ define <4 x i64> @vrsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -663,8 +663,8 @@ define <4 x i64> @vrsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -711,8 +711,8 @@ define <8 x i64> @vrsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -737,8 +737,8 @@ define <8 x i64> @vrsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; 
RV32-NEXT: vlse64.v v12, (a0), zero @@ -785,8 +785,8 @@ define <16 x i64> @vrsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -811,8 +811,8 @@ define <16 x i64> @vrsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 5030fda9dea3313..056a7f44c9da2df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -1005,8 +1005,8 @@ define <2 x i64> @vsadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1031,8 +1031,8 @@ define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1099,8 +1099,8 @@ define <4 x i64> @vsadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1125,8 +1125,8 @@ define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1193,8 +1193,8 @@ define <8 x i64> @vsadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1219,8 +1219,8 @@ define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1287,8 +1287,8 @@ define <16 x i64> @vsadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 
-; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1313,8 +1313,8 @@ define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll index 741699289e0271e..f69675b4a24b013 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll @@ -441,8 +441,8 @@ define <2 x i64> @sadd_v2i64_vx(<2 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -488,8 +488,8 @@ define <4 x i64> @sadd_v4i64_vx(<4 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -535,8 +535,8 @@ define <8 x i64> @sadd_v8i64_vx(<8 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -582,8 +582,8 @@ define <16 x i64> @sadd_v16i64_vx(<16 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index 562399ea33e7a82..b79ec290e3d3e12 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -1001,8 +1001,8 @@ define <2 x i64> @vsaddu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1027,8 +1027,8 @@ define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1095,8 +1095,8 @@ define <4 x i64> @vsaddu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; 
RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1121,8 +1121,8 @@ define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1189,8 +1189,8 @@ define <8 x i64> @vsaddu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1215,8 +1215,8 @@ define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1283,8 +1283,8 @@ define <16 x i64> @vsaddu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 ze ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1309,8 +1309,8 @@ define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll index 7b2cab294aa4965..7397147a7580c15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll @@ -441,8 +441,8 @@ define <2 x i64> @uadd_v2i64_vx(<2 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -488,8 +488,8 @@ define <4 x i64> @uadd_v4i64_vx(<4 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -535,8 +535,8 @@ define <8 x i64> @uadd_v8i64_vx(<8 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -582,8 +582,8 @@ define <16 x i64> @uadd_v16i64_vx(<16 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; 
RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 549c6ca11e320e3..c989736378479e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -1037,8 +1037,8 @@ define <2 x i64> @vssub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1063,8 +1063,8 @@ define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1133,8 +1133,8 @@ define <4 x i64> @vssub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1159,8 +1159,8 @@ define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1229,8 +1229,8 @@ define <8 x i64> @vssub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1255,8 +1255,8 @@ define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1325,8 +1325,8 @@ define <16 x i64> @vssub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1351,8 +1351,8 @@ define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, 
(a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll index efe28eb9021ce23..e1746a31847da58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll @@ -441,8 +441,8 @@ define <2 x i64> @ssub_v2i64_vx(<2 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -489,8 +489,8 @@ define <4 x i64> @ssub_v4i64_vx(<4 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -537,8 +537,8 @@ define <8 x i64> @ssub_v8i64_vx(<8 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -585,8 +585,8 @@ define <16 x i64> @ssub_v16i64_vx(<16 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 683f1150310b395..1fc2a18631001b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -1032,8 +1032,8 @@ define <2 x i64> @vssubu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1058,8 +1058,8 @@ define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1128,8 +1128,8 @@ define <4 x i64> @vssubu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1154,8 +1154,8 @@ define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1224,8 +1224,8 @@ define 
<8 x i64> @vssubu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1250,8 +1250,8 @@ define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1320,8 +1320,8 @@ define <16 x i64> @vssubu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 ze ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1346,8 +1346,8 @@ define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll index dc9279f6e7fa09c..28c29d2960b2686 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll @@ -441,8 +441,8 @@ define <2 x i64> @usub_v2i64_vx(<2 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -489,8 +489,8 @@ define <4 x i64> @usub_v4i64_vx(<4 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -537,8 +537,8 @@ define <8 x i64> @usub_v8i64_vx(<8 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -585,8 +585,8 @@ define <16 x i64> @usub_v16i64_vx(<16 x i64> %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll index 6052c9ee20fe102..7f1ad26bcda7a30 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll @@ -641,8 +641,8 @@ define <2 x i64> @vsub_vx_v2i64(<2 x i64> %va, 
i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -667,8 +667,8 @@ define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -715,8 +715,8 @@ define <4 x i64> @vsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -741,8 +741,8 @@ define <4 x i64> @vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -789,8 +789,8 @@ define <8 x i64> @vsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -815,8 +815,8 @@ define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -863,8 +863,8 @@ define <16 x i64> @vsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -889,8 +889,8 @@ define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index 3a222e95566a48f..50184796b38f534 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -824,12 +824,12 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, 
ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwadd.wv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index 1fc6af2d4cc1c63..98f246b8741dcc8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -773,8 +773,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwaddu.wv v8, v8, v9 @@ -805,8 +805,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lhu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwaddu.wv v8, v8, v9 @@ -837,8 +837,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwaddu.wv v8, v8, v9 @@ -866,12 +866,12 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwaddu.wv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index 97c7f101c258275..01f2fe506e85f77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -853,12 +853,12 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsext.vf2 v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll index c73b3a0dce6be4b..db2f544ab306696 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -794,8 +794,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) { ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw 
a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsext.vf2 v10, v8 @@ -828,8 +828,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) { ; RV32-NEXT: lhu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsext.vf2 v10, v8 @@ -862,8 +862,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) { ; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsext.vf2 v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index d632dc4c2a30d27..7a925165d981639 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -830,12 +830,12 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwsub.wv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index 9b5f4a5012f4ed6..4c08a8c15a388e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -776,8 +776,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwsubu.wv v8, v8, v9 @@ -809,8 +809,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lhu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwsubu.wv v8, v8, v9 @@ -842,8 +842,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwsubu.wv v8, v8, v9 @@ -872,12 +872,12 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: 
sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vwsubu.wv v8, v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll index 16487a0784125e8..677b4ea6e91eca6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll @@ -1173,8 +1173,8 @@ define <2 x i64> @vxor_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1199,8 +1199,8 @@ define <2 x i64> @vxor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1287,8 +1287,8 @@ define <4 x i64> @vxor_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1313,8 +1313,8 @@ define <4 x i64> @vxor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1401,8 +1401,8 @@ define <8 x i64> @vxor_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1427,8 +1427,8 @@ define <8 x i64> @vxor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1515,8 +1515,8 @@ define <16 x i64> @vxor_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1541,8 +1541,8 @@ define <16 x i64> @vxor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll 
index 5a880105f683795..5a1f7f54305846c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -154,10 +154,10 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB3_8: # %entry ; CHECK-NOV-NEXT: lui a1, 524288 ; CHECK-NOV-NEXT: .LBB3_9: # %entry -; CHECK-NOV-NEXT: sw a1, 12(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a4, 4(a0) ; CHECK-NOV-NEXT: sw a5, 0(a0) +; CHECK-NOV-NEXT: sw a4, 4(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB3_10: # %entry ; CHECK-NOV-NEXT: mv a1, a6 @@ -219,10 +219,10 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB4_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB4_5: # %entry -; CHECK-NOV-NEXT: sw a5, 12(a0) -; CHECK-NOV-NEXT: sw a4, 8(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) ; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: sw a2, 4(a0) +; CHECK-NOV-NEXT: sw a4, 8(a0) +; CHECK-NOV-NEXT: sw a5, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB4_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -282,10 +282,10 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 ; CHECK-NOV-NEXT: and a1, a4, a1 -; CHECK-NOV-NEXT: sw a1, 12(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a3, 4(a0) ; CHECK-NOV-NEXT: sw a5, 0(a0) +; CHECK-NOV-NEXT: sw a3, 4(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 @@ -380,10 +380,10 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB6_8: # %entry ; CHECK-NOV-NEXT: lui a0, 524288 ; CHECK-NOV-NEXT: .LBB6_9: # %entry -; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw s1, 8(s0) -; CHECK-NOV-NEXT: sw a2, 4(s0) ; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a2, 4(s0) +; CHECK-NOV-NEXT: sw s1, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -555,10 +555,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB7_4: # %entry ; CHECK-NOV-NEXT: mv a3, a1 ; CHECK-NOV-NEXT: .LBB7_5: # %entry -; CHECK-NOV-NEXT: sw a3, 12(s0) -; CHECK-NOV-NEXT: sw a2, 8(s0) -; CHECK-NOV-NEXT: sw s1, 4(s0) ; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: sw s1, 4(s0) +; CHECK-NOV-NEXT: sw a2, 8(s0) +; CHECK-NOV-NEXT: sw a3, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -728,10 +728,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: negw a2, a2 ; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw a4, 8(s0) -; CHECK-NOV-NEXT: sw a1, 4(s0) ; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a1, 4(s0) +; CHECK-NOV-NEXT: sw a4, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -992,10 +992,10 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB12_8: # %entry ; CHECK-NOV-NEXT: lui a1, 1048568 ; CHECK-NOV-NEXT: .LBB12_9: # %entry -; CHECK-NOV-NEXT: sh a1, 6(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a3, 
2(a0) ; CHECK-NOV-NEXT: sh a4, 0(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB12_10: # %entry ; CHECK-NOV-NEXT: mv a1, a5 @@ -1059,10 +1059,10 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB13_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB13_5: # %entry -; CHECK-NOV-NEXT: sh a5, 6(a0) -; CHECK-NOV-NEXT: sh a4, 4(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) ; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a4, 4(a0) +; CHECK-NOV-NEXT: sh a5, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB13_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -1123,10 +1123,10 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 ; CHECK-NOV-NEXT: and a1, a4, a1 -; CHECK-NOV-NEXT: sh a1, 6(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a3, 2(a0) ; CHECK-NOV-NEXT: sh a5, 0(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 @@ -1237,14 +1237,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz ; CHECK-NOV-NEXT: bge s1, a7, .LBB15_19 ; CHECK-NOV-NEXT: .LBB15_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz ; CHECK-NOV-NEXT: bge a1, a7, .LBB15_20 ; CHECK-NOV-NEXT: .LBB15_3: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: bge a2, a7, .LBB15_21 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz +; CHECK-NOV-NEXT: bge a3, a7, .LBB15_21 ; CHECK-NOV-NEXT: .LBB15_4: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: bge a3, a7, .LBB15_22 +; CHECK-NOV-NEXT: bge a2, a7, .LBB15_22 ; CHECK-NOV-NEXT: .LBB15_5: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NOV-NEXT: bge a4, a7, .LBB15_23 @@ -1261,9 +1261,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB15_10: # %entry ; CHECK-NOV-NEXT: bge a7, a4, .LBB15_28 ; CHECK-NOV-NEXT: .LBB15_11: # %entry -; CHECK-NOV-NEXT: bge a7, a3, .LBB15_29 +; CHECK-NOV-NEXT: bge a7, a2, .LBB15_29 ; CHECK-NOV-NEXT: .LBB15_12: # %entry -; CHECK-NOV-NEXT: bge a7, a2, .LBB15_30 +; CHECK-NOV-NEXT: bge a7, a3, .LBB15_30 ; CHECK-NOV-NEXT: .LBB15_13: # %entry ; CHECK-NOV-NEXT: bge a7, a1, .LBB15_31 ; CHECK-NOV-NEXT: .LBB15_14: # %entry @@ -1273,14 +1273,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB15_16: # %entry ; CHECK-NOV-NEXT: lui a0, 1048568 ; CHECK-NOV-NEXT: .LBB15_17: # %entry -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh s1, 12(s0) +; CHECK-NOV-NEXT: sh a3, 8(s0) ; CHECK-NOV-NEXT: sh a1, 10(s0) -; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a4, 4(s0) -; CHECK-NOV-NEXT: sh a5, 2(s0) +; CHECK-NOV-NEXT: sh s1, 12(s0) +; CHECK-NOV-NEXT: sh a0, 14(s0) ; CHECK-NOV-NEXT: sh a6, 0(s0) +; CHECK-NOV-NEXT: sh a5, 2(s0) +; CHECK-NOV-NEXT: sh a4, 4(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -1305,18 +1305,18 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: blt s1, a7, .LBB15_2 ; CHECK-NOV-NEXT: .LBB15_19: # %entry ; CHECK-NOV-NEXT: mv s1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz ; 
CHECK-NOV-NEXT: blt a1, a7, .LBB15_3 ; CHECK-NOV-NEXT: .LBB15_20: # %entry ; CHECK-NOV-NEXT: mv a1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: blt a2, a7, .LBB15_4 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz +; CHECK-NOV-NEXT: blt a3, a7, .LBB15_4 ; CHECK-NOV-NEXT: .LBB15_21: # %entry -; CHECK-NOV-NEXT: mv a2, a7 +; CHECK-NOV-NEXT: mv a3, a7 ; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: blt a3, a7, .LBB15_5 +; CHECK-NOV-NEXT: blt a2, a7, .LBB15_5 ; CHECK-NOV-NEXT: .LBB15_22: # %entry -; CHECK-NOV-NEXT: mv a3, a7 +; CHECK-NOV-NEXT: mv a2, a7 ; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NOV-NEXT: blt a4, a7, .LBB15_6 ; CHECK-NOV-NEXT: .LBB15_23: # %entry @@ -1338,12 +1338,12 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: blt a7, a4, .LBB15_11 ; CHECK-NOV-NEXT: .LBB15_28: # %entry ; CHECK-NOV-NEXT: lui a4, 1048568 -; CHECK-NOV-NEXT: blt a7, a3, .LBB15_12 +; CHECK-NOV-NEXT: blt a7, a2, .LBB15_12 ; CHECK-NOV-NEXT: .LBB15_29: # %entry -; CHECK-NOV-NEXT: lui a3, 1048568 -; CHECK-NOV-NEXT: blt a7, a2, .LBB15_13 -; CHECK-NOV-NEXT: .LBB15_30: # %entry ; CHECK-NOV-NEXT: lui a2, 1048568 +; CHECK-NOV-NEXT: blt a7, a3, .LBB15_13 +; CHECK-NOV-NEXT: .LBB15_30: # %entry +; CHECK-NOV-NEXT: lui a3, 1048568 ; CHECK-NOV-NEXT: blt a7, a1, .LBB15_14 ; CHECK-NOV-NEXT: .LBB15_31: # %entry ; CHECK-NOV-NEXT: lui a1, 1048568 @@ -1585,40 +1585,40 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-NOV-NEXT: lui a1, 16 -; CHECK-NOV-NEXT: addiw a1, a1, -1 -; CHECK-NOV-NEXT: bgeu a0, a1, .LBB16_10 +; CHECK-NOV-NEXT: lui a3, 16 +; CHECK-NOV-NEXT: addiw a3, a3, -1 +; CHECK-NOV-NEXT: bgeu a0, a3, .LBB16_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz -; CHECK-NOV-NEXT: bgeu s1, a1, .LBB16_11 +; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz +; CHECK-NOV-NEXT: bgeu s1, a3, .LBB16_11 ; CHECK-NOV-NEXT: .LBB16_2: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz -; CHECK-NOV-NEXT: bgeu a2, a1, .LBB16_12 +; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB16_12 ; CHECK-NOV-NEXT: .LBB16_3: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz -; CHECK-NOV-NEXT: bgeu a3, a1, .LBB16_13 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB16_13 ; CHECK-NOV-NEXT: .LBB16_4: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz -; CHECK-NOV-NEXT: bgeu a4, a1, .LBB16_14 +; CHECK-NOV-NEXT: bgeu a4, a3, .LBB16_14 ; CHECK-NOV-NEXT: .LBB16_5: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz -; CHECK-NOV-NEXT: bgeu a5, a1, .LBB16_15 +; CHECK-NOV-NEXT: bgeu a5, a3, .LBB16_15 ; CHECK-NOV-NEXT: .LBB16_6: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz -; CHECK-NOV-NEXT: bgeu a6, a1, .LBB16_16 +; CHECK-NOV-NEXT: bgeu a6, a3, .LBB16_16 ; CHECK-NOV-NEXT: .LBB16_7: # %entry -; CHECK-NOV-NEXT: bltu a7, a1, .LBB16_9 +; CHECK-NOV-NEXT: bltu a7, a3, .LBB16_9 ; CHECK-NOV-NEXT: .LBB16_8: # %entry -; CHECK-NOV-NEXT: mv a7, a1 +; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB16_9: # %entry -; CHECK-NOV-NEXT: sh a7, 14(s0) -; CHECK-NOV-NEXT: sh a6, 12(s0) -; CHECK-NOV-NEXT: sh a5, 10(s0) ; CHECK-NOV-NEXT: sh a4, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a2, 4(s0) -; CHECK-NOV-NEXT: sh s1, 2(s0) +; CHECK-NOV-NEXT: sh a5, 10(s0) +; CHECK-NOV-NEXT: sh a6, 12(s0) +; CHECK-NOV-NEXT: sh a7, 14(s0) ; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: sh s1, 2(s0) +; CHECK-NOV-NEXT: sh a1, 4(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) ; 
CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -1638,32 +1638,32 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: addi sp, sp, 128 ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB16_10: # %entry -; CHECK-NOV-NEXT: mv a0, a1 -; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz -; CHECK-NOV-NEXT: bltu s1, a1, .LBB16_2 +; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz +; CHECK-NOV-NEXT: bltu s1, a3, .LBB16_2 ; CHECK-NOV-NEXT: .LBB16_11: # %entry -; CHECK-NOV-NEXT: mv s1, a1 -; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz -; CHECK-NOV-NEXT: bltu a2, a1, .LBB16_3 +; CHECK-NOV-NEXT: mv s1, a3 +; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz +; CHECK-NOV-NEXT: bltu a1, a3, .LBB16_3 ; CHECK-NOV-NEXT: .LBB16_12: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv a1, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz -; CHECK-NOV-NEXT: bltu a3, a1, .LBB16_4 +; CHECK-NOV-NEXT: bltu a2, a3, .LBB16_4 ; CHECK-NOV-NEXT: .LBB16_13: # %entry -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz -; CHECK-NOV-NEXT: bltu a4, a1, .LBB16_5 +; CHECK-NOV-NEXT: bltu a4, a3, .LBB16_5 ; CHECK-NOV-NEXT: .LBB16_14: # %entry -; CHECK-NOV-NEXT: mv a4, a1 +; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz -; CHECK-NOV-NEXT: bltu a5, a1, .LBB16_6 +; CHECK-NOV-NEXT: bltu a5, a3, .LBB16_6 ; CHECK-NOV-NEXT: .LBB16_15: # %entry -; CHECK-NOV-NEXT: mv a5, a1 +; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz -; CHECK-NOV-NEXT: bltu a6, a1, .LBB16_7 +; CHECK-NOV-NEXT: bltu a6, a3, .LBB16_7 ; CHECK-NOV-NEXT: .LBB16_16: # %entry -; CHECK-NOV-NEXT: mv a6, a1 -; CHECK-NOV-NEXT: bgeu a7, a1, .LBB16_8 +; CHECK-NOV-NEXT: mv a6, a3 +; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8 ; CHECK-NOV-NEXT: j .LBB16_9 ; ; CHECK-V-LABEL: utesth_f16i16: @@ -1946,14 +1946,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: and t0, t0, s1 ; CHECK-NOV-NEXT: negw a3, a3 ; CHECK-NOV-NEXT: and a0, a3, a0 -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh t0, 12(s0) -; CHECK-NOV-NEXT: sh a1, 10(s0) ; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a4, 6(s0) -; CHECK-NOV-NEXT: sh a5, 4(s0) -; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a1, 10(s0) +; CHECK-NOV-NEXT: sh t0, 12(s0) +; CHECK-NOV-NEXT: sh a0, 14(s0) ; CHECK-NOV-NEXT: sh a7, 0(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -3488,10 +3488,10 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB30_8: # %entry ; CHECK-NOV-NEXT: lui a1, 524288 ; CHECK-NOV-NEXT: .LBB30_9: # %entry -; CHECK-NOV-NEXT: sw a1, 12(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a4, 4(a0) ; CHECK-NOV-NEXT: sw a5, 0(a0) +; CHECK-NOV-NEXT: sw a4, 4(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB30_10: # %entry ; CHECK-NOV-NEXT: mv a1, a6 @@ -3551,10 +3551,10 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB31_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB31_5: # %entry -; CHECK-NOV-NEXT: sw a5, 12(a0) -; CHECK-NOV-NEXT: sw a4, 8(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) ; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: sw a2, 
4(a0) +; CHECK-NOV-NEXT: sw a4, 8(a0) +; CHECK-NOV-NEXT: sw a5, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB31_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -3613,10 +3613,10 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: sgtz a5, a1 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sw a1, 12(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a4, 4(a0) ; CHECK-NOV-NEXT: sw a3, 0(a0) +; CHECK-NOV-NEXT: sw a4, 4(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB32_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -3709,10 +3709,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB33_8: # %entry ; CHECK-NOV-NEXT: lui a0, 524288 ; CHECK-NOV-NEXT: .LBB33_9: # %entry -; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw s1, 8(s0) -; CHECK-NOV-NEXT: sw a2, 4(s0) ; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a2, 4(s0) +; CHECK-NOV-NEXT: sw s1, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -3882,10 +3882,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB34_4: # %entry ; CHECK-NOV-NEXT: mv a3, a1 ; CHECK-NOV-NEXT: .LBB34_5: # %entry -; CHECK-NOV-NEXT: sw a3, 12(s0) -; CHECK-NOV-NEXT: sw a2, 8(s0) -; CHECK-NOV-NEXT: sw s1, 4(s0) ; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: sw s1, 4(s0) +; CHECK-NOV-NEXT: sw a2, 8(s0) +; CHECK-NOV-NEXT: sw a3, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -4054,10 +4054,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: sgtz a4, a0 ; CHECK-NOV-NEXT: negw a4, a4 ; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: sw a0, 12(s0) -; CHECK-NOV-NEXT: sw a3, 8(s0) -; CHECK-NOV-NEXT: sw a1, 4(s0) ; CHECK-NOV-NEXT: sw a2, 0(s0) +; CHECK-NOV-NEXT: sw a1, 4(s0) +; CHECK-NOV-NEXT: sw a3, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -4311,10 +4311,10 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB39_8: # %entry ; CHECK-NOV-NEXT: lui a1, 1048568 ; CHECK-NOV-NEXT: .LBB39_9: # %entry -; CHECK-NOV-NEXT: sh a1, 6(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a3, 2(a0) ; CHECK-NOV-NEXT: sh a4, 0(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB39_10: # %entry ; CHECK-NOV-NEXT: mv a1, a5 @@ -4376,10 +4376,10 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB40_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB40_5: # %entry -; CHECK-NOV-NEXT: sh a5, 6(a0) -; CHECK-NOV-NEXT: sh a4, 4(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) ; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a4, 4(a0) +; CHECK-NOV-NEXT: sh a5, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB40_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -4439,10 +4439,10 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: sgtz a5, a1 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sh a1, 6(a0) -; CHECK-NOV-NEXT: sh a2, 
4(a0) -; CHECK-NOV-NEXT: sh a4, 2(a0) ; CHECK-NOV-NEXT: sh a3, 0(a0) +; CHECK-NOV-NEXT: sh a4, 2(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB41_6: # %entry ; CHECK-NOV-NEXT: mv a1, a3 @@ -4551,14 +4551,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz ; CHECK-NOV-NEXT: bge s1, a7, .LBB42_19 ; CHECK-NOV-NEXT: .LBB42_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz ; CHECK-NOV-NEXT: bge a1, a7, .LBB42_20 ; CHECK-NOV-NEXT: .LBB42_3: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: bge a2, a7, .LBB42_21 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz +; CHECK-NOV-NEXT: bge a3, a7, .LBB42_21 ; CHECK-NOV-NEXT: .LBB42_4: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: bge a3, a7, .LBB42_22 +; CHECK-NOV-NEXT: bge a2, a7, .LBB42_22 ; CHECK-NOV-NEXT: .LBB42_5: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NOV-NEXT: bge a4, a7, .LBB42_23 @@ -4575,9 +4575,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB42_10: # %entry ; CHECK-NOV-NEXT: bge a7, a4, .LBB42_28 ; CHECK-NOV-NEXT: .LBB42_11: # %entry -; CHECK-NOV-NEXT: bge a7, a3, .LBB42_29 +; CHECK-NOV-NEXT: bge a7, a2, .LBB42_29 ; CHECK-NOV-NEXT: .LBB42_12: # %entry -; CHECK-NOV-NEXT: bge a7, a2, .LBB42_30 +; CHECK-NOV-NEXT: bge a7, a3, .LBB42_30 ; CHECK-NOV-NEXT: .LBB42_13: # %entry ; CHECK-NOV-NEXT: bge a7, a1, .LBB42_31 ; CHECK-NOV-NEXT: .LBB42_14: # %entry @@ -4587,14 +4587,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB42_16: # %entry ; CHECK-NOV-NEXT: lui a0, 1048568 ; CHECK-NOV-NEXT: .LBB42_17: # %entry -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh s1, 12(s0) +; CHECK-NOV-NEXT: sh a3, 8(s0) ; CHECK-NOV-NEXT: sh a1, 10(s0) -; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a4, 4(s0) -; CHECK-NOV-NEXT: sh a5, 2(s0) +; CHECK-NOV-NEXT: sh s1, 12(s0) +; CHECK-NOV-NEXT: sh a0, 14(s0) ; CHECK-NOV-NEXT: sh a6, 0(s0) +; CHECK-NOV-NEXT: sh a5, 2(s0) +; CHECK-NOV-NEXT: sh a4, 4(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -4619,18 +4619,18 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: blt s1, a7, .LBB42_2 ; CHECK-NOV-NEXT: .LBB42_19: # %entry ; CHECK-NOV-NEXT: mv s1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz ; CHECK-NOV-NEXT: blt a1, a7, .LBB42_3 ; CHECK-NOV-NEXT: .LBB42_20: # %entry ; CHECK-NOV-NEXT: mv a1, a7 -; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: blt a2, a7, .LBB42_4 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz +; CHECK-NOV-NEXT: blt a3, a7, .LBB42_4 ; CHECK-NOV-NEXT: .LBB42_21: # %entry -; CHECK-NOV-NEXT: mv a2, a7 +; CHECK-NOV-NEXT: mv a3, a7 ; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz -; CHECK-NOV-NEXT: blt a3, a7, .LBB42_5 +; CHECK-NOV-NEXT: blt a2, a7, .LBB42_5 ; CHECK-NOV-NEXT: .LBB42_22: # %entry -; CHECK-NOV-NEXT: mv a3, a7 +; CHECK-NOV-NEXT: mv a2, a7 ; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NOV-NEXT: blt a4, a7, .LBB42_6 ; CHECK-NOV-NEXT: .LBB42_23: # %entry @@ -4652,12 +4652,12 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: blt a7, a4, .LBB42_11 ; CHECK-NOV-NEXT: .LBB42_28: # %entry ; CHECK-NOV-NEXT: lui a4, 1048568 -; CHECK-NOV-NEXT: blt a7, a3, .LBB42_12 +; 
CHECK-NOV-NEXT: blt a7, a2, .LBB42_12 ; CHECK-NOV-NEXT: .LBB42_29: # %entry -; CHECK-NOV-NEXT: lui a3, 1048568 -; CHECK-NOV-NEXT: blt a7, a2, .LBB42_13 -; CHECK-NOV-NEXT: .LBB42_30: # %entry ; CHECK-NOV-NEXT: lui a2, 1048568 +; CHECK-NOV-NEXT: blt a7, a3, .LBB42_13 +; CHECK-NOV-NEXT: .LBB42_30: # %entry +; CHECK-NOV-NEXT: lui a3, 1048568 ; CHECK-NOV-NEXT: blt a7, a1, .LBB42_14 ; CHECK-NOV-NEXT: .LBB42_31: # %entry ; CHECK-NOV-NEXT: lui a1, 1048568 @@ -4897,40 +4897,40 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-NOV-NEXT: lui a1, 16 -; CHECK-NOV-NEXT: addiw a1, a1, -1 -; CHECK-NOV-NEXT: bgeu a0, a1, .LBB43_10 +; CHECK-NOV-NEXT: lui a3, 16 +; CHECK-NOV-NEXT: addiw a3, a3, -1 +; CHECK-NOV-NEXT: bgeu a0, a3, .LBB43_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz -; CHECK-NOV-NEXT: bgeu s1, a1, .LBB43_11 +; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz +; CHECK-NOV-NEXT: bgeu s1, a3, .LBB43_11 ; CHECK-NOV-NEXT: .LBB43_2: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz -; CHECK-NOV-NEXT: bgeu a2, a1, .LBB43_12 +; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB43_12 ; CHECK-NOV-NEXT: .LBB43_3: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz -; CHECK-NOV-NEXT: bgeu a3, a1, .LBB43_13 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB43_13 ; CHECK-NOV-NEXT: .LBB43_4: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz -; CHECK-NOV-NEXT: bgeu a4, a1, .LBB43_14 +; CHECK-NOV-NEXT: bgeu a4, a3, .LBB43_14 ; CHECK-NOV-NEXT: .LBB43_5: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz -; CHECK-NOV-NEXT: bgeu a5, a1, .LBB43_15 +; CHECK-NOV-NEXT: bgeu a5, a3, .LBB43_15 ; CHECK-NOV-NEXT: .LBB43_6: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz -; CHECK-NOV-NEXT: bgeu a6, a1, .LBB43_16 +; CHECK-NOV-NEXT: bgeu a6, a3, .LBB43_16 ; CHECK-NOV-NEXT: .LBB43_7: # %entry -; CHECK-NOV-NEXT: bltu a7, a1, .LBB43_9 +; CHECK-NOV-NEXT: bltu a7, a3, .LBB43_9 ; CHECK-NOV-NEXT: .LBB43_8: # %entry -; CHECK-NOV-NEXT: mv a7, a1 +; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB43_9: # %entry -; CHECK-NOV-NEXT: sh a7, 14(s0) -; CHECK-NOV-NEXT: sh a6, 12(s0) -; CHECK-NOV-NEXT: sh a5, 10(s0) ; CHECK-NOV-NEXT: sh a4, 8(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) -; CHECK-NOV-NEXT: sh a2, 4(s0) -; CHECK-NOV-NEXT: sh s1, 2(s0) +; CHECK-NOV-NEXT: sh a5, 10(s0) +; CHECK-NOV-NEXT: sh a6, 12(s0) +; CHECK-NOV-NEXT: sh a7, 14(s0) ; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: sh s1, 2(s0) +; CHECK-NOV-NEXT: sh a1, 4(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -4950,32 +4950,32 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: addi sp, sp, 128 ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB43_10: # %entry -; CHECK-NOV-NEXT: mv a0, a1 -; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz -; CHECK-NOV-NEXT: bltu s1, a1, .LBB43_2 +; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz +; CHECK-NOV-NEXT: bltu s1, a3, .LBB43_2 ; CHECK-NOV-NEXT: .LBB43_11: # %entry -; CHECK-NOV-NEXT: mv s1, a1 -; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz -; CHECK-NOV-NEXT: bltu a2, a1, .LBB43_3 +; CHECK-NOV-NEXT: mv s1, a3 +; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz +; CHECK-NOV-NEXT: bltu a1, a3, .LBB43_3 ; CHECK-NOV-NEXT: .LBB43_12: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv a1, a3 ; 
CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz -; CHECK-NOV-NEXT: bltu a3, a1, .LBB43_4 +; CHECK-NOV-NEXT: bltu a2, a3, .LBB43_4 ; CHECK-NOV-NEXT: .LBB43_13: # %entry -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz -; CHECK-NOV-NEXT: bltu a4, a1, .LBB43_5 +; CHECK-NOV-NEXT: bltu a4, a3, .LBB43_5 ; CHECK-NOV-NEXT: .LBB43_14: # %entry -; CHECK-NOV-NEXT: mv a4, a1 +; CHECK-NOV-NEXT: mv a4, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz -; CHECK-NOV-NEXT: bltu a5, a1, .LBB43_6 +; CHECK-NOV-NEXT: bltu a5, a3, .LBB43_6 ; CHECK-NOV-NEXT: .LBB43_15: # %entry -; CHECK-NOV-NEXT: mv a5, a1 +; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz -; CHECK-NOV-NEXT: bltu a6, a1, .LBB43_7 +; CHECK-NOV-NEXT: bltu a6, a3, .LBB43_7 ; CHECK-NOV-NEXT: .LBB43_16: # %entry -; CHECK-NOV-NEXT: mv a6, a1 -; CHECK-NOV-NEXT: bgeu a7, a1, .LBB43_8 +; CHECK-NOV-NEXT: mv a6, a3 +; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8 ; CHECK-NOV-NEXT: j .LBB43_9 ; ; CHECK-V-LABEL: utesth_f16i16_mm: @@ -5257,14 +5257,14 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: sgtz t0, a0 ; CHECK-NOV-NEXT: negw t0, t0 ; CHECK-NOV-NEXT: and a0, t0, a0 -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh a7, 12(s0) -; CHECK-NOV-NEXT: sh a1, 10(s0) ; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a4, 6(s0) -; CHECK-NOV-NEXT: sh a5, 4(s0) -; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a1, 10(s0) +; CHECK-NOV-NEXT: sh a7, 12(s0) +; CHECK-NOV-NEXT: sh a0, 14(s0) ; CHECK-NOV-NEXT: sh a3, 0(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll index 3aca3130cc5459d..5c14ed1e813c0ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll @@ -1372,8 +1372,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1408,8 +1408,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index accc18519d6260b..5349878b5d11174 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -31,14 +31,14 @@ define void @memset_1(ptr %a, i8 %value) nounwind { define void @memset_2(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_2: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_2: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_2: @@ -63,18 +63,18 @@ define void @memset_2(ptr %a, i8 %value) nounwind { 
define void @memset_4(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_4: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_4: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_4: @@ -102,26 +102,26 @@ define void @memset_4(ptr %a, i8 %value) nounwind { define void @memset_8(ptr %a, i8 %value) nounwind { ; RV32-LABEL: memset_8: ; RV32: # %bb.0: -; RV32-NEXT: sb a1, 7(a0) -; RV32-NEXT: sb a1, 6(a0) -; RV32-NEXT: sb a1, 5(a0) ; RV32-NEXT: sb a1, 4(a0) -; RV32-NEXT: sb a1, 3(a0) -; RV32-NEXT: sb a1, 2(a0) -; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a1, 7(a0) ; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: sb a1, 1(a0) +; RV32-NEXT: sb a1, 2(a0) +; RV32-NEXT: sb a1, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: memset_8: ; RV64: # %bb.0: -; RV64-NEXT: sb a1, 7(a0) -; RV64-NEXT: sb a1, 6(a0) -; RV64-NEXT: sb a1, 5(a0) ; RV64-NEXT: sb a1, 4(a0) -; RV64-NEXT: sb a1, 3(a0) -; RV64-NEXT: sb a1, 2(a0) -; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a1, 7(a0) ; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: sb a1, 1(a0) +; RV64-NEXT: sb a1, 2(a0) +; RV64-NEXT: sb a1, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: memset_8: @@ -130,8 +130,8 @@ define void @memset_8(ptr %a, i8 %value) nounwind { ; RV32-FAST-NEXT: lui a2, 4112 ; RV32-FAST-NEXT: addi a2, a2, 257 ; RV32-FAST-NEXT: mul a1, a1, a2 -; RV32-FAST-NEXT: sw a1, 4(a0) ; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: sw a1, 4(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: memset_8: @@ -270,8 +270,8 @@ define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind { ; RV32-BOTH-NEXT: lui a2, 4112 ; RV32-BOTH-NEXT: addi a2, a2, 257 ; RV32-BOTH-NEXT: mul a1, a1, a2 -; RV32-BOTH-NEXT: sw a1, 4(a0) ; RV32-BOTH-NEXT: sw a1, 0(a0) +; RV32-BOTH-NEXT: sw a1, 4(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_8: @@ -377,14 +377,14 @@ define void @bzero_1(ptr %a) nounwind { define void @bzero_2(ptr %a) nounwind { ; RV32-LABEL: bzero_2: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_2: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_2: @@ -403,18 +403,18 @@ define void @bzero_2(ptr %a) nounwind { define void @bzero_4(ptr %a) nounwind { ; RV32-LABEL: bzero_4: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_4: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_4: @@ -433,32 +433,32 @@ define void @bzero_4(ptr %a) nounwind { define void @bzero_8(ptr %a) nounwind { ; RV32-LABEL: bzero_8: ; RV32: # %bb.0: -; RV32-NEXT: sb zero, 7(a0) -; RV32-NEXT: sb zero, 6(a0) -; RV32-NEXT: sb 
zero, 5(a0) ; RV32-NEXT: sb zero, 4(a0) -; RV32-NEXT: sb zero, 3(a0) -; RV32-NEXT: sb zero, 2(a0) -; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 5(a0) +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sb zero, 7(a0) ; RV32-NEXT: sb zero, 0(a0) +; RV32-NEXT: sb zero, 1(a0) +; RV32-NEXT: sb zero, 2(a0) +; RV32-NEXT: sb zero, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_8: ; RV64: # %bb.0: -; RV64-NEXT: sb zero, 7(a0) -; RV64-NEXT: sb zero, 6(a0) -; RV64-NEXT: sb zero, 5(a0) ; RV64-NEXT: sb zero, 4(a0) -; RV64-NEXT: sb zero, 3(a0) -; RV64-NEXT: sb zero, 2(a0) -; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 5(a0) +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sb zero, 7(a0) ; RV64-NEXT: sb zero, 0(a0) +; RV64-NEXT: sb zero, 1(a0) +; RV64-NEXT: sb zero, 2(a0) +; RV64-NEXT: sb zero, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_8: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_8: @@ -608,8 +608,8 @@ define void @aligned_bzero_4(ptr %a) nounwind { define void @aligned_bzero_8(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_8: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: sw zero, 0(a0) +; RV32-BOTH-NEXT: sw zero, 4(a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_8: diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index 9cdced88c7bdc23..59ba857dca8a5f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -27,15 +27,15 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-NEXT: lw a6, 24(s1) ; CHECK-NEXT: lw a7, 20(s1) ; CHECK-NEXT: lw t1, 16(s1) -; CHECK-NEXT: lw a1, 12(s1) -; CHECK-NEXT: lw t2, 8(s1) +; CHECK-NEXT: lw t2, 12(s1) +; CHECK-NEXT: lw t3, 8(s1) ; CHECK-NEXT: sw a0, 52(s1) ; CHECK-NEXT: sw a0, 48(s1) ; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd t2, 16(sp) -; CHECK-NEXT: sd a1, 8(sp) ; CHECK-NEXT: addi a1, s1, 48 ; CHECK-NEXT: sd t1, 0(sp) +; CHECK-NEXT: sd t2, 8(sp) +; CHECK-NEXT: sd t3, 16(sp) ; CHECK-NEXT: mv a0, t0 ; CHECK-NEXT: call gfunc ; CHECK-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr104480.ll b/llvm/test/CodeGen/RISCV/rvv/pr104480.ll index 93cf4d3766089a1..1e34d9aa6d0568f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr104480.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr104480.ll @@ -45,9 +45,9 @@ define @test_mulhs_expand( %broadcast.splat ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: addi a1, a0, 1365 -; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: addi a0, a0, 1366 ; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v12, (a0), zero @@ -73,9 +73,9 @@ define @test_mulhu_expand( %broadcast.splat ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: lui a0, 699051 ; CHECK-NEXT: addi a1, a0, -1366 -; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: addi a0, a0, -1365 ; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v12, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll index c164b62a679be05..7a2e40e86f00274 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -45,8 +45,8 @@ define @foo(i32 %0, i32 %1, i32 %2, 
i32 %3, i32 %4, i32 %5, ; CHECK-NEXT: add t1, s1, t1 ; CHECK-NEXT: addi t1, t1, 64 ; CHECK-NEXT: vs8r.v v8, (t1) -; CHECK-NEXT: sd t0, 8(sp) ; CHECK-NEXT: sd t1, 0(sp) +; CHECK-NEXT: sd t0, 8(sp) ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: call bar ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index eb8c58d2d377907..7315fd6cfbbecb4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -2375,8 +2375,8 @@ define @icmp_eq_vx_nxv1i64( %va, i64 %b, @icmp_eq_vx_swap_nxv1i64( %va, i64 %b ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2457,8 +2457,8 @@ define @icmp_ne_vx_nxv1i64( %va, i64 %b, @icmp_ne_vx_swap_nxv1i64( %va, i64 %b ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2539,8 +2539,8 @@ define @icmp_ugt_vx_nxv1i64( %va, i64 %b, @icmp_ugt_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2621,8 +2621,8 @@ define @icmp_uge_vx_nxv1i64( %va, i64 %b, @icmp_uge_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2705,8 +2705,8 @@ define @icmp_ult_vx_nxv1i64( %va, i64 %b, @icmp_ult_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2787,8 +2787,8 @@ define @icmp_sgt_vx_nxv1i64( %va, i64 %b, @icmp_sgt_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2869,8 +2869,8 @@ define @icmp_sge_vx_nxv1i64( %va, i64 %b, @icmp_sge_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -2953,8 +2953,8 @@ define @icmp_slt_vx_nxv1i64( %va, i64 %b, @icmp_slt_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), 
zero @@ -3035,8 +3035,8 @@ define @icmp_sle_vx_nxv1i64( %va, i64 %b, @icmp_sle_vx_swap_nxv1i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -3122,8 +3122,8 @@ define @icmp_eq_vx_nxv8i64( %va, i64 %b, @icmp_eq_vx_swap_nxv8i64( %va, i64 %b ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3211,8 +3211,8 @@ define @icmp_ne_vx_nxv8i64( %va, i64 %b, @icmp_ne_vx_swap_nxv8i64( %va, i64 %b ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3300,8 +3300,8 @@ define @icmp_ugt_vx_nxv8i64( %va, i64 %b, @icmp_ugt_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3389,8 +3389,8 @@ define @icmp_uge_vx_nxv8i64( %va, i64 %b, @icmp_uge_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3480,8 +3480,8 @@ define @icmp_ult_vx_nxv8i64( %va, i64 %b, @icmp_ult_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3569,8 +3569,8 @@ define @icmp_sgt_vx_nxv8i64( %va, i64 %b, @icmp_sgt_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3658,8 +3658,8 @@ define @icmp_sge_vx_nxv8i64( %va, i64 %b, @icmp_sge_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3749,8 +3749,8 @@ define @icmp_slt_vx_nxv8i64( %va, i64 %b, @icmp_slt_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -3838,8 +3838,8 @@ define @icmp_sle_vx_nxv8i64( %va, i64 %b, @icmp_sle_vx_swap_nxv8i64( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: 
addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index 90ffeff9689e074..cd3f02f0400f0c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -2055,8 +2055,8 @@ define @icmp_eq_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2080,8 +2080,8 @@ define @icmp_eq_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2145,8 +2145,8 @@ define @icmp_ne_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2170,8 +2170,8 @@ define @icmp_ne_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2215,8 +2215,8 @@ define @icmp_ugt_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2240,8 +2240,8 @@ define @icmp_ugt_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2285,8 +2285,8 @@ define @icmp_uge_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2311,8 +2311,8 @@ define @icmp_uge_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2417,8 +2417,8 @@ define @icmp_ult_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma 
; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2442,8 +2442,8 @@ define @icmp_ult_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2538,8 +2538,8 @@ define @icmp_ule_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2563,8 +2563,8 @@ define @icmp_ule_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2609,8 +2609,8 @@ define @icmp_sgt_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2634,8 +2634,8 @@ define @icmp_sgt_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2679,8 +2679,8 @@ define @icmp_sge_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2705,8 +2705,8 @@ define @icmp_sge_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2791,8 +2791,8 @@ define @icmp_slt_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2816,8 +2816,8 @@ define @icmp_slt_xv_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2902,8 +2902,8 @@ define @icmp_sle_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2927,8 +2927,8 @@ define @icmp_sle_xv_nxv8i64( %va, i64 %b) { ; RV32: # 
%bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll index 28583efccdbca45..3912bc48c795194 100644 --- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll @@ -14,9 +14,9 @@ define i32 @splat_vector_split_i64() { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 -; CHECK-NEXT: sw zero, 12(sp) ; CHECK-NEXT: lui a0, 1044480 ; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: sw zero, 12(sp) ; CHECK-NEXT: li a0, 56 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vx v10, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index 721f03120bd4994..27a4d37a83da837 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -483,10 +483,10 @@ define @mul_bigimm_stepvector_nxv8i64() { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: li a0, 7 +; RV32-NEXT: lui a1, 797989 +; RV32-NEXT: addi a1, a1, -683 +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 797989 -; RV32-NEXT: addi a0, a0, -683 -; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -531,9 +531,9 @@ define @stepvector_nxv16i64() { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -558,10 +558,10 @@ define @add_stepvector_nxv16i64() { ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -592,11 +592,11 @@ define @mul_stepvector_nxv16i64() { ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a1, a0, 1 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -630,23 +630,23 @@ define @mul_bigimm_stepvector_nxv16i64() { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: li a0, 7 +; RV32-NEXT: lui a1, 797989 +; RV32-NEXT: addi a1, a1, -683 +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 797989 -; RV32-NEXT: addi a0, a0, -683 -; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: lui a1, 11557 ; RV32-NEXT: addi a1, a1, -683 ; RV32-NEXT: mul a1, a0, a1 -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: lui a1, 92455 -; RV32-NEXT: addi a1, a1, -1368 -; RV32-NEXT: mulhu a1, a0, a1 -; RV32-NEXT: slli a2, a0, 1 +; 
RV32-NEXT: lui a2, 92455 +; RV32-NEXT: addi a2, a2, -1368 +; RV32-NEXT: mulhu a2, a0, a2 +; RV32-NEXT: slli a3, a0, 1 ; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: sub a0, a0, a2 -; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sub a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: sw a0, 4(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma @@ -683,10 +683,10 @@ define @shl_stepvector_nxv16i64() { ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll index ae8c36a7cb5e3d6..bcc7bb9f072f6d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll @@ -279,8 +279,8 @@ define @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64( @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64( @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmv.s.x_x_nxv1i64(i64 %0, iXLen %1) nounwin ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll index 1f027aef3103d4e..aeb3f6c174859c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -1157,8 +1157,8 @@ define @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64( @intrinsic_vmerge_vim_nxv8i64_nxv8i64_i64( @intrinsic_vmv.v.x_x_nxv1i64( %0, i6 ; RV32-LABEL: intrinsic_vmv.v.x_x_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll index 096e60b6285ffdf..28b8b180b76772a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll @@ -1938,8 +1938,8 @@ define @intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64( @intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64( @intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64( @intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64( @intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64( @vaaddu_vx_nxv8i64_floor( %x, i64 %y ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -382,8 +382,8 @@ define @vaaddu_vx_nxv8i64_ceil( %x, i64 %y) ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw 
a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll index a15a1932360a5cf..7dd556dd16cf134 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll @@ -1938,8 +1938,8 @@ define @intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64( @intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64( @intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64( @intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64( @vadd_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -639,8 +639,8 @@ define @vadd_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -684,8 +684,8 @@ define @vadd_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -729,8 +729,8 @@ define @vadd_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -774,10 +774,10 @@ define @vadd_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: add a2, a0, a2 -; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index feeef73e538ae00..58dee75dd55a151 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -1484,8 +1484,8 @@ define @vadd_vx_nxv1i64( %va, i64 %b, @vadd_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1578,8 +1578,8 @@ define @vadd_vx_nxv2i64( %va, i64 %b, @vadd_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 
8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1672,8 +1672,8 @@ define @vadd_vx_nxv4i64( %va, i64 %b, @vadd_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1766,8 +1766,8 @@ define @vadd_vx_nxv8i64( %va, i64 %b, @vadd_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd.ll b/llvm/test/CodeGen/RISCV/rvv/vadd.ll index 2654e7daafb0c24..6c8cab848402583 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd.ll @@ -1858,8 +1858,8 @@ define @intrinsic_vadd_vx_nxv1i64_nxv1i64_i64( @intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vadd_vx_nxv2i64_nxv2i64_i64( @intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vadd_vx_nxv4i64_nxv4i64_i64( @intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vadd_vx_nxv8i64_nxv8i64_i64( @intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64( @vand_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1037,8 +1037,8 @@ define @vand_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1103,8 +1103,8 @@ define @vand_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1169,8 +1169,8 @@ define @vand_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1225,9 +1225,9 @@ define @vand_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: and a1, a1, a3 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll index 7b4a68d5867f992..03ea4646fcf8ac5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll @@ -1311,8 +1311,8 @@ define @vand_vx_nxv1i64( %va, i64 %b, @vand_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 
12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1405,8 +1405,8 @@ define @vand_vx_nxv2i64( %va, i64 %b, @vand_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1499,8 +1499,8 @@ define @vand_vx_nxv4i64( %va, i64 %b, @vand_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1593,8 +1593,8 @@ define @vand_vx_nxv8i64( %va, i64 %b, @vand_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vand.ll b/llvm/test/CodeGen/RISCV/rvv/vand.ll index 2cf5eab4da6a617..94b4b5748b220d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vand_vx_nxv1i64_nxv1i64_i64( @intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vand_vx_nxv2i64_nxv2i64_i64( @intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vand_vx_nxv4i64_nxv4i64_i64( @intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vand_vx_nxv8i64_nxv8i64_i64( @intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64( @vandn_vx_nxv1i64(i64 %x, %y) { ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero @@ -1437,8 +1437,8 @@ define @vandn_vx_nxv1i64(i64 %x, %y) { ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v9, (a0), zero @@ -1465,8 +1465,8 @@ define @vandn_vx_swapped_nxv1i64(i64 %x, % ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero @@ -1487,8 +1487,8 @@ define @vandn_vx_swapped_nxv1i64(i64 %x, % ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v9, (a0), zero @@ -1551,8 +1551,8 @@ define @vandn_vx_nxv2i64(i64 %x, %y) { ; CHECK-RV32-NEXT: 
.cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero @@ -1573,8 +1573,8 @@ define @vandn_vx_nxv2i64(i64 %x, %y) { ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v10, (a0), zero @@ -1601,8 +1601,8 @@ define @vandn_vx_swapped_nxv2i64(i64 %x, % ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero @@ -1623,8 +1623,8 @@ define @vandn_vx_swapped_nxv2i64(i64 %x, % ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v10, (a0), zero @@ -1687,8 +1687,8 @@ define @vandn_vx_nxv4i64(i64 %x, %y) { ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero @@ -1709,8 +1709,8 @@ define @vandn_vx_nxv4i64(i64 %x, %y) { ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v12, (a0), zero @@ -1737,8 +1737,8 @@ define @vandn_vx_swapped_nxv4i64(i64 %x, % ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero @@ -1759,8 +1759,8 @@ define @vandn_vx_swapped_nxv4i64(i64 %x, % ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v12, (a0), zero @@ -1823,8 +1823,8 @@ define @vandn_vx_nxv8i64(i64 %x, %y) { ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v16, (a0), zero 
@@ -1845,8 +1845,8 @@ define @vandn_vx_nxv8i64(i64 %x, %y) { ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v16, (a0), zero @@ -1873,8 +1873,8 @@ define @vandn_vx_swapped_nxv8i64(i64 %x, % ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: not a0, a0 ; CHECK-RV32-NEXT: not a1, a1 -; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v16, (a0), zero @@ -1895,8 +1895,8 @@ define @vandn_vx_swapped_nxv8i64(i64 %x, % ; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-ZVKB32-NEXT: not a0, a0 ; CHECK-ZVKB32-NEXT: not a1, a1 -; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: sw a0, 8(sp) +; CHECK-ZVKB32-NEXT: sw a1, 12(sp) ; CHECK-ZVKB32-NEXT: addi a0, sp, 8 ; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-ZVKB32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll index 95866543828fc7f..5986033638853b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll @@ -1112,8 +1112,8 @@ define @vandn_vx_vp_nxv1i64(i64 %a, %b, @vandn_vx_vp_nxv1i64(i64 %a, %b, @vandn_vx_vp_nxv2i64(i64 %a, %b, @vandn_vx_vp_nxv2i64(i64 %a, %b, @vandn_vx_vp_nxv4i64(i64 %a, %b, @vandn_vx_vp_nxv4i64(i64 %a, %b, @vandn_vx_vp_nxv8i64(i64 %a, %b, @vandn_vx_vp_nxv8i64(i64 %a, %b, @intrinsic_vandn_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vandn_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1934,8 +1934,8 @@ define @intrinsic_vandn_mask_vx_nxv1i64_i64( @intrinsic_vandn_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vandn_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -2005,8 +2005,8 @@ define @intrinsic_vandn_mask_vx_nxv2i64_i64( @intrinsic_vandn_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vandn_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2076,8 +2076,8 @@ define @intrinsic_vandn_mask_vx_nxv4i64_i64( @intrinsic_vandn_vx_nxv8i64_i64( %0, ; RV32-LABEL: intrinsic_vandn_vx_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2147,8 +2147,8 @@ define @intrinsic_vandn_mask_vx_nxv8i64_i64( @intrinsic_vasub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vasub_vx_nxv2i64_nxv2i64_i64( @intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64( 
@intrinsic_vasub_vx_nxv4i64_nxv4i64_i64( @intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vasub_vx_nxv8i64_nxv8i64_i64( @intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vclmul_vx_nxv1i64_i64( %0 ; RV32-LABEL: intrinsic_vclmul_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -239,8 +239,8 @@ define @intrinsic_vclmul_mask_vx_nxv1i64_i64( @intrinsic_vclmul_vx_nxv2i64_i64( %0 ; RV32-LABEL: intrinsic_vclmul_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -310,8 +310,8 @@ define @intrinsic_vclmul_mask_vx_nxv2i64_i64( @intrinsic_vclmul_vx_nxv4i64_i64( %0 ; RV32-LABEL: intrinsic_vclmul_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -381,8 +381,8 @@ define @intrinsic_vclmul_mask_vx_nxv4i64_i64( @intrinsic_vclmul_vx_nxv8i64_i64( %0 ; RV32-LABEL: intrinsic_vclmul_vx_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -452,8 +452,8 @@ define @intrinsic_vclmul_mask_vx_nxv8i64_i64( @intrinsic_vclmulh_vx_nxv1i64_i64( % ; RV32-LABEL: intrinsic_vclmulh_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -239,8 +239,8 @@ define @intrinsic_vclmulh_mask_vx_nxv1i64_i64( @intrinsic_vclmulh_vx_nxv2i64_i64( % ; RV32-LABEL: intrinsic_vclmulh_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -310,8 +310,8 @@ define @intrinsic_vclmulh_mask_vx_nxv2i64_i64( @intrinsic_vclmulh_vx_nxv4i64_i64( % ; RV32-LABEL: intrinsic_vclmulh_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -381,8 +381,8 @@ define @intrinsic_vclmulh_mask_vx_nxv4i64_i64( @intrinsic_vclmulh_vx_nxv8i64_i64( % ; RV32-LABEL: intrinsic_vclmulh_vx_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw 
a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -452,8 +452,8 @@ define @intrinsic_vclmulh_mask_vx_nxv8i64_i64( @vdiv_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -797,10 +797,10 @@ define @vdiv_vi_nxv1i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-V-NEXT: vlse64.v v9, (a0), zero @@ -849,8 +849,8 @@ define @vdiv_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -876,10 +876,10 @@ define @vdiv_vi_nxv2i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-V-NEXT: vlse64.v v10, (a0), zero @@ -928,8 +928,8 @@ define @vdiv_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -955,10 +955,10 @@ define @vdiv_vi_nxv4i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-V-NEXT: vlse64.v v12, (a0), zero @@ -1007,8 +1007,8 @@ define @vdiv_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1034,10 +1034,10 @@ define @vdiv_vi_nxv8i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-V-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index a4b7ca7f39768fb..061c2d1066a1418 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -892,8 +892,8 @@ define @vdiv_vx_nxv1i64( %va, i64 %b, @vdiv_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -966,8 +966,8 @@ define @vdiv_vx_nxv2i64( %va, i64 %b, @vdiv_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1040,8 +1040,8 @@ define @vdiv_vx_nxv4i64( %va, i64 %b, @vdiv_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1114,8 +1114,8 @@ define @vdiv_vx_nxv8i64( %va, i64 %b, @vdiv_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll index fd32afcc7ce6e29..a2d3a39d57d6b3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64( @intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64( @intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64( @intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64( @intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64( @vdivu_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -688,9 +688,9 @@ define @vdivu_vi_nxv1i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-V-NEXT: vlse64.v v9, (a0), zero @@ -759,8 +759,8 @@ define @vdivu_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -785,9 +785,9 @@ define @vdivu_vi_nxv2i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; 
RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-V-NEXT: vlse64.v v10, (a0), zero @@ -856,8 +856,8 @@ define @vdivu_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -882,9 +882,9 @@ define @vdivu_vi_nxv4i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-V-NEXT: vlse64.v v12, (a0), zero @@ -953,8 +953,8 @@ define @vdivu_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -979,9 +979,9 @@ define @vdivu_vi_nxv8i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-V-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll index 67c3f9dbf2869a9..25f6a2300f6739a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll @@ -891,8 +891,8 @@ define @vdivu_vx_nxv1i64( %va, i64 %b, @vdivu_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -965,8 +965,8 @@ define @vdivu_vx_nxv2i64( %va, i64 %b, @vdivu_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1039,8 +1039,8 @@ define @vdivu_vx_nxv4i64( %va, i64 %b, @vdivu_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1113,8 +1113,8 @@ define @vdivu_vx_nxv8i64( %va, i64 %b, @vdivu_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll index 99d63c218d8e60b..85beeb1d8a37ba4 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64(, ptr %in %cmp = icmp sgt <3 x i8> %a, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll index e232ac255c56f05..9fe9af478f0b2ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -1651,8 +1651,8 @@ define @vmacc_vx_nxv1i64( %a, i64 %b, @vmacc_vx_nxv1i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1724,8 +1724,8 @@ define @vmacc_vx_nxv1i64_ta( %a, i64 %b, @vmacc_vx_nxv2i64( %a, i64 %b, @vmacc_vx_nxv2i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1858,8 +1858,8 @@ define @vmacc_vx_nxv2i64_ta( %a, i64 %b, @vmacc_vx_nxv4i64( %a, i64 %b, @vmacc_vx_nxv4i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1992,8 +1992,8 @@ define @vmacc_vx_nxv4i64_ta( %a, i64 %b, @vmacc_vx_nxv8i64( %a, i64 %b, @vmacc_vx_nxv8i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -2129,8 +2129,8 @@ define @vmacc_vx_nxv8i64_ta( %a, i64 %b, @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64( @intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64( @intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64( @intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64( @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64( @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64( @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64( @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64( @intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64( @intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64( @intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64( @intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64( @vmadd_vx_nxv1i64( %va, @vmadd_vx_nxv2i64( %va, @vmadd_vx_nxv4i64( %va, @vmadd_vx_nxv8i64( %va, @vmadd_vx_nxv1i64( %a, i64 %b, @vmadd_vx_nxv1i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1708,8 +1708,8 @@ define @vmadd_vx_nxv1i64_ta( %a, i64 %b, @vmadd_vx_nxv2i64( %a, 
i64 %b, @vmadd_vx_nxv2i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1843,8 +1843,8 @@ define @vmadd_vx_nxv2i64_ta( %a, i64 %b, @vmadd_vx_nxv4i64( %a, i64 %b, @vmadd_vx_nxv4i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1978,8 +1978,8 @@ define @vmadd_vx_nxv4i64_ta( %a, i64 %b, @vmadd_vx_nxv8i64( %a, i64 %b, @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -2116,8 +2116,8 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64( @intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64( @intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64( @intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64( @intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64( @vmax_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -715,8 +715,8 @@ define @vmax_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -764,8 +764,8 @@ define @vmax_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -813,8 +813,8 @@ define @vmax_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index f65e708f5303cce..85cd97019fdfb1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -1123,8 +1123,8 @@ define @vmax_vx_nxv1i64( %va, i64 %b, @vmax_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1197,8 +1197,8 @@ define @vmax_vx_nxv2i64( %va, i64 %b, @vmax_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: 
.cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1271,8 +1271,8 @@ define @vmax_vx_nxv4i64( %va, i64 %b, @vmax_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1345,8 +1345,8 @@ define @vmax_vx_nxv8i64( %va, i64 %b, @vmax_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax.ll b/llvm/test/CodeGen/RISCV/rvv/vmax.ll index 86f17dc20f23e79..5acca2b80919e7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vmax_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmax_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmax_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmax_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64( @vmax_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -715,8 +715,8 @@ define @vmax_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -764,8 +764,8 @@ define @vmax_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -813,8 +813,8 @@ define @vmax_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index df1ad58e5ecbde7..40e11e621109355 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -1122,8 +1122,8 @@ define @vmaxu_vx_nxv1i64( %va, i64 %b, @vmaxu_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1196,8 +1196,8 @@ define @vmaxu_vx_nxv2i64( %va, i64 %b, 
@vmaxu_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1270,8 +1270,8 @@ define @vmaxu_vx_nxv4i64( %va, i64 %b, @vmaxu_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1344,8 +1344,8 @@ define @vmaxu_vx_nxv8i64( %va, i64 %b, @vmaxu_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll index e2a5b95b2b4ad14..f1a7cedbb4417d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64( @vmin_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -715,8 +715,8 @@ define @vmin_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -764,8 +764,8 @@ define @vmin_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -813,8 +813,8 @@ define @vmin_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 0bf0638633aa457..2e4ace7212777c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -1123,8 +1123,8 @@ define @vmin_vx_nxv1i64( %va, i64 %b, @vmin_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma 
; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1197,8 +1197,8 @@ define @vmin_vx_nxv2i64( %va, i64 %b, @vmin_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1271,8 +1271,8 @@ define @vmin_vx_nxv4i64( %va, i64 %b, @vmin_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1345,8 +1345,8 @@ define @vmin_vx_nxv8i64( %va, i64 %b, @vmin_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin.ll b/llvm/test/CodeGen/RISCV/rvv/vmin.ll index 311c9f9f1526bd9..002c0081c8e32a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vmin_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmin_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmin_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmin_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64( @vmin_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -715,8 +715,8 @@ define @vmin_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -764,8 +764,8 @@ define @vmin_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -813,8 +813,8 @@ define @vmin_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 2acebdf2e646d45..17a10868ff05b51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -1122,8 +1122,8 @@ define @vminu_vx_nxv1i64( %va, i64 %b, @vminu_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw 
a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1196,8 +1196,8 @@ define @vminu_vx_nxv2i64( %va, i64 %b, @vminu_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1270,8 +1270,8 @@ define @vminu_vx_nxv4i64( %va, i64 %b, @vminu_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1344,8 +1344,8 @@ define @vminu_vx_nxv8i64( %va, i64 %b, @vminu_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu.ll b/llvm/test/CodeGen/RISCV/rvv/vminu.ll index b513331bc0b8fc1..3d2f6d00e681328 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vminu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vminu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vminu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vminu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64( @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64( @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64( @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64( @intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64( @intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64( @intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64( @intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64( @intrinsic_vmseq_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmseq_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi 
a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmseq_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index 75fc407abbc2f33..1ec304609699a69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -1669,8 +1669,8 @@ define @intrinsic_vmsge_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsge_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1704,8 +1704,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1744,8 +1744,8 @@ define @intrinsic_vmsge_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsge_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1779,8 +1779,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1819,8 +1819,8 @@ define @intrinsic_vmsge_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsge_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1854,8 +1854,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2790,8 +2790,8 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64( @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64( @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64( @intrinsic_vmsgeu_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgeu_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, 
-16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1704,8 +1704,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1744,8 +1744,8 @@ define @intrinsic_vmsgeu_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1779,8 +1779,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1819,8 +1819,8 @@ define @intrinsic_vmsgeu_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1854,8 +1854,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2802,8 +2802,8 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i64_i64( @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64( @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64( @intrinsic_vmsgt_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsgt_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: 
addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsgt_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index de7a0ad87be27c6..69b22573c289e50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmsgtu_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsgtu_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsgtu_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index f54aef3ed4052c9..c8794e1b63900f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmsle_vx_nxv1i64_i64( %0, ; 
RV32-LABEL: intrinsic_vmsle_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsle_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsle_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsle_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsle_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index 540577247484e3a..86dc48d51cc2bc1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmsleu_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsleu_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsleu_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsleu_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( ; RV32-LABEL: 
intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsleu_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsleu_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 554d25172d4fde4..8d57f2adc538686 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmslt_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmslt_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmslt_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmslt_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmslt_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmslt_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index 
7a8efa6c80fb6b9..627b59430871372 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmsltu_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsltu_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsltu_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsltu_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsltu_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsltu_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index bd6bd8a804bcc26..47d1048f46cab47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -1654,8 +1654,8 @@ define @intrinsic_vmsne_vx_nxv1i64_i64( %0, ; RV32-LABEL: intrinsic_vmsne_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1688,8 +1688,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero @@ -1728,8 +1728,8 @@ define @intrinsic_vmsne_vx_nxv2i64_i64( %0, ; RV32-LABEL: intrinsic_vmsne_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi 
a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1762,8 +1762,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1802,8 +1802,8 @@ define @intrinsic_vmsne_vx_nxv4i64_i64( %0, ; RV32-LABEL: intrinsic_vmsne_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1836,8 +1836,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll index 0b8620c90c62e07..c05ab8fe7db2fc3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll @@ -614,8 +614,8 @@ define @vmul_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -680,8 +680,8 @@ define @vmul_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -746,8 +746,8 @@ define @vmul_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -812,8 +812,8 @@ define @vmul_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -867,13 +867,13 @@ define @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32NOM-LABEL: vmul_xx_nxv8i64: ; RV32NOM: # %bb.0: ; RV32NOM-NEXT: addi sp, sp, -16 -; RV32NOM-NEXT: sw a1, 12(sp) ; RV32NOM-NEXT: sw a0, 8(sp) +; RV32NOM-NEXT: sw a1, 12(sp) ; RV32NOM-NEXT: addi a0, sp, 8 ; RV32NOM-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32NOM-NEXT: vlse64.v v8, (a0), zero -; RV32NOM-NEXT: sw a3, 4(sp) ; RV32NOM-NEXT: sw a2, 0(sp) +; RV32NOM-NEXT: sw a3, 4(sp) ; RV32NOM-NEXT: mv a0, sp ; RV32NOM-NEXT: vlse64.v v16, (a0), zero ; RV32NOM-NEXT: vmul.vv v8, v8, v16 @@ -891,12 +891,12 @@ define @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32M: # %bb.0: ; RV32M-NEXT: addi sp, sp, -16 ; RV32M-NEXT: mul a4, a0, a2 -; RV32M-NEXT: sw a4, 8(sp) ; RV32M-NEXT: mul 
a3, a0, a3 ; RV32M-NEXT: mulhu a0, a0, a2 ; RV32M-NEXT: add a0, a0, a3 ; RV32M-NEXT: mul a1, a1, a2 ; RV32M-NEXT: add a0, a0, a1 +; RV32M-NEXT: sw a4, 8(sp) ; RV32M-NEXT: sw a0, 12(sp) ; RV32M-NEXT: addi a0, sp, 8 ; RV32M-NEXT: vsetvli a1, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index 51026cbcb8c4bf1..80ff2c746f8db3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -931,8 +931,8 @@ define @vmul_vx_nxv1i64( %va, i64 %b, @vmul_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1005,8 +1005,8 @@ define @vmul_vx_nxv2i64( %va, i64 %b, @vmul_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1079,8 +1079,8 @@ define @vmul_vx_nxv4i64( %va, i64 %b, @vmul_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1153,8 +1153,8 @@ define @vmul_vx_nxv8i64( %va, i64 %b, @vmul_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul.ll b/llvm/test/CodeGen/RISCV/rvv/vmul.ll index 91d255ffeeecfdc..81b9b60dc70c2b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul.ll @@ -1862,8 +1862,8 @@ define @intrinsic_vmul_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmul_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmul_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmul_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64( 
@intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vmv.s.x_x_nxv1i64( %0, i6 ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vid.v v9 ; RV32-NEXT: vmseq.vi v0, v9, 0 @@ -270,8 +270,8 @@ define @intrinsic_vmv.s.x_x_nxv2i64( %0, i6 ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv2i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vid.v v10 ; RV32-NEXT: vmseq.vi v0, v10, 0 @@ -296,8 +296,8 @@ define @intrinsic_vmv.s.x_x_nxv4i64( %0, i6 ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv4i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vid.v v12 ; RV32-NEXT: vmseq.vi v0, v12, 0 @@ -322,8 +322,8 @@ define @intrinsic_vmv.s.x_x_nxv8i64( %0, i6 ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv8i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vid.v v16 ; RV32-NEXT: vmseq.vi v0, v16, 0 @@ -347,10 +347,10 @@ define @intrinsic_vmv.s.x_x_nxv1i64_bug( %0 ; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64_bug: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a0, 4(a0) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV32-NEXT: vid.v v9 ; RV32-NEXT: vmseq.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll index 4fa95fb2d945db9..79bdd30291c08c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll @@ -373,8 +373,8 @@ define @intrinsic_vmv.v.x_x_nxv1i64(i64 %0, iXLen %1) nounwin ; RV32-LABEL: intrinsic_vmv.v.x_x_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -404,8 +404,8 @@ define @intrinsic_vmv.v.x_x_nxv2i64(i64 %0, iXLen %1) nounwin ; RV32-LABEL: intrinsic_vmv.v.x_x_nxv2i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -435,8 +435,8 @@ define @intrinsic_vmv.v.x_x_nxv4i64(i64 %0, iXLen %1) nounwin ; RV32-LABEL: intrinsic_vmv.v.x_x_nxv4i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -466,8 +466,8 @@ define @intrinsic_vmv.v.x_x_nxv8i64(i64 %0, iXLen %1) nounwin ; RV32-LABEL: intrinsic_vmv.v.x_x_nxv8i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 
12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll index 30edcaf9b15b53c..4eadd11d4210486 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -1651,8 +1651,8 @@ define @vnmsac_vx_nxv1i64( %a, i64 %b, @vnmsac_vx_nxv1i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1724,8 +1724,8 @@ define @vnmsac_vx_nxv1i64_ta( %a, i64 %b, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1785,8 +1785,8 @@ define @vnmsac_vx_nxv2i64( %a, i64 %b, @vnmsac_vx_nxv2i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1858,8 +1858,8 @@ define @vnmsac_vx_nxv2i64_ta( %a, i64 %b, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1919,8 +1919,8 @@ define @vnmsac_vx_nxv4i64( %a, i64 %b, @vnmsac_vx_nxv4i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1992,8 +1992,8 @@ define @vnmsac_vx_nxv4i64_ta( %a, i64 %b, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -2055,8 +2055,8 @@ define @vnmsac_vx_nxv8i64( %a, i64 %b, @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero @@ -2129,8 +2129,8 @@ define @vnmsac_vx_nxv8i64_ta( %a, i64 %b, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll index 760f4d47ce2346d..505443d93720bb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll @@ -1566,8 +1566,8 @@ define @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64( 
@intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64( @intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64( @intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64( @intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64( @intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64( @vnmsub_vx_nxv1i64( %va, @vnmsub_vx_nxv2i64( %va, @vnmsub_vx_nxv4i64( %va, @vnmsub_vx_nxv8i64( %va, @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64( @intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64( @intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64( @intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64( @intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64( @intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64( @vor_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -837,8 +837,8 @@ define @vor_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -893,8 +893,8 @@ define @vor_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -949,8 +949,8 @@ define @vor_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1016,9 +1016,9 @@ define @vor_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: or a1, a1, a3 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll index b9388e58797048d..a23044967db44a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll @@ -1323,8 +1323,8 @@ define @vor_vx_nxv1i64( %va, i64 %b, @vor_vx_nxv1i64_unmasked( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1417,8 +1417,8 @@ define @vor_vx_nxv2i64( %va, i64 %b, @vor_vx_nxv2i64_unmasked( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1511,8 +1511,8 @@ define @vor_vx_nxv4i64( %va, i64 %b, @vor_vx_nxv4i64_unmasked( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: 
vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1605,8 +1605,8 @@ define @vor_vx_nxv8i64( %va, i64 %b, @vor_vx_nxv8i64_unmasked( %va, i64 % ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vor.ll b/llvm/test/CodeGen/RISCV/rvv/vor.ll index 3badc121f0322b6..8280f4c64684eeb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vor_vx_nxv1i64_nxv1i64_i64( @intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vor_vx_nxv2i64_nxv2i64_i64( @intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vor_vx_nxv4i64_nxv4i64_i64( @intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vor_vx_nxv8i64_nxv8i64_i64( @intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64( @vp_splat_nxv1i64(i64 %val, %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -213,8 +213,8 @@ define @vp_splat_nxv2i64(i64 %val, %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -235,8 +235,8 @@ define @vp_splat_nxv4i64(i64 %val, %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -257,8 +257,8 @@ define @vp_splat_nxv8i64(i64 %val, %m, i32 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 86dfc74fdee37b0..ea7bf65fc5644df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -853,8 +853,8 @@ define @vpmerge_vx_nxv1i64(i64 %a, %vb, @vpmerge_vx_nxv2i64(i64 %a, %vb, @vpmerge_vx_nxv4i64(i64 %a, %vb, @vpmerge_vx_nxv8i64(i64 %a, %vb, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1253,8 +1253,8 @@ define signext i64 @vpwreduce_add_nxv1i32(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1287,8 +1287,8 @@ define signext i64 @vpwreduce_uadd_nxv1i32(i64 signext %s, %v ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: 
.cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1323,8 +1323,8 @@ define signext i64 @vpreduce_umax_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1357,8 +1357,8 @@ define signext i64 @vpreduce_smax_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1391,8 +1391,8 @@ define signext i64 @vpreduce_umin_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1425,8 +1425,8 @@ define signext i64 @vpreduce_smin_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1459,8 +1459,8 @@ define signext i64 @vpreduce_and_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1493,8 +1493,8 @@ define signext i64 @vpreduce_or_nxv1i64(i64 signext %s, %v, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1527,8 +1527,8 @@ define signext i64 @vpreduce_xor_nxv1i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1561,8 +1561,8 @@ define signext i64 @vpreduce_add_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1593,8 +1593,8 @@ define signext i64 @vwpreduce_add_nxv2i32(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1627,8 +1627,8 @@ define signext i64 @vwpreduce_uadd_nxv2i32(i64 signext %s, %v ; 
RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1663,8 +1663,8 @@ define signext i64 @vpreduce_umax_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1697,8 +1697,8 @@ define signext i64 @vpreduce_smax_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1731,8 +1731,8 @@ define signext i64 @vpreduce_umin_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1765,8 +1765,8 @@ define signext i64 @vpreduce_smin_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1799,8 +1799,8 @@ define signext i64 @vpreduce_and_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1833,8 +1833,8 @@ define signext i64 @vpreduce_or_nxv2i64(i64 signext %s, %v, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1867,8 +1867,8 @@ define signext i64 @vpreduce_xor_nxv2i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1901,8 +1901,8 @@ define signext i64 @vpreduce_add_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1933,8 +1933,8 @@ define signext i64 @vpwreduce_add_nxv4i32(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1967,8 +1967,8 @@ 
define signext i64 @vpwreduce_uadd_nxv4i32(i64 signext %s, %v ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -2003,8 +2003,8 @@ define signext i64 @vpreduce_umax_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2037,8 +2037,8 @@ define signext i64 @vpreduce_smax_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2071,8 +2071,8 @@ define signext i64 @vpreduce_umin_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2105,8 +2105,8 @@ define signext i64 @vpreduce_smin_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2139,8 +2139,8 @@ define signext i64 @vpreduce_and_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2173,8 +2173,8 @@ define signext i64 @vpreduce_or_nxv4i64(i64 signext %s, %v, < ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2207,8 +2207,8 @@ define signext i64 @vpreduce_xor_nxv4i64(i64 signext %s, %v, ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index 3a6ae5fdb21073e..8a3a47f23488674 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -939,8 +939,8 @@ define @vrem_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -966,10 +966,10 @@ define @vrem_vi_nxv1i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; 
RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-V-NEXT: vlse64.v v9, (a0), zero @@ -1022,8 +1022,8 @@ define @vrem_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1049,10 +1049,10 @@ define @vrem_vi_nxv2i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-V-NEXT: vlse64.v v10, (a0), zero @@ -1105,8 +1105,8 @@ define @vrem_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1132,10 +1132,10 @@ define @vrem_vi_nxv4i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-V-NEXT: vlse64.v v12, (a0), zero @@ -1188,8 +1188,8 @@ define @vrem_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1215,10 +1215,10 @@ define @vrem_vi_nxv8i64_0( %va) { ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 748983 ; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: lui a1, 898779 +; RV32-V-NEXT: addi a1, a1, 1755 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: lui a0, 898779 -; RV32-V-NEXT: addi a0, a0, 1755 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-V-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index 2ef96f4b3896fc9..48b89907c57a3bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -892,8 +892,8 @@ define @vrem_vx_nxv1i64( %va, i64 %b, @vrem_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -966,8 +966,8 @@ define @vrem_vx_nxv2i64( %va, i64 
%b, @vrem_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1040,8 +1040,8 @@ define @vrem_vx_nxv4i64( %va, i64 %b, @vrem_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1114,8 +1114,8 @@ define @vrem_vx_nxv8i64( %va, i64 %b, @vrem_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem.ll b/llvm/test/CodeGen/RISCV/rvv/vrem.ll index 15692419dd76ced..3c6488e84fd52e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vrem_vx_nxv1i64_nxv1i64_i64( @intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vrem_vx_nxv2i64_nxv2i64_i64( @intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vrem_vx_nxv4i64_nxv4i64_i64( @intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vrem_vx_nxv8i64_nxv8i64_i64( @intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64( @vremu_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -704,9 +704,9 @@ define @vremu_vi_nxv1i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-V-NEXT: vlse64.v v9, (a0), zero @@ -783,8 +783,8 @@ define @vremu_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -809,9 +809,9 @@ define @vremu_vi_nxv2i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-V-NEXT: vlse64.v v10, (a0), zero @@ -888,8 +888,8 @@ define @vremu_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -914,9 +914,9 @@ define @vremu_vi_nxv4i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; 
RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-V-NEXT: vlse64.v v12, (a0), zero @@ -993,8 +993,8 @@ define @vremu_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1019,9 +1019,9 @@ define @vremu_vi_nxv8i64_0( %va) { ; RV32-V-NEXT: addi sp, sp, -16 ; RV32-V-NEXT: .cfi_def_cfa_offset 16 ; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: li a1, 1 +; RV32-V-NEXT: sw a1, 8(sp) ; RV32-V-NEXT: sw a0, 12(sp) -; RV32-V-NEXT: li a0, 1 -; RV32-V-NEXT: sw a0, 8(sp) ; RV32-V-NEXT: addi a0, sp, 8 ; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-V-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll index 1f1ed4a1269acbb..20d33f6cadf2232 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll @@ -891,8 +891,8 @@ define @vremu_vx_nxv1i64( %va, i64 %b, @vremu_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -965,8 +965,8 @@ define @vremu_vx_nxv2i64( %va, i64 %b, @vremu_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1039,8 +1039,8 @@ define @vremu_vx_nxv4i64( %va, i64 %b, @vremu_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1113,8 +1113,8 @@ define @vremu_vx_nxv8i64( %va, i64 %b, @vremu_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu.ll b/llvm/test/CodeGen/RISCV/rvv/vremu.ll index 3a090a51e62624e..777407c144b5159 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vremu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vremu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vremu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vremu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64( @vrsub_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; 
RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -438,8 +438,8 @@ define @vrsub_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -473,8 +473,8 @@ define @vrsub_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -508,8 +508,8 @@ define @vrsub_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll index be372c9aa54d5d0..f211c900ee5d3ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll @@ -839,8 +839,8 @@ define @vrsub_vx_nxv1i64( %va, i64 %b, @vrsub_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -913,8 +913,8 @@ define @vrsub_vx_nxv2i64( %va, i64 %b, @vrsub_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -987,8 +987,8 @@ define @vrsub_vx_nxv4i64( %va, i64 %b, @vrsub_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1061,8 +1061,8 @@ define @vrsub_vx_nxv8i64( %va, i64 %b, @vrsub_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll index 1a8db00a1c95910..19d6afbd0e57efe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll @@ -842,8 +842,8 @@ define @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64( @intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64( @intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64( @intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64( @sadd_nxv1i64_vx( %va, i64 %b) { ; RV32: # 
%bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -680,8 +680,8 @@ define @sadd_nxv2i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -727,8 +727,8 @@ define @sadd_nxv4i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -774,8 +774,8 @@ define @sadd_nxv8i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index 71b91f56e89a5e1..7c6167bd5bc7ba0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -1423,8 +1423,8 @@ define @vsadd_vx_nxv1i64( %va, i64 %b, @vsadd_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1517,8 +1517,8 @@ define @vsadd_vx_nxv2i64( %va, i64 %b, @vsadd_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1611,8 +1611,8 @@ define @vsadd_vx_nxv4i64( %va, i64 %b, @vsadd_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1705,8 +1705,8 @@ define @vsadd_vx_nxv8i64( %va, i64 %b, @vsadd_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll index a108d98c1731b39..445942f8ef23530 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64( 
@intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64( @intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64( @uadd_nxv1i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -680,8 +680,8 @@ define @uadd_nxv2i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -727,8 +727,8 @@ define @uadd_nxv4i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -774,8 +774,8 @@ define @uadd_nxv8i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index 454a4ebab04a285..916cbdc7e62207d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -1422,8 +1422,8 @@ define @vsaddu_vx_nxv1i64( %va, i64 %b, @vsaddu_vx_nxv1i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1516,8 +1516,8 @@ define @vsaddu_vx_nxv2i64( %va, i64 %b, @vsaddu_vx_nxv2i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1610,8 +1610,8 @@ define @vsaddu_vx_nxv4i64( %va, i64 %b, @vsaddu_vx_nxv4i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1704,8 +1704,8 @@ define @vsaddu_vx_nxv8i64( %va, i64 %b, @vsaddu_vx_nxv8i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll index 57a89d6fe7d234f..37f93fd90e5360f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64( 
@intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64( @intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64( @intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64( @intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64( @intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64( @vmerge_xv_nxv1i64( %va, i64 %b, @vmerge_xv_nxv2i64( %va, i64 %b, @vmerge_xv_nxv4i64( %va, i64 %b, @vmerge_xv_nxv8i64( %va, i64 %b, @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64( @vsplat_nxv8i64_4() { ; RV32V: # %bb.0: ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 -; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: lui a0, 1028096 ; RV32V-NEXT: addi a0, a0, -1281 ; RV32V-NEXT: sw a0, 8(sp) +; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v8, (a0), zero @@ -63,8 +63,8 @@ define @vsplat_nxv8i64_5(i64 %a) { ; RV32V: # %bb.0: ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 -; RV32V-NEXT: sw a1, 12(sp) ; RV32V-NEXT: sw a0, 8(sp) +; RV32V-NEXT: sw a1, 12(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v8, (a0), zero @@ -137,10 +137,10 @@ define @vadd_vx_nxv8i64_10( %v) { ; RV32V: # %bb.0: ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 -; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: lui a0, 1028096 ; RV32V-NEXT: addi a0, a0, -1281 ; RV32V-NEXT: sw a0, 8(sp) +; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v16, (a0), zero @@ -166,10 +166,10 @@ define @vadd_vx_nxv8i64_11( %v) { ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 ; RV32V-NEXT: li a0, 1 +; RV32V-NEXT: lui a1, 1028096 +; RV32V-NEXT: addi a1, a1, -1281 +; RV32V-NEXT: sw a1, 8(sp) ; RV32V-NEXT: sw a0, 12(sp) -; RV32V-NEXT: lui a0, 1028096 -; RV32V-NEXT: addi a0, a0, -1281 -; RV32V-NEXT: sw a0, 8(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v16, (a0), zero @@ -194,8 +194,8 @@ define @vadd_vx_nxv8i64_12( %v, i64 %a) { ; RV32V: # %bb.0: ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 -; RV32V-NEXT: sw a1, 12(sp) ; RV32V-NEXT: sw a0, 8(sp) +; RV32V-NEXT: sw a1, 12(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v16, (a0), zero @@ -238,8 +238,8 @@ define @vsplat_nxv8i64_14(i32 %a) { ; RV32V: # %bb.0: ; RV32V-NEXT: addi sp, sp, -16 ; RV32V-NEXT: .cfi_def_cfa_offset 16 -; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: sw a0, 8(sp) +; RV32V-NEXT: sw zero, 12(sp) ; RV32V-NEXT: addi a0, sp, 8 ; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32V-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll index c043858c029473a..632e28607b9be18 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll @@ -651,8 +651,8 @@ define @ssub_nxv1i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 
16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -699,8 +699,8 @@ define @ssub_nxv2i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -747,8 +747,8 @@ define @ssub_nxv4i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -795,8 +795,8 @@ define @ssub_nxv8i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index 613b58b0f1b88a5..2f9786e237084de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -1467,8 +1467,8 @@ define @vssub_vx_nxv1i64( %va, i64 %b, @vssub_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1563,8 +1563,8 @@ define @vssub_vx_nxv2i64( %va, i64 %b, @vssub_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1659,8 +1659,8 @@ define @vssub_vx_nxv4i64( %va, i64 %b, @vssub_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1755,8 +1755,8 @@ define @vssub_vx_nxv8i64( %va, i64 %b, @vssub_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub.ll b/llvm/test/CodeGen/RISCV/rvv/vssub.ll index 50fca5e832af506..2ac94f3c55a8d11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64( @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64( @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64( @intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64( 
@usub_nxv1i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -699,8 +699,8 @@ define @usub_nxv2i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -747,8 +747,8 @@ define @usub_nxv4i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -795,8 +795,8 @@ define @usub_nxv8i64_vx( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 8c729d7d9bfb6ea..dea0a82eca375d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -1465,8 +1465,8 @@ define @vssubu_vx_nxv1i64( %va, i64 %b, @vssubu_vx_nxv1i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1561,8 +1561,8 @@ define @vssubu_vx_nxv2i64( %va, i64 %b, @vssubu_vx_nxv2i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1657,8 +1657,8 @@ define @vssubu_vx_nxv4i64( %va, i64 %b, @vssubu_vx_nxv4i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1753,8 +1753,8 @@ define @vssubu_vx_nxv8i64( %va, i64 %b, @vssubu_vx_nxv8i64_unmasked( %va, i6 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll index db1b4ce34e9b381..9827b20b5ca0a96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64( @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64( 
@intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64( @intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64( @vsub_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -668,8 +668,8 @@ define @vsub_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -714,8 +714,8 @@ define @vsub_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -760,8 +760,8 @@ define @vsub_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -796,10 +796,10 @@ define @vsub_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sub a4, a0, a2 -; RV32-NEXT: sw a4, 8(sp) ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a3 ; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: sw a4, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll index a2b9285fedeaf78..634f5cd0a9a0e1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll @@ -919,8 +919,8 @@ define @vsub_vx_nxv1i64( %va, i64 %b, @vsub_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -993,8 +993,8 @@ define @vsub_vx_nxv2i64( %va, i64 %b, @vsub_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1067,8 +1067,8 @@ define @vsub_vx_nxv4i64( %va, i64 %b, @vsub_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1141,8 +1141,8 @@ define @vsub_vx_nxv8i64( %va, i64 %b, @vsub_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v 
v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub.ll b/llvm/test/CodeGen/RISCV/rvv/vsub.ll index c7e153d86fce3a8..ecf23722917d3bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub.ll @@ -1858,8 +1858,8 @@ define @intrinsic_vsub_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vsub_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vsub_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vsub_vx_nxv8i64_nxv8i64_i64( @intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64( @vwadd_vx_splat_zext( %va, i32 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1530,8 +1530,8 @@ define @vwadd_wx_splat_zext( %va, i32 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll index b03a105610dfdf5..e98bd1a1ad78208 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll @@ -971,8 +971,8 @@ define @vxor_vx_nxv1i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1037,8 +1037,8 @@ define @vxor_vx_nxv2i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1103,8 +1103,8 @@ define @vxor_vx_nxv4i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -1169,8 +1169,8 @@ define @vxor_vx_nxv8i64( %va, i64 %b) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero @@ -1225,9 +1225,9 @@ define @vxor_xx_nxv8i64(i64 %a, i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: xor a1, a1, a3 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: xor a0, a0, a2 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll index f2235b4fdc94ba1..3ad0c22894561aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll @@ -1691,8 +1691,8 @@ define @vxor_vx_nxv1i64( %va, i64 %b, 
@vxor_vx_nxv1i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero @@ -1805,8 +1805,8 @@ define @vxor_vx_nxv2i64( %va, i64 %b, @vxor_vx_nxv2i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -1919,8 +1919,8 @@ define @vxor_vx_nxv4i64( %va, i64 %b, @vxor_vx_nxv4i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero @@ -2033,8 +2033,8 @@ define @vxor_vx_nxv8i64( %va, i64 %b, @vxor_vx_nxv8i64_unmasked( %va, i64 ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor.ll b/llvm/test/CodeGen/RISCV/rvv/vxor.ll index b08d4530d0085c7..a3cb2f42c8ecf88 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor.ll @@ -1898,8 +1898,8 @@ define @intrinsic_vxor_vx_nxv1i64_nxv1i64_i64( @intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64( @intrinsic_vxor_vx_nxv2i64_nxv2i64_i64( @intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64( @intrinsic_vxor_vx_nxv4i64_nxv4i64_i64( @intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64( @intrinsic_vxor_vx_nxv8i64_nxv8i64_i64( @intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64(, ptr %X %srem = srem <3 x i33> %ld, diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 90443051d4b574d..c7e57021b90dc17 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -38,10 +38,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: li a1, -1003 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh a0, 6(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) +; RV32I-NEXT: sh s0, 2(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -95,10 +95,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32IM-NEXT: li a6, -1003 ; RV32IM-NEXT: mul a5, a5, a6 ; RV32IM-NEXT: sub a1, a1, a5 -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a4, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_1: @@ -130,10 +130,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: li a1, -1003 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh a0, 6(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: sh s0, 2(s3) +; RV64I-NEXT: sh s1, 
4(s3) +; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -187,10 +187,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: li a6, -1003 ; RV64IM-NEXT: mul a2, a2, a6 ; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: sh a1, 6(a0) -; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -226,10 +226,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh a0, 6(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) +; RV32I-NEXT: sh s0, 2(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -276,10 +276,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a5, a5, a6 ; RV32IM-NEXT: mul a5, a5, a7 ; RV32IM-NEXT: sub a1, a1, a5 -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a4, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_2: @@ -311,10 +311,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh a0, 6(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: sh s0, 2(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -361,10 +361,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: mul a2, a2, a7 ; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: sh a1, 6(a0) -; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -426,10 +426,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: add s2, s7, s2 ; RV32I-NEXT: add s3, s6, s3 ; RV32I-NEXT: add s4, s5, s4 -; RV32I-NEXT: sh s4, 6(s0) -; RV32I-NEXT: sh s3, 4(s0) -; RV32I-NEXT: sh s2, 2(s0) ; RV32I-NEXT: sh a0, 0(s0) +; RV32I-NEXT: sh s2, 2(s0) +; RV32I-NEXT: sh s3, 4(s0) +; RV32I-NEXT: sh s4, 6(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -484,10 +484,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: sub a1, a1, t2 ; RV32IM-NEXT: add a2, a2, a6 ; RV32IM-NEXT: sub a2, a2, t0 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a4, 2(a0) ; RV32IM-NEXT: sh a3, 0(a0) +; RV32IM-NEXT: sh a4, 2(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: @@ -543,10 +543,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64I-NEXT: add s2, s7, s2 ; 
RV64I-NEXT: add s3, s6, s3 ; RV64I-NEXT: add s4, s5, s4 -; RV64I-NEXT: sh s4, 6(s0) -; RV64I-NEXT: sh s3, 4(s0) -; RV64I-NEXT: sh s2, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) +; RV64I-NEXT: sh s2, 2(s0) +; RV64I-NEXT: sh s3, 4(s0) +; RV64I-NEXT: sh s4, 6(s0) ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload @@ -601,10 +601,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: subw a1, a1, t2 ; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: subw a2, a2, t0 -; RV64IM-NEXT: sh a2, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -641,10 +641,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: sub s3, a4, a1 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s3, 4(s0) -; RV32I-NEXT: sh s2, 2(s0) ; RV32I-NEXT: sh s1, 0(s0) +; RV32I-NEXT: sh s2, 2(s0) +; RV32I-NEXT: sh s3, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -681,9 +681,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a5, a3, a5 ; RV32IM-NEXT: andi a5, a5, -8 ; RV32IM-NEXT: sub a3, a3, a5 -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) +; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret ; @@ -714,10 +714,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: subw s3, a4, a1 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s3, 4(s0) -; RV64I-NEXT: sh s2, 2(s0) ; RV64I-NEXT: sh s1, 0(s0) +; RV64I-NEXT: sh s2, 2(s0) +; RV64I-NEXT: sh s3, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -754,9 +754,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a3, a4, a3 ; RV64IM-NEXT: andi a3, a3, -8 ; RV64IM-NEXT: subw a4, a4, a3 -; RV64IM-NEXT: sh a4, 4(a0) -; RV64IM-NEXT: sh a5, 2(a0) ; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a5, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, @@ -789,10 +789,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh a0, 6(s2) -; RV32I-NEXT: sh s0, 4(s2) -; RV32I-NEXT: sh s3, 2(s2) ; RV32I-NEXT: sh zero, 0(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh s0, 4(s2) +; RV32I-NEXT: sh a0, 6(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -837,9 +837,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32IM-NEXT: mul a4, a4, a5 ; RV32IM-NEXT: sub a1, a1, a4 ; RV32IM-NEXT: sh zero, 0(a0) -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: 
dont_fold_srem_one: @@ -866,10 +866,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh a0, 6(s2) -; RV64I-NEXT: sh s0, 4(s2) -; RV64I-NEXT: sh s3, 2(s2) ; RV64I-NEXT: sh zero, 0(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh s0, 4(s2) +; RV64I-NEXT: sh a0, 6(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -913,9 +913,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: mul a2, a2, a5 ; RV64IM-NEXT: subw a1, a1, a2 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -947,10 +947,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s2, 4(s0) ; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh s2, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -991,8 +991,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV32IM-NEXT: sub a2, a2, a4 ; RV32IM-NEXT: sh zero, 0(a0) ; RV32IM-NEXT: sh a2, 2(a0) -; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_urem_i16_smax: @@ -1019,10 +1019,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s2, 4(s0) ; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh s2, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1063,8 +1063,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64IM-NEXT: subw a4, a4, a2 ; RV64IM-NEXT: sh zero, 0(a0) ; RV64IM-NEXT: sh a4, 2(a0) -; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: sh a3, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1085,15 +1085,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 16(a1) -; RV32I-NEXT: lw s1, 20(a1) -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw s1, 16(a1) +; RV32I-NEXT: lw s2, 20(a1) +; RV32I-NEXT: lw s3, 24(a1) +; RV32I-NEXT: lw s4, 28(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw s4, 8(a1) -; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: lw s5, 8(a1) +; RV32I-NEXT: lw s6, 12(a1) +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: mv a1, a4 @@ -1102,33 +1102,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a0, s5 +; RV32I-NEXT: mv a1, s6 ; RV32I-NEXT: li 
a3, 0 ; RV32I-NEXT: call __moddi3 -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s6, a1 ; RV32I-NEXT: li a2, 23 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a2, a0, 1327 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 -; RV32I-NEXT: sw a1, 28(s6) -; RV32I-NEXT: sw a0, 24(s6) -; RV32I-NEXT: sw s1, 20(s6) -; RV32I-NEXT: sw s0, 16(s6) -; RV32I-NEXT: sw s5, 12(s6) -; RV32I-NEXT: sw s4, 8(s6) -; RV32I-NEXT: sw s8, 4(s6) -; RV32I-NEXT: sw s7, 0(s6) +; RV32I-NEXT: sw s1, 16(s0) +; RV32I-NEXT: sw s2, 20(s0) +; RV32I-NEXT: sw a0, 24(s0) +; RV32I-NEXT: sw a1, 28(s0) +; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: sw s8, 4(s0) +; RV32I-NEXT: sw s5, 8(s0) +; RV32I-NEXT: sw s6, 12(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1155,15 +1155,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s0, 16(a1) -; RV32IM-NEXT: lw s1, 20(a1) -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw s1, 16(a1) +; RV32IM-NEXT: lw s2, 20(a1) +; RV32IM-NEXT: lw s3, 24(a1) +; RV32IM-NEXT: lw s4, 28(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw s4, 8(a1) -; RV32IM-NEXT: lw s5, 12(a1) -; RV32IM-NEXT: mv s6, a0 +; RV32IM-NEXT: lw s5, 8(a1) +; RV32IM-NEXT: lw s6, 12(a1) +; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: mv a1, a4 @@ -1172,33 +1172,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s4 -; RV32IM-NEXT: mv a1, s5 +; RV32IM-NEXT: mv a0, s5 +; RV32IM-NEXT: mv a1, s6 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 -; RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: mv s5, a0 +; RV32IM-NEXT: mv s6, a1 ; RV32IM-NEXT: li a2, 23 -; RV32IM-NEXT: mv a0, s0 -; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: mv a0, s1 +; RV32IM-NEXT: mv a1, s2 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 -; RV32IM-NEXT: mv s0, a0 -; RV32IM-NEXT: mv s1, a1 +; RV32IM-NEXT: mv s1, a0 +; RV32IM-NEXT: mv s2, a1 ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a2, a0, 1327 -; RV32IM-NEXT: mv a0, s2 -; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a0, s3 +; RV32IM-NEXT: mv a1, s4 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 -; RV32IM-NEXT: sw a1, 28(s6) -; RV32IM-NEXT: sw a0, 24(s6) -; RV32IM-NEXT: sw s1, 20(s6) -; RV32IM-NEXT: sw s0, 16(s6) -; RV32IM-NEXT: sw s5, 12(s6) -; RV32IM-NEXT: sw s4, 8(s6) -; RV32IM-NEXT: sw s8, 4(s6) -; RV32IM-NEXT: sw s7, 0(s6) +; RV32IM-NEXT: sw s1, 16(s0) +; RV32IM-NEXT: sw s2, 20(s0) +; RV32IM-NEXT: sw a0, 24(s0) +; RV32IM-NEXT: sw a1, 28(s0) +; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: sw s8, 4(s0) +; RV32IM-NEXT: sw s5, 8(s0) +; RV32IM-NEXT: sw s6, 12(s0) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded 
Reload @@ -1236,10 +1236,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sd a0, 24(s2) -; RV64I-NEXT: sd s0, 16(s2) -; RV64I-NEXT: sd s3, 8(s2) ; RV64I-NEXT: sd zero, 0(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd s0, 16(s2) +; RV64I-NEXT: sd a0, 24(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1283,9 +1283,9 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: mul a2, a2, a5 ; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a1, 24(a0) ; RV64IM-NEXT: sd a4, 8(a0) ; RV64IM-NEXT: sd a3, 16(a0) +; RV64IM-NEXT: sd a1, 24(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll index 1388eaac3a67e7d..71ee6d8160a9da8 100644 --- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll +++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll @@ -21,11 +21,11 @@ define i32 @caller129() nounwind { ; RV32I-NEXT: li a0, 42 ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: call callee129 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -38,10 +38,10 @@ define i32 @caller129() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: call callee129 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -62,11 +62,11 @@ define i32 @caller160() nounwind { ; RV32I-NEXT: li a0, 42 ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: call callee160 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -79,10 +79,10 @@ define i32 @caller160() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: call callee160 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -102,13 +102,13 @@ define i32 @caller161() nounwind { ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: li a0, 42 ; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: call callee161 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded 
Reload @@ -121,10 +121,10 @@ define i32 @caller161() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: call callee161 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll index b51a759a87b8592..052ccbf6e06f369 100644 --- a/llvm/test/CodeGen/RISCV/stack-store-check.ll +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -14,292 +14,294 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-LABEL: main: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -688 -; CHECK-NEXT: sw ra, 684(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s0, 680(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s1, 676(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s2, 672(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s3, 668(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s4, 664(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s5, 660(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s6, 656(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s7, 652(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s8, 648(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s9, 644(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s10, 640(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s11, 636(sp) # 4-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -704 +; CHECK-NEXT: sw ra, 700(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s0, 696(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s1, 692(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s2, 688(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s3, 684(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s4, 680(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s5, 676(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s6, 672(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s7, 668(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s8, 664(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s9, 660(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s10, 656(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s11, 652(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(U) -; CHECK-NEXT: lw s6, %lo(U)(a0) -; CHECK-NEXT: lw s7, %lo(U+4)(a0) -; CHECK-NEXT: lw s8, %lo(U+8)(a0) -; CHECK-NEXT: lw s0, %lo(U+12)(a0) -; CHECK-NEXT: sw zero, 612(sp) -; CHECK-NEXT: sw zero, 608(sp) -; CHECK-NEXT: sw zero, 604(sp) -; CHECK-NEXT: sw zero, 600(sp) -; CHECK-NEXT: sw s0, 596(sp) -; CHECK-NEXT: sw s8, 592(sp) -; CHECK-NEXT: sw s7, 588(sp) -; CHECK-NEXT: addi a0, sp, 616 -; CHECK-NEXT: addi a1, sp, 600 -; CHECK-NEXT: addi a2, sp, 584 -; CHECK-NEXT: sw s6, 584(sp) +; CHECK-NEXT: lw s9, %lo(U)(a0) +; CHECK-NEXT: lw s10, %lo(U+4)(a0) +; CHECK-NEXT: lw s11, %lo(U+8)(a0) +; CHECK-NEXT: lw s5, %lo(U+12)(a0) +; CHECK-NEXT: sw zero, 616(sp) +; CHECK-NEXT: sw zero, 620(sp) +; CHECK-NEXT: sw zero, 624(sp) +; CHECK-NEXT: sw zero, 628(sp) +; CHECK-NEXT: addi a0, sp, 632 +; CHECK-NEXT: addi a1, sp, 616 +; CHECK-NEXT: addi a2, sp, 600 +; CHECK-NEXT: sw s9, 600(sp) +; CHECK-NEXT: sw s10, 604(sp) +; CHECK-NEXT: sw s11, 608(sp) +; CHECK-NEXT: sw s5, 612(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw s1, 616(sp) -; CHECK-NEXT: lw s2, 620(sp) -; CHECK-NEXT: lw s3, 624(sp) -; CHECK-NEXT: lw s4, 628(sp) -; CHECK-NEXT: sw s0, 548(sp) -; CHECK-NEXT: sw s8, 544(sp) -; CHECK-NEXT: sw s7, 540(sp) -; CHECK-NEXT: sw s6, 536(sp) -; CHECK-NEXT: sw s4, 564(sp) -; CHECK-NEXT: sw s3, 
560(sp) -; CHECK-NEXT: sw s2, 556(sp) -; CHECK-NEXT: addi a0, sp, 568 -; CHECK-NEXT: addi a1, sp, 552 -; CHECK-NEXT: addi a2, sp, 536 -; CHECK-NEXT: sw s1, 552(sp) +; CHECK-NEXT: lw s1, 632(sp) +; CHECK-NEXT: lw s2, 636(sp) +; CHECK-NEXT: lw s3, 640(sp) +; CHECK-NEXT: lw s4, 644(sp) +; CHECK-NEXT: sw s9, 552(sp) +; CHECK-NEXT: sw s10, 556(sp) +; CHECK-NEXT: sw s11, 560(sp) +; CHECK-NEXT: sw s5, 564(sp) +; CHECK-NEXT: addi a0, sp, 584 +; CHECK-NEXT: addi a1, sp, 568 +; CHECK-NEXT: addi a2, sp, 552 +; CHECK-NEXT: sw s1, 568(sp) +; CHECK-NEXT: sw s2, 572(sp) +; CHECK-NEXT: sw s3, 576(sp) +; CHECK-NEXT: sw s4, 580(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 568(sp) -; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 572(sp) -; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 576(sp) -; CHECK-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 580(sp) +; CHECK-NEXT: lw a0, 584(sp) +; CHECK-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 588(sp) ; CHECK-NEXT: sw a0, 48(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw zero, 500(sp) -; CHECK-NEXT: sw zero, 496(sp) -; CHECK-NEXT: sw zero, 492(sp) -; CHECK-NEXT: sw zero, 488(sp) -; CHECK-NEXT: sw s0, 516(sp) -; CHECK-NEXT: sw s8, 512(sp) -; CHECK-NEXT: sw s7, 508(sp) -; CHECK-NEXT: addi a0, sp, 520 -; CHECK-NEXT: addi a1, sp, 504 -; CHECK-NEXT: addi a2, sp, 488 -; CHECK-NEXT: sw s6, 504(sp) +; CHECK-NEXT: lw a0, 592(sp) +; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 596(sp) +; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw zero, 504(sp) +; CHECK-NEXT: sw zero, 508(sp) +; CHECK-NEXT: sw zero, 512(sp) +; CHECK-NEXT: sw zero, 516(sp) +; CHECK-NEXT: addi a0, sp, 536 +; CHECK-NEXT: addi a1, sp, 520 +; CHECK-NEXT: addi a2, sp, 504 +; CHECK-NEXT: sw s9, 520(sp) +; CHECK-NEXT: sw s10, 524(sp) +; CHECK-NEXT: sw s11, 528(sp) +; CHECK-NEXT: sw s5, 532(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw s9, 520(sp) -; CHECK-NEXT: lw s11, 524(sp) -; CHECK-NEXT: lw s5, 528(sp) -; CHECK-NEXT: lw s10, 532(sp) -; CHECK-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s0, 536(sp) +; CHECK-NEXT: lw s6, 540(sp) +; CHECK-NEXT: lw s7, 544(sp) +; CHECK-NEXT: lw s8, 548(sp) ; CHECK-NEXT: lui a0, %hi(Y1) ; CHECK-NEXT: lw a1, %lo(Y1)(a0) -; CHECK-NEXT: sw a1, 52(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a1, 20(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a2, %lo(Y1+4)(a0) -; CHECK-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a2, 16(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a3, %lo(Y1+8)(a0) -; CHECK-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a3, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a0, %lo(Y1+12)(a0) -; CHECK-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw a0, 308(sp) -; CHECK-NEXT: sw a3, 304(sp) -; CHECK-NEXT: sw a2, 300(sp) -; CHECK-NEXT: sw a1, 296(sp) -; CHECK-NEXT: sw s4, 324(sp) -; CHECK-NEXT: sw s3, 320(sp) -; CHECK-NEXT: sw s2, 316(sp) -; CHECK-NEXT: addi a0, sp, 328 -; CHECK-NEXT: addi a1, sp, 312 -; CHECK-NEXT: addi a2, sp, 296 -; CHECK-NEXT: sw s1, 312(sp) +; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw a1, 312(sp) +; CHECK-NEXT: sw a2, 316(sp) +; CHECK-NEXT: sw a3, 320(sp) +; CHECK-NEXT: sw a0, 324(sp) +; CHECK-NEXT: addi a0, sp, 344 +; CHECK-NEXT: addi a1, sp, 328 +; CHECK-NEXT: addi a2, sp, 312 +; CHECK-NEXT: sw s1, 328(sp) +; CHECK-NEXT: sw s2, 332(sp) +; CHECK-NEXT: sw s3, 336(sp) +; CHECK-NEXT: sw s4, 340(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a0, 
328(sp) -; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 332(sp) -; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 336(sp) -; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 340(sp) -; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s0, 468(sp) -; CHECK-NEXT: sw s8, 464(sp) -; CHECK-NEXT: sw s7, 460(sp) -; CHECK-NEXT: sw s6, 456(sp) -; CHECK-NEXT: sw s10, 452(sp) -; CHECK-NEXT: sw s5, 448(sp) -; CHECK-NEXT: sw s11, 444(sp) -; CHECK-NEXT: addi a0, sp, 472 -; CHECK-NEXT: addi a1, sp, 456 -; CHECK-NEXT: addi a2, sp, 440 -; CHECK-NEXT: sw s9, 440(sp) +; CHECK-NEXT: lw a0, 344(sp) +; CHECK-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 348(sp) +; CHECK-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 352(sp) +; CHECK-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 356(sp) +; CHECK-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s9, 472(sp) +; CHECK-NEXT: sw s10, 476(sp) +; CHECK-NEXT: sw s11, 480(sp) +; CHECK-NEXT: sw s5, 484(sp) +; CHECK-NEXT: addi a0, sp, 488 +; CHECK-NEXT: addi a1, sp, 472 +; CHECK-NEXT: addi a2, sp, 456 +; CHECK-NEXT: sw s0, 456(sp) +; CHECK-NEXT: sw s6, 460(sp) +; CHECK-NEXT: sw s7, 464(sp) +; CHECK-NEXT: sw s8, 468(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a3, 472(sp) -; CHECK-NEXT: lw a0, 476(sp) -; CHECK-NEXT: lw a1, 480(sp) -; CHECK-NEXT: lw a2, 484(sp) -; CHECK-NEXT: sw zero, 420(sp) -; CHECK-NEXT: sw zero, 416(sp) -; CHECK-NEXT: sw zero, 412(sp) -; CHECK-NEXT: sw zero, 408(sp) -; CHECK-NEXT: sw a2, 404(sp) -; CHECK-NEXT: sw a1, 400(sp) -; CHECK-NEXT: sw a0, 396(sp) -; CHECK-NEXT: addi a0, sp, 424 -; CHECK-NEXT: addi a1, sp, 408 -; CHECK-NEXT: addi a2, sp, 392 -; CHECK-NEXT: sw a3, 392(sp) +; CHECK-NEXT: lw a3, 488(sp) +; CHECK-NEXT: lw a4, 492(sp) +; CHECK-NEXT: lw a5, 496(sp) +; CHECK-NEXT: lw a6, 500(sp) +; CHECK-NEXT: sw zero, 424(sp) +; CHECK-NEXT: sw zero, 428(sp) +; CHECK-NEXT: sw zero, 432(sp) +; CHECK-NEXT: sw zero, 436(sp) +; CHECK-NEXT: addi a0, sp, 440 +; CHECK-NEXT: addi a1, sp, 424 +; CHECK-NEXT: addi a2, sp, 408 +; CHECK-NEXT: sw a3, 408(sp) +; CHECK-NEXT: sw a4, 412(sp) +; CHECK-NEXT: sw a5, 416(sp) +; CHECK-NEXT: sw a6, 420(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 432(sp) -; CHECK-NEXT: lw a1, 436(sp) -; CHECK-NEXT: lw a2, 424(sp) -; CHECK-NEXT: lw a3, 428(sp) +; CHECK-NEXT: lw a0, 448(sp) +; CHECK-NEXT: lw a1, 452(sp) +; CHECK-NEXT: lw a2, 440(sp) +; CHECK-NEXT: lw a3, 444(sp) ; CHECK-NEXT: lui a4, %hi(X) ; CHECK-NEXT: sw a1, %lo(X+12)(a4) ; CHECK-NEXT: sw a0, %lo(X+8)(a4) ; CHECK-NEXT: sw a3, %lo(X+4)(a4) ; CHECK-NEXT: sw a2, %lo(X)(a4) -; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s8, 212(sp) -; CHECK-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s4, 208(sp) -; CHECK-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s3, 204(sp) -; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 200(sp) -; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 228(sp) -; CHECK-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s5, 216(sp) +; CHECK-NEXT: lw s9, 16(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s9, 220(sp) +; CHECK-NEXT: lw s10, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw s10, 224(sp) -; CHECK-NEXT: lw s2, 28(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s2, 220(sp) -; CHECK-NEXT: addi a0, sp, 232 -; CHECK-NEXT: addi a1, sp, 216 -; 
CHECK-NEXT: addi a2, sp, 200 -; CHECK-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s0, 216(sp) +; CHECK-NEXT: lw s11, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s11, 228(sp) +; CHECK-NEXT: addi a0, sp, 248 +; CHECK-NEXT: addi a1, sp, 232 +; CHECK-NEXT: addi a2, sp, 216 +; CHECK-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s1, 232(sp) +; CHECK-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s2, 236(sp) +; CHECK-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s3, 240(sp) +; CHECK-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s4, 244(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw s1, 232(sp) -; CHECK-NEXT: lw a0, 236(sp) -; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw s6, 240(sp) -; CHECK-NEXT: lw s7, 244(sp) -; CHECK-NEXT: sw zero, 356(sp) -; CHECK-NEXT: sw zero, 352(sp) -; CHECK-NEXT: sw zero, 348(sp) -; CHECK-NEXT: sw zero, 344(sp) -; CHECK-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 372(sp) -; CHECK-NEXT: sw s5, 368(sp) -; CHECK-NEXT: sw s11, 364(sp) -; CHECK-NEXT: addi a0, sp, 376 -; CHECK-NEXT: addi a1, sp, 360 -; CHECK-NEXT: addi a2, sp, 344 -; CHECK-NEXT: sw s9, 360(sp) +; CHECK-NEXT: lw a0, 248(sp) +; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 252(sp) +; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 256(sp) +; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw a0, 260(sp) +; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw zero, 360(sp) +; CHECK-NEXT: sw zero, 364(sp) +; CHECK-NEXT: sw zero, 368(sp) +; CHECK-NEXT: sw zero, 372(sp) +; CHECK-NEXT: addi a0, sp, 392 +; CHECK-NEXT: addi a1, sp, 376 +; CHECK-NEXT: addi a2, sp, 360 +; CHECK-NEXT: sw s0, 376(sp) +; CHECK-NEXT: sw s6, 380(sp) +; CHECK-NEXT: sw s7, 384(sp) +; CHECK-NEXT: sw s8, 388(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a0, 384(sp) -; CHECK-NEXT: lw a1, 388(sp) -; CHECK-NEXT: lw a2, 376(sp) -; CHECK-NEXT: lw a3, 380(sp) +; CHECK-NEXT: lw a0, 400(sp) +; CHECK-NEXT: lw a1, 404(sp) +; CHECK-NEXT: lw a2, 392(sp) +; CHECK-NEXT: lw a3, 396(sp) ; CHECK-NEXT: lui a4, %hi(S) ; CHECK-NEXT: sw a1, %lo(S+12)(a4) ; CHECK-NEXT: sw a0, %lo(S+8)(a4) ; CHECK-NEXT: sw a3, %lo(S+4)(a4) ; CHECK-NEXT: sw a2, %lo(S)(a4) -; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 260(sp) -; CHECK-NEXT: sw s10, 256(sp) -; CHECK-NEXT: sw s2, 252(sp) -; CHECK-NEXT: sw s0, 248(sp) -; CHECK-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 276(sp) -; CHECK-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 272(sp) -; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 268(sp) -; CHECK-NEXT: addi a0, sp, 280 -; CHECK-NEXT: addi a1, sp, 264 -; CHECK-NEXT: addi a2, sp, 248 -; CHECK-NEXT: lw a3, 44(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 264(sp) +; CHECK-NEXT: sw s1, 264(sp) +; CHECK-NEXT: sw s2, 268(sp) +; CHECK-NEXT: sw s3, 272(sp) +; CHECK-NEXT: sw s4, 276(sp) +; CHECK-NEXT: addi a0, sp, 296 +; CHECK-NEXT: addi a1, sp, 280 +; CHECK-NEXT: addi a2, sp, 264 +; CHECK-NEXT: lw a3, 68(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 280(sp) +; CHECK-NEXT: lw a3, 64(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 284(sp) +; CHECK-NEXT: lw a3, 60(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 288(sp) +; CHECK-NEXT: lw a3, 56(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 292(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 288(sp) -; CHECK-NEXT: lw a1, 292(sp) -; 
CHECK-NEXT: lw a2, 280(sp) -; CHECK-NEXT: lw a3, 284(sp) +; CHECK-NEXT: lw a0, 304(sp) +; CHECK-NEXT: lw a1, 308(sp) +; CHECK-NEXT: lw a2, 296(sp) +; CHECK-NEXT: lw a3, 300(sp) ; CHECK-NEXT: lui a4, %hi(T) ; CHECK-NEXT: sw a1, %lo(T+12)(a4) ; CHECK-NEXT: sw a0, %lo(T+8)(a4) ; CHECK-NEXT: sw a3, %lo(T+4)(a4) ; CHECK-NEXT: sw a2, %lo(T)(a4) -; CHECK-NEXT: sw zero, 164(sp) -; CHECK-NEXT: sw zero, 160(sp) -; CHECK-NEXT: sw zero, 156(sp) -; CHECK-NEXT: sw zero, 152(sp) -; CHECK-NEXT: sw s7, 180(sp) -; CHECK-NEXT: sw s6, 176(sp) -; CHECK-NEXT: lw a0, 0(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a0, 172(sp) -; CHECK-NEXT: addi a0, sp, 184 -; CHECK-NEXT: addi a1, sp, 168 -; CHECK-NEXT: addi a2, sp, 152 -; CHECK-NEXT: sw s1, 168(sp) +; CHECK-NEXT: sw zero, 168(sp) +; CHECK-NEXT: sw zero, 172(sp) +; CHECK-NEXT: sw zero, 176(sp) +; CHECK-NEXT: sw zero, 180(sp) +; CHECK-NEXT: addi a0, sp, 200 +; CHECK-NEXT: addi a1, sp, 184 +; CHECK-NEXT: addi a2, sp, 168 +; CHECK-NEXT: lw a3, 36(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 184(sp) +; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 188(sp) +; CHECK-NEXT: lw a3, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 192(sp) +; CHECK-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a3, 196(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a0, 192(sp) -; CHECK-NEXT: lw a1, 196(sp) -; CHECK-NEXT: lw a2, 184(sp) -; CHECK-NEXT: lw a3, 188(sp) +; CHECK-NEXT: lw a0, 208(sp) +; CHECK-NEXT: lw a1, 212(sp) +; CHECK-NEXT: lw a2, 200(sp) +; CHECK-NEXT: lw a3, 204(sp) ; CHECK-NEXT: lui a4, %hi(Y) ; CHECK-NEXT: sw a1, %lo(Y+12)(a4) ; CHECK-NEXT: sw a0, %lo(Y+8)(a4) ; CHECK-NEXT: sw a3, %lo(Y+4)(a4) ; CHECK-NEXT: sw a2, %lo(Y)(a4) -; CHECK-NEXT: sw zero, 116(sp) -; CHECK-NEXT: sw zero, 112(sp) -; CHECK-NEXT: sw zero, 108(sp) -; CHECK-NEXT: sw zero, 104(sp) -; CHECK-NEXT: sw s8, 132(sp) -; CHECK-NEXT: sw s4, 128(sp) -; CHECK-NEXT: sw s3, 124(sp) -; CHECK-NEXT: addi a0, sp, 136 -; CHECK-NEXT: addi a1, sp, 120 -; CHECK-NEXT: addi a2, sp, 104 -; CHECK-NEXT: lw a3, 52(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 120(sp) +; CHECK-NEXT: sw zero, 120(sp) +; CHECK-NEXT: sw zero, 124(sp) +; CHECK-NEXT: sw zero, 128(sp) +; CHECK-NEXT: sw zero, 132(sp) +; CHECK-NEXT: addi a0, sp, 152 +; CHECK-NEXT: addi a1, sp, 136 +; CHECK-NEXT: addi a2, sp, 120 +; CHECK-NEXT: sw s5, 136(sp) +; CHECK-NEXT: sw s9, 140(sp) +; CHECK-NEXT: sw s10, 144(sp) +; CHECK-NEXT: sw s11, 148(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a3, 136(sp) -; CHECK-NEXT: lw a0, 140(sp) -; CHECK-NEXT: lw a1, 144(sp) -; CHECK-NEXT: lw a2, 148(sp) -; CHECK-NEXT: lui a4, 786400 -; CHECK-NEXT: sw a4, 68(sp) -; CHECK-NEXT: sw zero, 64(sp) -; CHECK-NEXT: sw zero, 60(sp) -; CHECK-NEXT: sw zero, 56(sp) -; CHECK-NEXT: sw a2, 84(sp) -; CHECK-NEXT: sw a1, 80(sp) -; CHECK-NEXT: sw a0, 76(sp) -; CHECK-NEXT: addi a0, sp, 88 -; CHECK-NEXT: addi a1, sp, 72 -; CHECK-NEXT: addi a2, sp, 56 -; CHECK-NEXT: sw a3, 72(sp) +; CHECK-NEXT: lw a3, 152(sp) +; CHECK-NEXT: lw a4, 156(sp) +; CHECK-NEXT: lw a5, 160(sp) +; CHECK-NEXT: lw a6, 164(sp) +; CHECK-NEXT: lui a0, 786400 +; CHECK-NEXT: sw zero, 72(sp) +; CHECK-NEXT: sw zero, 76(sp) +; CHECK-NEXT: sw zero, 80(sp) +; CHECK-NEXT: sw a0, 84(sp) +; CHECK-NEXT: addi a0, sp, 104 +; CHECK-NEXT: addi a1, sp, 88 +; CHECK-NEXT: addi a2, sp, 72 +; CHECK-NEXT: sw a3, 88(sp) +; CHECK-NEXT: sw a4, 92(sp) +; CHECK-NEXT: sw a5, 96(sp) +; CHECK-NEXT: sw a6, 100(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a0, 96(sp) -; CHECK-NEXT: lw a1, 100(sp) -; 
CHECK-NEXT: lw a2, 88(sp) -; CHECK-NEXT: lw a3, 92(sp) +; CHECK-NEXT: lw a0, 112(sp) +; CHECK-NEXT: lw a1, 116(sp) +; CHECK-NEXT: lw a2, 104(sp) +; CHECK-NEXT: lw a3, 108(sp) ; CHECK-NEXT: lui a4, %hi(Y1) ; CHECK-NEXT: sw a0, %lo(Y1+8)(a4) ; CHECK-NEXT: sw a1, %lo(Y1+12)(a4) ; CHECK-NEXT: sw a2, %lo(Y1)(a4) ; CHECK-NEXT: sw a3, %lo(Y1+4)(a4) -; CHECK-NEXT: lw ra, 684(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s0, 680(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s1, 676(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s2, 672(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s3, 668(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s4, 664(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s5, 660(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s6, 656(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s7, 652(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s8, 648(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s9, 644(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s10, 640(sp) # 4-byte Folded Reload -; CHECK-NEXT: lw s11, 636(sp) # 4-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 688 +; CHECK-NEXT: lw ra, 700(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s0, 696(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s1, 692(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s2, 688(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s3, 684(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s4, 680(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s5, 676(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s6, 672(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s7, 668(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s8, 664(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s9, 660(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s10, 656(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s11, 652(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 704 ; CHECK-NEXT: ret %1 = load fp128, ptr @U, align 16 %2 = fsub fp128 0xL00000000000000000000000000000000, %1 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 4dd6ed68ff98112..f7ef01b0958d87f 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -213,12 +213,12 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g ; CHECK-NEXT: lw t3, 44(sp) ; CHECK-NEXT: lw t4, 48(sp) ; CHECK-NEXT: lw t5, 52(sp) -; CHECK-NEXT: sw t5, 20(sp) ; CHECK-NEXT: sw t4, 16(sp) -; CHECK-NEXT: sw t3, 12(sp) -; CHECK-NEXT: sw t2, 8(sp) -; CHECK-NEXT: sw t1, 4(sp) +; CHECK-NEXT: sw t5, 20(sp) ; CHECK-NEXT: sw t0, 0(sp) +; CHECK-NEXT: sw t1, 4(sp) +; CHECK-NEXT: sw t2, 8(sp) +; CHECK-NEXT: sw t3, 12(sp) ; CHECK-NEXT: call callee_args ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 @@ -235,15 +235,15 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g ; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) ; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: ; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2) -; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) ; CHECK-LARGE-ZICFILP-NEXT: jalr t2 
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 @@ -260,12 +260,12 @@ define void @caller_indirect_args() nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -32 ; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lui a0, 262128 -; CHECK-NEXT: sw a0, 12(sp) -; CHECK-NEXT: sw zero, 8(sp) -; CHECK-NEXT: sw zero, 4(sp) +; CHECK-NEXT: lui a1, 262128 ; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: sw zero, 0(sp) +; CHECK-NEXT: sw zero, 4(sp) +; CHECK-NEXT: sw zero, 8(sp) +; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: call callee_indirect_args ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 @@ -276,15 +276,15 @@ define void @caller_indirect_args() nounwind { ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 ; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; CHECK-LARGE-ZICFILP-NEXT: lui a0, 262128 -; CHECK-LARGE-ZICFILP-NEXT: sw a0, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128 ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9: ; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0) -; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp) ; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp) ; CHECK-LARGE-ZICFILP-NEXT: jalr t2 ; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 0ca9fc065ff2d4d..a2f5e446b63bcd0 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -247,9 +247,9 @@ define void @store_i8(ptr %p, i8 %v) { define void @store_i16(ptr %p, i16 %v) { ; SLOW-LABEL: store_i16: ; SLOW: # %bb.0: +; SLOW-NEXT: srli a2, a1, 8 ; SLOW-NEXT: sb a1, 0(a0) -; SLOW-NEXT: srli a1, a1, 8 -; SLOW-NEXT: sb a1, 1(a0) +; SLOW-NEXT: sb a2, 1(a0) ; SLOW-NEXT: ret ; ; FAST-LABEL: store_i16: @@ -263,18 +263,18 @@ define void @store_i16(ptr %p, i16 %v) { define void @store_i24(ptr %p, i24 %v) { ; SLOW-LABEL: store_i24: ; SLOW: # %bb.0: -; SLOW-NEXT: sb a1, 0(a0) ; SLOW-NEXT: srli a2, a1, 8 +; SLOW-NEXT: srli a3, a1, 16 +; SLOW-NEXT: sb a1, 0(a0) ; SLOW-NEXT: sb a2, 1(a0) -; SLOW-NEXT: srli a1, a1, 16 -; SLOW-NEXT: sb a1, 2(a0) +; SLOW-NEXT: sb a3, 2(a0) ; SLOW-NEXT: ret ; ; FAST-LABEL: store_i24: ; FAST: # %bb.0: +; FAST-NEXT: srli a2, a1, 16 ; FAST-NEXT: sh a1, 0(a0) -; FAST-NEXT: srli a1, a1, 16 -; FAST-NEXT: sb a1, 2(a0) +; FAST-NEXT: sb a2, 2(a0) ; FAST-NEXT: ret store i24 %v, ptr %p, align 1 ret void @@ -283,13 +283,13 @@ define void @store_i24(ptr %p, i24 %v) { define void @store_i32(ptr %p, i32 %v) { ; SLOW-LABEL: store_i32: ; SLOW: # %bb.0: -; SLOW-NEXT: sb a1, 0(a0) ; SLOW-NEXT: srli a2, a1, 24 +; SLOW-NEXT: srli a3, a1, 16 +; SLOW-NEXT: srli a4, a1, 8 +; SLOW-NEXT: sb a1, 0(a0) +; SLOW-NEXT: sb a4, 1(a0) +; SLOW-NEXT: sb a3, 2(a0) ; SLOW-NEXT: sb a2, 3(a0) -; SLOW-NEXT: srli a2, a1, 16 -; SLOW-NEXT: sb a2, 2(a0) -; SLOW-NEXT: srli a1, a1, 8 -; SLOW-NEXT: sb a1, 1(a0) ; SLOW-NEXT: ret ; ; FAST-LABEL: store_i32: @@ -303,82 +303,82 @@ define void @store_i32(ptr %p, i32 %v) { define void @store_i64(ptr %p, i64 %v) { ; RV32I-LABEL: store_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: sb 
a2, 4(a0) -; RV32I-NEXT: sb a1, 0(a0) ; RV32I-NEXT: srli a3, a2, 24 +; RV32I-NEXT: srli a4, a2, 16 +; RV32I-NEXT: srli a5, a2, 8 +; RV32I-NEXT: sb a2, 4(a0) +; RV32I-NEXT: sb a5, 5(a0) +; RV32I-NEXT: sb a4, 6(a0) ; RV32I-NEXT: sb a3, 7(a0) -; RV32I-NEXT: srli a3, a2, 16 -; RV32I-NEXT: sb a3, 6(a0) -; RV32I-NEXT: srli a2, a2, 8 -; RV32I-NEXT: sb a2, 5(a0) ; RV32I-NEXT: srli a2, a1, 24 +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: sb a1, 0(a0) +; RV32I-NEXT: sb a4, 1(a0) +; RV32I-NEXT: sb a3, 2(a0) ; RV32I-NEXT: sb a2, 3(a0) -; RV32I-NEXT: srli a2, a1, 16 -; RV32I-NEXT: sb a2, 2(a0) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: store_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: sb a1, 0(a0) ; RV64I-NEXT: srli a2, a1, 56 +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: srli a4, a1, 40 +; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: sb a5, 4(a0) +; RV64I-NEXT: sb a4, 5(a0) +; RV64I-NEXT: sb a3, 6(a0) ; RV64I-NEXT: sb a2, 7(a0) -; RV64I-NEXT: srli a2, a1, 48 -; RV64I-NEXT: sb a2, 6(a0) -; RV64I-NEXT: srli a2, a1, 40 -; RV64I-NEXT: sb a2, 5(a0) -; RV64I-NEXT: srli a2, a1, 32 -; RV64I-NEXT: sb a2, 4(a0) ; RV64I-NEXT: srli a2, a1, 24 +; RV64I-NEXT: srli a3, a1, 16 +; RV64I-NEXT: srli a4, a1, 8 +; RV64I-NEXT: sb a1, 0(a0) +; RV64I-NEXT: sb a4, 1(a0) +; RV64I-NEXT: sb a3, 2(a0) ; RV64I-NEXT: sb a2, 3(a0) -; RV64I-NEXT: srli a2, a1, 16 -; RV64I-NEXT: sb a2, 2(a0) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a0) ; RV64I-NEXT: ret ; ; RV32IZBKB-LABEL: store_i64: ; RV32IZBKB: # %bb.0: -; RV32IZBKB-NEXT: sb a2, 4(a0) -; RV32IZBKB-NEXT: sb a1, 0(a0) ; RV32IZBKB-NEXT: srli a3, a2, 24 +; RV32IZBKB-NEXT: srli a4, a2, 16 +; RV32IZBKB-NEXT: srli a5, a2, 8 +; RV32IZBKB-NEXT: sb a2, 4(a0) +; RV32IZBKB-NEXT: sb a5, 5(a0) +; RV32IZBKB-NEXT: sb a4, 6(a0) ; RV32IZBKB-NEXT: sb a3, 7(a0) -; RV32IZBKB-NEXT: srli a3, a2, 16 -; RV32IZBKB-NEXT: sb a3, 6(a0) -; RV32IZBKB-NEXT: srli a2, a2, 8 -; RV32IZBKB-NEXT: sb a2, 5(a0) ; RV32IZBKB-NEXT: srli a2, a1, 24 +; RV32IZBKB-NEXT: srli a3, a1, 16 +; RV32IZBKB-NEXT: srli a4, a1, 8 +; RV32IZBKB-NEXT: sb a1, 0(a0) +; RV32IZBKB-NEXT: sb a4, 1(a0) +; RV32IZBKB-NEXT: sb a3, 2(a0) ; RV32IZBKB-NEXT: sb a2, 3(a0) -; RV32IZBKB-NEXT: srli a2, a1, 16 -; RV32IZBKB-NEXT: sb a2, 2(a0) -; RV32IZBKB-NEXT: srli a1, a1, 8 -; RV32IZBKB-NEXT: sb a1, 1(a0) ; RV32IZBKB-NEXT: ret ; ; RV64IZBKB-LABEL: store_i64: ; RV64IZBKB: # %bb.0: -; RV64IZBKB-NEXT: sb a1, 0(a0) ; RV64IZBKB-NEXT: srli a2, a1, 56 +; RV64IZBKB-NEXT: srli a3, a1, 48 +; RV64IZBKB-NEXT: srli a4, a1, 40 +; RV64IZBKB-NEXT: srli a5, a1, 32 +; RV64IZBKB-NEXT: sb a5, 4(a0) +; RV64IZBKB-NEXT: sb a4, 5(a0) +; RV64IZBKB-NEXT: sb a3, 6(a0) ; RV64IZBKB-NEXT: sb a2, 7(a0) -; RV64IZBKB-NEXT: srli a2, a1, 48 -; RV64IZBKB-NEXT: sb a2, 6(a0) -; RV64IZBKB-NEXT: srli a2, a1, 40 -; RV64IZBKB-NEXT: sb a2, 5(a0) -; RV64IZBKB-NEXT: srli a2, a1, 32 -; RV64IZBKB-NEXT: sb a2, 4(a0) ; RV64IZBKB-NEXT: srli a2, a1, 24 +; RV64IZBKB-NEXT: srli a3, a1, 16 +; RV64IZBKB-NEXT: srli a4, a1, 8 +; RV64IZBKB-NEXT: sb a1, 0(a0) +; RV64IZBKB-NEXT: sb a4, 1(a0) +; RV64IZBKB-NEXT: sb a3, 2(a0) ; RV64IZBKB-NEXT: sb a2, 3(a0) -; RV64IZBKB-NEXT: srli a2, a1, 16 -; RV64IZBKB-NEXT: sb a2, 2(a0) -; RV64IZBKB-NEXT: srli a1, a1, 8 -; RV64IZBKB-NEXT: sb a1, 1(a0) ; RV64IZBKB-NEXT: ret ; ; RV32I-FAST-LABEL: store_i64: ; RV32I-FAST: # %bb.0: -; RV32I-FAST-NEXT: sw a2, 4(a0) ; RV32I-FAST-NEXT: sw a1, 0(a0) +; RV32I-FAST-NEXT: sw a2, 4(a0) ; RV32I-FAST-NEXT: ret ; ; RV64I-FAST-LABEL: store_i64: @@ -543,31 +543,31 @@ 
define void @store_large_constant(ptr %x) { ; SLOW-LABEL: store_large_constant: ; SLOW: # %bb.0: ; SLOW-NEXT: li a1, -2 +; SLOW-NEXT: li a2, 220 +; SLOW-NEXT: li a3, 186 +; SLOW-NEXT: li a4, 152 +; SLOW-NEXT: sb a4, 4(a0) +; SLOW-NEXT: sb a3, 5(a0) +; SLOW-NEXT: sb a2, 6(a0) ; SLOW-NEXT: sb a1, 7(a0) -; SLOW-NEXT: li a1, 220 -; SLOW-NEXT: sb a1, 6(a0) -; SLOW-NEXT: li a1, 186 -; SLOW-NEXT: sb a1, 5(a0) -; SLOW-NEXT: li a1, 152 -; SLOW-NEXT: sb a1, 4(a0) ; SLOW-NEXT: li a1, 118 +; SLOW-NEXT: li a2, 84 +; SLOW-NEXT: li a3, 50 +; SLOW-NEXT: li a4, 16 +; SLOW-NEXT: sb a4, 0(a0) +; SLOW-NEXT: sb a3, 1(a0) +; SLOW-NEXT: sb a2, 2(a0) ; SLOW-NEXT: sb a1, 3(a0) -; SLOW-NEXT: li a1, 84 -; SLOW-NEXT: sb a1, 2(a0) -; SLOW-NEXT: li a1, 50 -; SLOW-NEXT: sb a1, 1(a0) -; SLOW-NEXT: li a1, 16 -; SLOW-NEXT: sb a1, 0(a0) ; SLOW-NEXT: ret ; ; RV32I-FAST-LABEL: store_large_constant: ; RV32I-FAST: # %bb.0: ; RV32I-FAST-NEXT: lui a1, 1043916 ; RV32I-FAST-NEXT: addi a1, a1, -1384 +; RV32I-FAST-NEXT: lui a2, 484675 +; RV32I-FAST-NEXT: addi a2, a2, 528 +; RV32I-FAST-NEXT: sw a2, 0(a0) ; RV32I-FAST-NEXT: sw a1, 4(a0) -; RV32I-FAST-NEXT: lui a1, 484675 -; RV32I-FAST-NEXT: addi a1, a1, 528 -; RV32I-FAST-NEXT: sw a1, 0(a0) ; RV32I-FAST-NEXT: ret ; ; RV64I-FAST-LABEL: store_large_constant: diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index 5a5ae66b5fa767b..0ee067b673da9ac 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -362,14 +362,14 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32-NEXT: addi s1, s1, -1 ; RV32-NEXT: slli a1, s1, 21 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 4(s0) -; RV32-NEXT: andi a1, s3, 2047 +; RV32-NEXT: andi a2, s3, 2047 ; RV32-NEXT: andi a0, a0, 2047 ; RV32-NEXT: slli a0, a0, 11 ; RV32-NEXT: slli s1, s1, 22 ; RV32-NEXT: or a0, a0, s1 -; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: or a0, a2, a0 ; RV32-NEXT: sw a0, 0(s0) +; RV32-NEXT: sb a1, 4(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -423,10 +423,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64-NEXT: slli a0, a0, 22 ; RV64-NEXT: or a0, a2, a0 ; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: sw a0, 0(s0) -; RV64-NEXT: slli a0, a0, 31 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 4(s0) +; RV64-NEXT: sb a1, 4(s0) ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -467,7 +467,6 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32M-NEXT: addi a1, a1, -1 ; RV32M-NEXT: slli a4, a1, 21 ; RV32M-NEXT: srli a4, a4, 31 -; RV32M-NEXT: sb a4, 4(a0) ; RV32M-NEXT: andi a2, a2, 2047 ; RV32M-NEXT: andi a3, a3, 2047 ; RV32M-NEXT: slli a3, a3, 11 @@ -475,6 +474,7 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32M-NEXT: or a1, a3, a1 ; RV32M-NEXT: or a1, a2, a1 ; RV32M-NEXT: sw a1, 0(a0) +; RV32M-NEXT: sb a4, 4(a0) ; RV32M-NEXT: ret ; ; RV64M-LABEL: test_urem_vec: @@ -513,10 +513,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64M-NEXT: slli a2, a2, 22 ; RV64M-NEXT: or a2, a3, a2 ; RV64M-NEXT: or a1, a1, a2 +; RV64M-NEXT: slli a2, a1, 31 +; RV64M-NEXT: srli a2, a2, 63 ; RV64M-NEXT: sw a1, 0(a0) -; RV64M-NEXT: slli a1, a1, 31 -; RV64M-NEXT: srli a1, a1, 63 -; RV64M-NEXT: sb a1, 4(a0) +; RV64M-NEXT: sb a2, 4(a0) ; 
RV64M-NEXT: ret ; ; RV32MV-LABEL: test_urem_vec: @@ -568,17 +568,17 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vmv.x.s a1, v9 ; RV32MV-NEXT: slli a2, a1, 21 ; RV32MV-NEXT: srli a2, a2, 31 -; RV32MV-NEXT: sb a2, 4(a0) -; RV32MV-NEXT: vmv.x.s a2, v8 -; RV32MV-NEXT: andi a2, a2, 2047 +; RV32MV-NEXT: vmv.x.s a3, v8 +; RV32MV-NEXT: andi a3, a3, 2047 ; RV32MV-NEXT: vslidedown.vi v8, v8, 1 +; RV32MV-NEXT: slli a1, a1, 22 +; RV32MV-NEXT: or a1, a3, a1 ; RV32MV-NEXT: vmv.x.s a3, v8 ; RV32MV-NEXT: andi a3, a3, 2047 ; RV32MV-NEXT: slli a3, a3, 11 -; RV32MV-NEXT: slli a1, a1, 22 -; RV32MV-NEXT: or a1, a2, a1 ; RV32MV-NEXT: or a1, a1, a3 ; RV32MV-NEXT: sw a1, 0(a0) +; RV32MV-NEXT: sb a2, 4(a0) ; RV32MV-NEXT: ret ; ; RV64MV-LABEL: test_urem_vec: @@ -636,10 +636,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: slli a3, a3, 22 ; RV64MV-NEXT: or a1, a1, a3 ; RV64MV-NEXT: or a1, a1, a2 +; RV64MV-NEXT: slli a2, a1, 31 +; RV64MV-NEXT: srli a2, a2, 63 ; RV64MV-NEXT: sw a1, 0(a0) -; RV64MV-NEXT: slli a1, a1, 31 -; RV64MV-NEXT: srli a1, a1, 63 -; RV64MV-NEXT: sb a1, 4(a0) +; RV64MV-NEXT: sb a2, 4(a0) ; RV64MV-NEXT: ret %ld = load <3 x i11>, ptr %X %urem = urem <3 x i11> %ld, diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index b0e790ed6063502..01f06474f78c23f 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -39,10 +39,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: li a1, 1003 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh a0, 6(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) +; RV32I-NEXT: sh s0, 2(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -83,10 +83,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV32IM-NEXT: li a6, 1003 ; RV32IM-NEXT: mul a5, a5, a6 ; RV32IM-NEXT: sub a1, a1, a5 -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a4, 4(a0) ; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_1: @@ -118,10 +118,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: li a1, 1003 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh a0, 6(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: sh s0, 2(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -162,10 +162,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: li a6, 1003 ; RV64IM-NEXT: mul a3, a3, a6 ; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: sh a1, 6(a0) -; RV64IM-NEXT: sh a5, 4(a0) ; RV64IM-NEXT: sh a4, 0(a0) ; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -201,10 +201,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh a0, 6(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh s0, 2(s3) ; RV32I-NEXT: sh s4, 0(s3) +; RV32I-NEXT: sh s0, 
2(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh a0, 6(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -235,10 +235,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV32IM-NEXT: mulhu a5, a1, a5 ; RV32IM-NEXT: mul a5, a5, a7 ; RV32IM-NEXT: sub a1, a1, a5 -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a4, 4(a0) -; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_2: @@ -270,10 +270,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh a0, 6(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh s0, 2(s3) ; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: sh s0, 2(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh a0, 6(s3) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -304,10 +304,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV64IM-NEXT: mulhu a2, a1, a2 ; RV64IM-NEXT: mul a2, a2, a7 ; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: sh a1, 6(a0) -; RV64IM-NEXT: sh a5, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -369,10 +369,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: add s2, s7, s2 ; RV32I-NEXT: add s3, s6, s3 ; RV32I-NEXT: add s4, s5, s4 -; RV32I-NEXT: sh s4, 6(s0) -; RV32I-NEXT: sh s3, 4(s0) -; RV32I-NEXT: sh s2, 2(s0) ; RV32I-NEXT: sh a0, 0(s0) +; RV32I-NEXT: sh s2, 2(s0) +; RV32I-NEXT: sh s3, 4(s0) +; RV32I-NEXT: sh s4, 6(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -411,10 +411,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: sub a1, a1, t2 ; RV32IM-NEXT: add a2, a2, a6 ; RV32IM-NEXT: sub a2, a2, t0 -; RV32IM-NEXT: sh a2, 6(a0) -; RV32IM-NEXT: sh a1, 4(a0) -; RV32IM-NEXT: sh a4, 2(a0) ; RV32IM-NEXT: sh a3, 0(a0) +; RV32IM-NEXT: sh a4, 2(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: @@ -470,10 +470,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64I-NEXT: add s2, s7, s2 ; RV64I-NEXT: add s3, s6, s3 ; RV64I-NEXT: add s4, s5, s4 -; RV64I-NEXT: sh s4, 6(s0) -; RV64I-NEXT: sh s3, 4(s0) -; RV64I-NEXT: sh s2, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) +; RV64I-NEXT: sh s2, 2(s0) +; RV64I-NEXT: sh s3, 4(s0) +; RV64I-NEXT: sh s4, 6(s0) ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload @@ -512,10 +512,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: subw a1, a1, t2 ; RV64IM-NEXT: add a2, a2, a6 ; RV64IM-NEXT: subw a2, a2, t0 -; RV64IM-NEXT: sh a2, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -544,10 +544,10 @@ define <4 x i16> 
@dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: andi a1, s1, 63 ; RV32I-NEXT: andi a2, s2, 31 ; RV32I-NEXT: andi a3, s3, 7 -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh a3, 4(s0) -; RV32I-NEXT: sh a2, 2(s0) ; RV32I-NEXT: sh a1, 0(s0) +; RV32I-NEXT: sh a2, 2(s0) +; RV32I-NEXT: sh a3, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -571,9 +571,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: andi a1, a1, 63 ; RV32IM-NEXT: andi a2, a2, 31 ; RV32IM-NEXT: andi a3, a3, 7 -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a2, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) +; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret ; @@ -596,10 +596,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: andi a1, s1, 63 ; RV64I-NEXT: andi a2, s2, 31 ; RV64I-NEXT: andi a3, s3, 7 -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh a3, 4(s0) -; RV64I-NEXT: sh a2, 2(s0) ; RV64I-NEXT: sh a1, 0(s0) +; RV64I-NEXT: sh a2, 2(s0) +; RV64I-NEXT: sh a3, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -623,9 +623,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64IM-NEXT: andi a1, a1, 63 ; RV64IM-NEXT: andi a5, a5, 31 ; RV64IM-NEXT: andi a4, a4, 7 -; RV64IM-NEXT: sh a4, 4(a0) -; RV64IM-NEXT: sh a5, 2(a0) ; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a5, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, @@ -658,10 +658,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh a0, 6(s2) -; RV32I-NEXT: sh s0, 4(s2) -; RV32I-NEXT: sh s3, 2(s2) ; RV32I-NEXT: sh zero, 0(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh s0, 4(s2) +; RV32I-NEXT: sh a0, 6(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -695,9 +695,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32IM-NEXT: mul a4, a4, a5 ; RV32IM-NEXT: sub a1, a1, a4 ; RV32IM-NEXT: sh zero, 0(a0) -; RV32IM-NEXT: sh a1, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_urem_one: @@ -724,10 +724,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh a0, 6(s2) -; RV64I-NEXT: sh s0, 4(s2) -; RV64I-NEXT: sh s3, 2(s2) ; RV64I-NEXT: sh zero, 0(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh s0, 4(s2) +; RV64I-NEXT: sh a0, 6(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -761,9 +761,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: mul a2, a2, a5 ; RV64IM-NEXT: subw a1, a1, a2 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a1, 6(a0) -; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 
x i16> %x, ret <4 x i16> %1 @@ -793,15 +793,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s0, 16(a1) -; RV32I-NEXT: lw s1, 20(a1) -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw s1, 16(a1) +; RV32I-NEXT: lw s2, 20(a1) +; RV32I-NEXT: lw s3, 24(a1) +; RV32I-NEXT: lw s4, 28(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw s4, 8(a1) -; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: lw s5, 8(a1) +; RV32I-NEXT: lw s6, 12(a1) +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: mv a1, a4 @@ -810,33 +810,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a0, s5 +; RV32I-NEXT: mv a1, s6 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s6, a1 ; RV32I-NEXT: li a2, 23 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a2, a0, 1327 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 -; RV32I-NEXT: sw a1, 28(s6) -; RV32I-NEXT: sw a0, 24(s6) -; RV32I-NEXT: sw s1, 20(s6) -; RV32I-NEXT: sw s0, 16(s6) -; RV32I-NEXT: sw s5, 12(s6) -; RV32I-NEXT: sw s4, 8(s6) -; RV32I-NEXT: sw s8, 4(s6) -; RV32I-NEXT: sw s7, 0(s6) +; RV32I-NEXT: sw s1, 16(s0) +; RV32I-NEXT: sw s2, 20(s0) +; RV32I-NEXT: sw a0, 24(s0) +; RV32I-NEXT: sw a1, 28(s0) +; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: sw s8, 4(s0) +; RV32I-NEXT: sw s5, 8(s0) +; RV32I-NEXT: sw s6, 12(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -863,15 +863,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s0, 16(a1) -; RV32IM-NEXT: lw s1, 20(a1) -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw s1, 16(a1) +; RV32IM-NEXT: lw s2, 20(a1) +; RV32IM-NEXT: lw s3, 24(a1) +; RV32IM-NEXT: lw s4, 28(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw s4, 8(a1) -; RV32IM-NEXT: lw s5, 12(a1) -; RV32IM-NEXT: mv s6, a0 +; RV32IM-NEXT: lw s5, 8(a1) +; RV32IM-NEXT: lw s6, 12(a1) +; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: mv a1, a4 @@ -880,33 +880,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s4 -; RV32IM-NEXT: mv a1, s5 +; RV32IM-NEXT: mv a0, s5 +; RV32IM-NEXT: mv a1, s6 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: mv s5, a0 +; RV32IM-NEXT: mv s6, a1 ; RV32IM-NEXT: li a2, 23 -; RV32IM-NEXT: mv a0, s0 -; RV32IM-NEXT: mv 
a1, s1 +; RV32IM-NEXT: mv a0, s1 +; RV32IM-NEXT: mv a1, s2 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: mv s0, a0 -; RV32IM-NEXT: mv s1, a1 +; RV32IM-NEXT: mv s1, a0 +; RV32IM-NEXT: mv s2, a1 ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a2, a0, 1327 -; RV32IM-NEXT: mv a0, s2 -; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a0, s3 +; RV32IM-NEXT: mv a1, s4 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: sw a1, 28(s6) -; RV32IM-NEXT: sw a0, 24(s6) -; RV32IM-NEXT: sw s1, 20(s6) -; RV32IM-NEXT: sw s0, 16(s6) -; RV32IM-NEXT: sw s5, 12(s6) -; RV32IM-NEXT: sw s4, 8(s6) -; RV32IM-NEXT: sw s8, 4(s6) -; RV32IM-NEXT: sw s7, 0(s6) +; RV32IM-NEXT: sw s1, 16(s0) +; RV32IM-NEXT: sw s2, 20(s0) +; RV32IM-NEXT: sw a0, 24(s0) +; RV32IM-NEXT: sw a1, 28(s0) +; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: sw s8, 4(s0) +; RV32IM-NEXT: sw s5, 8(s0) +; RV32IM-NEXT: sw s6, 12(s0) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -944,10 +944,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sd a0, 24(s2) -; RV64I-NEXT: sd s0, 16(s2) -; RV64I-NEXT: sd s3, 8(s2) ; RV64I-NEXT: sd zero, 0(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd s0, 16(s2) +; RV64I-NEXT: sd a0, 24(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -988,9 +988,9 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: mul a2, a2, a5 ; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a1, 24(a0) ; RV64IM-NEXT: sd a4, 8(a0) ; RV64IM-NEXT: sd a3, 16(a0) +; RV64IM-NEXT: sd a1, 24(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll index 18bb4f5ad0f0c06..7c4c98a12f4b818 100644 --- a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll @@ -64,10 +64,10 @@ define void @va_double(i32 %n, ...) { ; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill ; ILP32E-NEXT: .cfi_offset ra, -28 ; ILP32E-NEXT: sw a5, 28(sp) -; ILP32E-NEXT: sw a4, 24(sp) -; ILP32E-NEXT: sw a3, 20(sp) -; ILP32E-NEXT: sw a2, 16(sp) ; ILP32E-NEXT: sw a1, 12(sp) +; ILP32E-NEXT: sw a2, 16(sp) +; ILP32E-NEXT: sw a3, 20(sp) +; ILP32E-NEXT: sw a4, 24(sp) ; ILP32E-NEXT: addi a0, sp, 12 ; ILP32E-NEXT: sw a0, 0(sp) ; ILP32E-NEXT: addi a0, sp, 19 @@ -98,10 +98,10 @@ define void @va_double(i32 %n, ...) { ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 4 ; ILP32E-WITHFP-NEXT: sw a0, -12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index d2c30c543907026..c53e5a78d1988c5 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -55,13 +55,13 @@ define i32 @va1(ptr %fmt, ...) 
{ ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 ; ILP32-ILP32F-FPELIM-NEXT: .cfi_def_cfa_offset 48 ; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 @@ -78,13 +78,13 @@ define i32 @va1(ptr %fmt, ...) { ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 ; ILP32-ILP32F-WITHFP-NEXT: .cfi_def_cfa s0, 32 ; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -97,13 +97,13 @@ define i32 @va1(ptr %fmt, ...) { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: .cfi_def_cfa_offset 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 @@ -115,10 +115,10 @@ define i32 @va1(ptr %fmt, ...) { ; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 28 ; ILP32E-FPELIM-NEXT: mv a0, a1 ; ILP32E-FPELIM-NEXT: sw a5, 24(sp) -; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 16(sp) -; ILP32E-FPELIM-NEXT: sw a2, 12(sp) ; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a2, 12(sp) +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-NEXT: sw a4, 20(sp) ; ILP32E-FPELIM-NEXT: addi a1, sp, 12 ; ILP32E-FPELIM-NEXT: sw a1, 0(sp) ; ILP32E-FPELIM-NEXT: addi sp, sp, 28 @@ -136,10 +136,10 @@ define i32 @va1(ptr %fmt, ...) 
{ ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24 ; ILP32E-WITHFP-NEXT: mv a0, a1 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a1, s0, 8 ; ILP32E-WITHFP-NEXT: sw a1, -12(s0) ; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload @@ -152,15 +152,15 @@ define i32 @va1(ptr %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 ; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 28 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -175,15 +175,15 @@ define i32 @va1(ptr %fmt, ...) { ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa s0, 64 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 @@ -193,14 +193,14 @@ define i32 @va1(ptr %fmt, ...) { ; LP64E-FPELIM: # %bb.0: ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 56 -; LP64E-FPELIM-NEXT: sd a1, 16(sp) ; LP64E-FPELIM-NEXT: addi a0, sp, 20 ; LP64E-FPELIM-NEXT: sd a0, 0(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) ; LP64E-FPELIM-NEXT: lw a0, 16(sp) ; LP64E-FPELIM-NEXT: sd a5, 48(sp) -; LP64E-FPELIM-NEXT: sd a4, 40(sp) -; LP64E-FPELIM-NEXT: sd a3, 32(sp) ; LP64E-FPELIM-NEXT: sd a2, 24(sp) +; LP64E-FPELIM-NEXT: sd a3, 32(sp) +; LP64E-FPELIM-NEXT: sd a4, 40(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 ; LP64E-FPELIM-NEXT: ret ; @@ -214,14 +214,14 @@ define i32 @va1(ptr %fmt, ...) 
{ ; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 ; LP64E-WITHFP-NEXT: addi s0, sp, 24 ; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 -; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: addi a0, s0, 12 ; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) ; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: addi sp, sp, 72 @@ -241,13 +241,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 ; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 @@ -260,13 +260,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 ; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -278,13 +278,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 @@ -295,10 +295,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) 
nounwind { ; ILP32E-FPELIM-NEXT: addi sp, sp, -28 ; ILP32E-FPELIM-NEXT: mv a0, a1 ; ILP32E-FPELIM-NEXT: sw a5, 24(sp) -; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 16(sp) -; ILP32E-FPELIM-NEXT: sw a2, 12(sp) ; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a2, 12(sp) +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-NEXT: sw a4, 20(sp) ; ILP32E-FPELIM-NEXT: addi a1, sp, 12 ; ILP32E-FPELIM-NEXT: sw a1, 0(sp) ; ILP32E-FPELIM-NEXT: addi sp, sp, 28 @@ -312,10 +312,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 ; ILP32E-WITHFP-NEXT: mv a0, a1 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a1, s0, 8 ; ILP32E-WITHFP-NEXT: sw a1, -12(s0) ; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload @@ -327,13 +327,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 @@ -346,13 +346,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -365,10 +365,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind { ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: mv a0, a1 ; LP64E-FPELIM-NEXT: sd a5, 48(sp) -; LP64E-FPELIM-NEXT: sd a4, 40(sp) -; LP64E-FPELIM-NEXT: sd a3, 32(sp) -; LP64E-FPELIM-NEXT: sd a2, 24(sp) ; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: sd a2, 24(sp) +; LP64E-FPELIM-NEXT: sd a3, 32(sp) +; LP64E-FPELIM-NEXT: sd a4, 40(sp) ; LP64E-FPELIM-NEXT: addi a1, sp, 24 ; LP64E-FPELIM-NEXT: sd a1, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -382,10 +382,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) 
nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 24 ; LP64E-WITHFP-NEXT: mv a0, a1 ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: addi a1, s0, 16 ; LP64E-WITHFP-NEXT: sd a1, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload @@ -410,13 +410,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; ILP32-ILP32F-FPELIM-NEXT: addi s0, sp, 16 ; ILP32-ILP32F-FPELIM-NEXT: mv s1, a1 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, s0, 8 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, -16(s0) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 15 @@ -440,13 +440,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 ; ILP32-ILP32F-WITHFP-NEXT: mv s1, a1 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 15 @@ -470,13 +470,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi s0, sp, 16 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv s1, a1 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(s0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(s0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(s0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(s0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 8(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 4(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 8(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(s0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, s0, 8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, -16(s0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a1, 15 @@ -501,10 +501,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) 
nounwind { ; ILP32E-FPELIM-NEXT: addi s0, sp, 16 ; ILP32E-FPELIM-NEXT: mv s1, a1 ; ILP32E-FPELIM-NEXT: sw a5, 20(s0) -; ILP32E-FPELIM-NEXT: sw a4, 16(s0) -; ILP32E-FPELIM-NEXT: sw a3, 12(s0) -; ILP32E-FPELIM-NEXT: sw a2, 8(s0) ; ILP32E-FPELIM-NEXT: sw a1, 4(s0) +; ILP32E-FPELIM-NEXT: sw a2, 8(s0) +; ILP32E-FPELIM-NEXT: sw a3, 12(s0) +; ILP32E-FPELIM-NEXT: sw a4, 16(s0) ; ILP32E-FPELIM-NEXT: addi a0, s0, 8 ; ILP32E-FPELIM-NEXT: sw a0, -16(s0) ; ILP32E-FPELIM-NEXT: addi a0, a1, 3 @@ -529,10 +529,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; ILP32E-WITHFP-NEXT: addi s0, sp, 16 ; ILP32E-WITHFP-NEXT: mv s1, a1 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 8 ; ILP32E-WITHFP-NEXT: sw a0, -16(s0) ; ILP32E-WITHFP-NEXT: addi a0, a1, 3 @@ -556,13 +556,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-FPELIM-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv s1, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, s0, 16 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, -32(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32 @@ -588,13 +588,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32 @@ -621,10 +621,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; LP64E-FPELIM-NEXT: addi s0, sp, 32 ; LP64E-FPELIM-NEXT: mv s1, a1 ; LP64E-FPELIM-NEXT: sd a5, 40(s0) -; LP64E-FPELIM-NEXT: sd a4, 32(s0) -; LP64E-FPELIM-NEXT: sd a3, 24(s0) -; LP64E-FPELIM-NEXT: sd a2, 16(s0) ; LP64E-FPELIM-NEXT: sd a1, 8(s0) +; LP64E-FPELIM-NEXT: sd a2, 16(s0) +; LP64E-FPELIM-NEXT: sd a3, 24(s0) +; LP64E-FPELIM-NEXT: sd a4, 32(s0) ; LP64E-FPELIM-NEXT: addi a0, s0, 16 ; LP64E-FPELIM-NEXT: sd a0, -32(s0) ; LP64E-FPELIM-NEXT: slli a0, a1, 32 @@ -651,10 +651,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) 
nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 32 ; LP64E-WITHFP-NEXT: mv s1, a1 ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: addi a0, s0, 16 ; LP64E-WITHFP-NEXT: sd a0, -32(s0) ; LP64E-WITHFP-NEXT: slli a0, a1, 32 @@ -812,13 +812,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM-LABEL: va2: ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 20 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 @@ -836,13 +836,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 @@ -859,13 +859,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va2: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 20 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27 @@ -881,10 +881,10 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: addi sp, sp, -28 ; ILP32E-FPELIM-NEXT: sw a5, 24(sp) -; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 16(sp) -; ILP32E-FPELIM-NEXT: sw a2, 12(sp) ; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a2, 12(sp) +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-NEXT: sw a4, 20(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 8 ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 15 @@ -903,10 +903,10 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 4 ; ILP32E-WITHFP-NEXT: sw a0, -12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 @@ -924,13 +924,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 39 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 @@ -943,13 +943,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 23 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -962,10 +962,10 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: mv a0, a1 ; LP64E-FPELIM-NEXT: sd a5, 48(sp) -; LP64E-FPELIM-NEXT: sd a4, 40(sp) -; LP64E-FPELIM-NEXT: sd a3, 32(sp) -; LP64E-FPELIM-NEXT: sd a2, 24(sp) ; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: sd a2, 24(sp) +; LP64E-FPELIM-NEXT: sd a3, 32(sp) +; LP64E-FPELIM-NEXT: sd a4, 40(sp) ; LP64E-FPELIM-NEXT: addi a1, sp, 31 ; LP64E-FPELIM-NEXT: sd a1, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -979,10 +979,10 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 24 ; LP64E-WITHFP-NEXT: mv a0, a1 ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: addi a1, s0, 23 ; LP64E-WITHFP-NEXT: sd a1, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload @@ -1009,13 +1009,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM-LABEL: va2_va_arg: ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 ; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 4 @@ -1033,13 +1033,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 4 @@ -1056,13 +1056,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va2_va_arg: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a0, 8 @@ -1078,10 +1078,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) 
nounwind { ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: addi sp, sp, -28 ; ILP32E-FPELIM-NEXT: sw a5, 24(sp) -; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 16(sp) -; ILP32E-FPELIM-NEXT: sw a2, 12(sp) ; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a2, 12(sp) +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) +; ILP32E-FPELIM-NEXT: sw a4, 20(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 15 ; ILP32E-FPELIM-NEXT: andi a1, a0, -8 ; ILP32E-FPELIM-NEXT: addi a0, a1, 4 @@ -1100,10 +1100,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 ; ILP32E-WITHFP-NEXT: andi a1, a0, -8 ; ILP32E-WITHFP-NEXT: addi a0, a1, 4 @@ -1121,13 +1121,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 @@ -1140,13 +1140,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -1159,10 +1159,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: mv a0, a1 ; LP64E-FPELIM-NEXT: sd a5, 48(sp) -; LP64E-FPELIM-NEXT: sd a4, 40(sp) -; LP64E-FPELIM-NEXT: sd a3, 32(sp) -; LP64E-FPELIM-NEXT: sd a2, 24(sp) ; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: sd a2, 24(sp) +; LP64E-FPELIM-NEXT: sd a3, 32(sp) +; LP64E-FPELIM-NEXT: sd a4, 40(sp) ; LP64E-FPELIM-NEXT: addi a1, sp, 24 ; LP64E-FPELIM-NEXT: sd a1, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -1176,10 +1176,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) 
nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 24 ; LP64E-WITHFP-NEXT: mv a0, a1 ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: addi a1, s0, 16 ; LP64E-WITHFP-NEXT: sd a1, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload @@ -1317,10 +1317,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -32 ; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 12 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19 @@ -1343,10 +1343,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 24 ; ILP32-ILP32F-WITHFP-NEXT: sw a7, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 @@ -1368,10 +1368,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 12 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 19 @@ -1390,9 +1390,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32E-FPELIM-LABEL: va3: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: addi sp, sp, -20 -; ILP32E-FPELIM-NEXT: sw a5, 16(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-NEXT: sw a3, 8(sp) +; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: sw a5, 16(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 8 ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 15 @@ -1414,9 +1414,9 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 -; ILP32E-WITHFP-NEXT: sw a5, 12(s0) -; ILP32E-WITHFP-NEXT: sw a4, 8(s0) ; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 4 ; ILP32E-WITHFP-NEXT: sw a0, -12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 @@ -1437,12 +1437,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-LABEL: va3: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 31 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) @@ -1455,12 +1455,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 15 ; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) @@ -1472,10 +1472,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64E-FPELIM-LABEL: va3: ; LP64E-FPELIM: # %bb.0: ; LP64E-FPELIM-NEXT: addi sp, sp, -40 -; LP64E-FPELIM-NEXT: sd a5, 32(sp) -; LP64E-FPELIM-NEXT: sd a4, 24(sp) -; LP64E-FPELIM-NEXT: sd a3, 16(sp) ; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: sd a3, 16(sp) +; LP64E-FPELIM-NEXT: sd a4, 24(sp) +; LP64E-FPELIM-NEXT: sd a5, 32(sp) ; LP64E-FPELIM-NEXT: addi a3, sp, 23 ; LP64E-FPELIM-NEXT: add a0, a1, a2 ; LP64E-FPELIM-NEXT: sd a3, 0(sp) @@ -1488,10 +1488,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: addi s0, sp, 24 -; LP64E-WITHFP-NEXT: sd a5, 24(s0) -; LP64E-WITHFP-NEXT: sd a4, 16(s0) -; LP64E-WITHFP-NEXT: sd a3, 8(s0) ; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a5, 24(s0) ; LP64E-WITHFP-NEXT: addi a3, s0, 15 ; LP64E-WITHFP-NEXT: add a0, a1, a2 ; LP64E-WITHFP-NEXT: sd a3, -24(s0) @@ -1521,10 +1521,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -32 ; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19 ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, a0, 4 @@ -1547,10 +1547,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 24 ; ILP32-ILP32F-WITHFP-NEXT: sw a7, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, a0, 4 @@ -1572,10 +1572,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, a0, 8 @@ -1594,9 +1594,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32E-FPELIM-LABEL: va3_va_arg: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: addi sp, sp, -20 -; ILP32E-FPELIM-NEXT: sw a5, 16(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-NEXT: sw a3, 8(sp) +; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: sw a5, 16(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 15 ; ILP32E-FPELIM-NEXT: andi a0, a0, -8 ; ILP32E-FPELIM-NEXT: addi a3, a0, 4 @@ -1618,9 +1618,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 -; ILP32E-WITHFP-NEXT: sw a5, 12(s0) -; ILP32E-WITHFP-NEXT: sw a4, 8(s0) ; ILP32E-WITHFP-NEXT: sw a3, 4(s0) +; ILP32E-WITHFP-NEXT: sw a4, 8(s0) +; ILP32E-WITHFP-NEXT: sw a5, 12(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 11 ; ILP32E-WITHFP-NEXT: andi a0, a0, -8 ; ILP32E-WITHFP-NEXT: addi a3, a0, 4 @@ -1641,12 +1641,12 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { ; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 24 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) @@ -1659,12 +1659,12 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) @@ -1676,10 +1676,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64E-FPELIM-LABEL: va3_va_arg: ; LP64E-FPELIM: # %bb.0: ; LP64E-FPELIM-NEXT: addi sp, sp, -40 -; LP64E-FPELIM-NEXT: sd a5, 32(sp) -; LP64E-FPELIM-NEXT: sd a4, 24(sp) -; LP64E-FPELIM-NEXT: sd a3, 16(sp) ; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: sd a3, 16(sp) +; LP64E-FPELIM-NEXT: sd a4, 24(sp) +; LP64E-FPELIM-NEXT: sd a5, 32(sp) ; LP64E-FPELIM-NEXT: addi a3, sp, 16 ; LP64E-FPELIM-NEXT: add a0, a1, a2 ; LP64E-FPELIM-NEXT: sd a3, 0(sp) @@ -1692,10 +1692,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: addi s0, sp, 24 -; LP64E-WITHFP-NEXT: sd a5, 24(s0) -; LP64E-WITHFP-NEXT: sd a4, 16(s0) -; LP64E-WITHFP-NEXT: sd a3, 8(s0) ; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a5, 24(s0) ; LP64E-WITHFP-NEXT: addi a3, s0, 8 ; LP64E-WITHFP-NEXT: add a0, a1, a2 ; LP64E-WITHFP-NEXT: sd a3, -24(s0) @@ -1859,13 +1859,13 @@ define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; ILP32-ILP32F-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32-ILP32F-FPELIM-NEXT: mv s0, a1 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 24 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp) @@ -1902,13 +1902,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 32 ; ILP32-ILP32F-WITHFP-NEXT: mv s1, a1 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) @@ -1944,13 +1944,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv s0, a1 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 24 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp) @@ -1986,10 +1986,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: mv s0, a1 ; ILP32E-FPELIM-NEXT: sw a5, 36(sp) -; ILP32E-FPELIM-NEXT: sw a4, 32(sp) -; ILP32E-FPELIM-NEXT: sw a3, 28(sp) -; ILP32E-FPELIM-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-NEXT: sw a1, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: sw a4, 32(sp) ; ILP32E-FPELIM-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-NEXT: sw a0, 4(sp) ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) @@ -2027,10 +2027,10 @@ define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { ; ILP32E-WITHFP-NEXT: addi s0, sp, 20 ; ILP32E-WITHFP-NEXT: mv s1, a1 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a0, s0, 8 ; ILP32E-WITHFP-NEXT: sw a0, -16(s0) ; ILP32E-WITHFP-NEXT: sw a0, -20(s0) @@ -2066,13 +2066,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-FPELIM-NEXT: mv s0, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 88(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 80(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 64(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 56(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 80(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 88(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 48 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 0(sp) @@ -2109,13 +2109,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -40(s0) @@ -2152,10 +2152,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64E-FPELIM-NEXT: mv s0, a1 ; LP64E-FPELIM-NEXT: sd a5, 72(sp) -; LP64E-FPELIM-NEXT: sd a4, 64(sp) -; LP64E-FPELIM-NEXT: sd a3, 56(sp) -; LP64E-FPELIM-NEXT: sd a2, 48(sp) ; LP64E-FPELIM-NEXT: sd a1, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 64(sp) ; LP64E-FPELIM-NEXT: addi a0, sp, 48 ; LP64E-FPELIM-NEXT: sd a0, 8(sp) ; LP64E-FPELIM-NEXT: sd a0, 0(sp) @@ -2193,10 +2193,10 @@ define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 40 ; LP64E-WITHFP-NEXT: mv s1, a1 ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: addi a0, s0, 16 ; LP64E-WITHFP-NEXT: sd a0, -32(s0) ; LP64E-WITHFP-NEXT: sd a0, -40(s0) @@ -2255,30 +2255,27 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -64 ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; ILP32-ILP32F-FPELIM-NEXT: li a0, 17 +; ILP32-ILP32F-FPELIM-NEXT: li a1, 16 +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 24(sp) -; ILP32-ILP32F-FPELIM-NEXT: li a0, 16 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: li a0, 15 +; ILP32-ILP32F-FPELIM-NEXT: lui a1, 262236 +; ILP32-ILP32F-FPELIM-NEXT: addi a1, a1, 655 +; ILP32-ILP32F-FPELIM-NEXT: lui a2, 377487 +; ILP32-ILP32F-FPELIM-NEXT: addi a2, a2, 1475 +; ILP32-ILP32F-FPELIM-NEXT: li a3, 14 +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 0(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp) -; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262236 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 655 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: lui a0, 377487 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 1475 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 8(sp) -; ILP32-ILP32F-FPELIM-NEXT: li a0, 14 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262153 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 44(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 491 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 545260 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi t0, a0, -1967 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 964690 -; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi t1, a0, -328 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 335544 -; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 1311 +; ILP32-ILP32F-FPELIM-NEXT: addi t2, a0, 1311 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 688509 ; ILP32-ILP32F-FPELIM-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-FPELIM-NEXT: li a0, 1 @@ -2287,7 +2284,10 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-FPELIM-NEXT: li a3, 12 ; ILP32-ILP32F-FPELIM-NEXT: li a4, 13 ; ILP32-ILP32F-FPELIM-NEXT: li a7, 4 -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw t2, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw t1, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw t0, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64 @@ -2300,30 +2300,27 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 64 ; ILP32-ILP32F-WITHFP-NEXT: li a0, 17 +; ILP32-ILP32F-WITHFP-NEXT: li a1, 16 +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 20(sp) ; ILP32-ILP32F-WITHFP-NEXT: sw a0, 24(sp) -; ILP32-ILP32F-WITHFP-NEXT: li a0, 16 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 20(sp) ; ILP32-ILP32F-WITHFP-NEXT: li a0, 15 +; ILP32-ILP32F-WITHFP-NEXT: lui a1, 
262236 +; ILP32-ILP32F-WITHFP-NEXT: addi a1, a1, 655 +; ILP32-ILP32F-WITHFP-NEXT: lui a2, 377487 +; ILP32-ILP32F-WITHFP-NEXT: addi a2, a2, 1475 +; ILP32-ILP32F-WITHFP-NEXT: li a3, 14 +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 0(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 12(sp) ; ILP32-ILP32F-WITHFP-NEXT: sw a0, 16(sp) -; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262236 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 655 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 12(sp) -; ILP32-ILP32F-WITHFP-NEXT: lui a0, 377487 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 1475 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 8(sp) -; ILP32-ILP32F-WITHFP-NEXT: li a0, 14 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262153 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) +; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 491 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 545260 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -24(s0) +; ILP32-ILP32F-WITHFP-NEXT: addi t0, a0, -1967 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 964690 -; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -28(s0) +; ILP32-ILP32F-WITHFP-NEXT: addi t1, a0, -328 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 335544 -; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 1311 +; ILP32-ILP32F-WITHFP-NEXT: addi t2, a0, 1311 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 688509 ; ILP32-ILP32F-WITHFP-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-WITHFP-NEXT: li a0, 1 @@ -2332,7 +2329,10 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-WITHFP-NEXT: li a3, 12 ; ILP32-ILP32F-WITHFP-NEXT: li a4, 13 ; ILP32-ILP32F-WITHFP-NEXT: li a7, 4 -; ILP32-ILP32F-WITHFP-NEXT: sw a5, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw t2, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw t1, -28(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw t0, -24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, -20(s0) ; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -2345,29 +2345,26 @@ define void @va5_aligned_stack_caller() nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262236 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 655 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 377487 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a1, 1475 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a2, 17 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 16 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a2, 15 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 14 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 0(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 8(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 377487 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 1475 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 17 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 24(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 16 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 20(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 15 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 14 -; 
RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262153 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 491 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 44(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 491 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 545260 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -1967 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t0, a0, -1967 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 964690 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -328 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t1, a0, -328 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 335544 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 1311 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t2, a0, 1311 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 688509 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a0, -2048 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 1 @@ -2376,7 +2373,10 @@ define void @va5_aligned_stack_caller() nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 12 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a4, 13 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t2, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t1, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 @@ -2390,32 +2390,29 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-NEXT: li a0, 17 +; ILP32E-FPELIM-NEXT: li a1, 16 +; ILP32E-FPELIM-NEXT: li a2, 15 +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a1, 20(sp) ; ILP32E-FPELIM-NEXT: sw a0, 24(sp) -; ILP32E-FPELIM-NEXT: li a0, 16 -; ILP32E-FPELIM-NEXT: sw a0, 20(sp) -; ILP32E-FPELIM-NEXT: li a0, 15 -; ILP32E-FPELIM-NEXT: sw a0, 16(sp) ; ILP32E-FPELIM-NEXT: lui a0, 262236 ; ILP32E-FPELIM-NEXT: addi a0, a0, 655 +; ILP32E-FPELIM-NEXT: lui a1, 377487 +; ILP32E-FPELIM-NEXT: addi a1, a1, 1475 +; ILP32E-FPELIM-NEXT: li a2, 14 +; ILP32E-FPELIM-NEXT: li a3, 4 +; ILP32E-FPELIM-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) ; ILP32E-FPELIM-NEXT: sw a0, 12(sp) -; ILP32E-FPELIM-NEXT: lui a0, 377487 -; ILP32E-FPELIM-NEXT: addi a0, a0, 1475 -; ILP32E-FPELIM-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-NEXT: li a0, 14 -; ILP32E-FPELIM-NEXT: sw a0, 4(sp) -; ILP32E-FPELIM-NEXT: li a0, 4 -; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: lui a0, 262153 -; ILP32E-FPELIM-NEXT: addi a0, a0, 491 -; ILP32E-FPELIM-NEXT: sw a0, 44(sp) +; ILP32E-FPELIM-NEXT: addi a6, a0, 491 ; ILP32E-FPELIM-NEXT: lui a0, 545260 -; ILP32E-FPELIM-NEXT: addi a0, a0, -1967 -; ILP32E-FPELIM-NEXT: sw a0, 40(sp) +; ILP32E-FPELIM-NEXT: addi a7, a0, -1967 ; ILP32E-FPELIM-NEXT: lui a0, 964690 -; ILP32E-FPELIM-NEXT: addi a0, a0, -328 -; ILP32E-FPELIM-NEXT: sw a0, 36(sp) +; ILP32E-FPELIM-NEXT: addi t0, a0, -328 ; ILP32E-FPELIM-NEXT: lui a0, 335544 -; ILP32E-FPELIM-NEXT: addi a6, a0, 1311 +; ILP32E-FPELIM-NEXT: addi t1, 
a0, 1311 ; ILP32E-FPELIM-NEXT: lui a0, 688509 ; ILP32E-FPELIM-NEXT: addi a5, a0, -2048 ; ILP32E-FPELIM-NEXT: li a0, 1 @@ -2423,7 +2420,10 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32E-FPELIM-NEXT: addi a2, sp, 32 ; ILP32E-FPELIM-NEXT: li a3, 12 ; ILP32E-FPELIM-NEXT: li a4, 13 -; ILP32E-FPELIM-NEXT: sw a6, 32(sp) +; ILP32E-FPELIM-NEXT: sw t1, 32(sp) +; ILP32E-FPELIM-NEXT: sw t0, 36(sp) +; ILP32E-FPELIM-NEXT: sw a7, 40(sp) +; ILP32E-FPELIM-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -2439,32 +2439,29 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-NEXT: li a0, 17 +; ILP32E-WITHFP-NEXT: li a1, 16 +; ILP32E-WITHFP-NEXT: li a2, 15 +; ILP32E-WITHFP-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-NEXT: sw a1, 20(sp) ; ILP32E-WITHFP-NEXT: sw a0, 24(sp) -; ILP32E-WITHFP-NEXT: li a0, 16 -; ILP32E-WITHFP-NEXT: sw a0, 20(sp) -; ILP32E-WITHFP-NEXT: li a0, 15 -; ILP32E-WITHFP-NEXT: sw a0, 16(sp) ; ILP32E-WITHFP-NEXT: lui a0, 262236 ; ILP32E-WITHFP-NEXT: addi a0, a0, 655 +; ILP32E-WITHFP-NEXT: lui a1, 377487 +; ILP32E-WITHFP-NEXT: addi a1, a1, 1475 +; ILP32E-WITHFP-NEXT: li a2, 14 +; ILP32E-WITHFP-NEXT: li a3, 4 +; ILP32E-WITHFP-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) ; ILP32E-WITHFP-NEXT: sw a0, 12(sp) -; ILP32E-WITHFP-NEXT: lui a0, 377487 -; ILP32E-WITHFP-NEXT: addi a0, a0, 1475 -; ILP32E-WITHFP-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-NEXT: li a0, 14 -; ILP32E-WITHFP-NEXT: sw a0, 4(sp) -; ILP32E-WITHFP-NEXT: li a0, 4 -; ILP32E-WITHFP-NEXT: sw a0, 0(sp) ; ILP32E-WITHFP-NEXT: lui a0, 262153 -; ILP32E-WITHFP-NEXT: addi a0, a0, 491 -; ILP32E-WITHFP-NEXT: sw a0, 44(sp) +; ILP32E-WITHFP-NEXT: addi a6, a0, 491 ; ILP32E-WITHFP-NEXT: lui a0, 545260 -; ILP32E-WITHFP-NEXT: addi a0, a0, -1967 -; ILP32E-WITHFP-NEXT: sw a0, 40(sp) +; ILP32E-WITHFP-NEXT: addi a7, a0, -1967 ; ILP32E-WITHFP-NEXT: lui a0, 964690 -; ILP32E-WITHFP-NEXT: addi a0, a0, -328 -; ILP32E-WITHFP-NEXT: sw a0, 36(sp) +; ILP32E-WITHFP-NEXT: addi t0, a0, -328 ; ILP32E-WITHFP-NEXT: lui a0, 335544 -; ILP32E-WITHFP-NEXT: addi a6, a0, 1311 +; ILP32E-WITHFP-NEXT: addi t1, a0, 1311 ; ILP32E-WITHFP-NEXT: lui a0, 688509 ; ILP32E-WITHFP-NEXT: addi a5, a0, -2048 ; ILP32E-WITHFP-NEXT: li a0, 1 @@ -2472,7 +2469,10 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32E-WITHFP-NEXT: addi a2, sp, 32 ; ILP32E-WITHFP-NEXT: li a3, 12 ; ILP32E-WITHFP-NEXT: li a4, 13 -; ILP32E-WITHFP-NEXT: sw a6, 32(sp) +; ILP32E-WITHFP-NEXT: sw t1, 32(sp) +; ILP32E-WITHFP-NEXT: sw t0, 36(sp) +; ILP32E-WITHFP-NEXT: sw a7, 40(sp) +; ILP32E-WITHFP-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -2484,14 +2484,11 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 17 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 16 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: li t0, 17 +; LP64-LP64F-LP64D-FPELIM-NEXT: li t1, 16 +; 
LP64-LP64F-LP64D-FPELIM-NEXT: li t2, 15 ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_0) -; LP64-LP64F-LP64D-FPELIM-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld t3, %lo(.LCPI11_0)(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) ; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) @@ -2504,7 +2501,10 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a5, 13 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 14 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t3, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t2, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t1, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 48 @@ -2516,14 +2516,11 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48 -; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 17 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 24(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 16 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 16(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: li t0, 17 +; LP64-LP64F-LP64D-WITHFP-NEXT: li t1, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: li t2, 15 ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_0) -; LP64-LP64F-LP64D-WITHFP-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld t3, %lo(.LCPI11_0)(a0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) @@ -2536,7 +2533,10 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a5, 13 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 14 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 0(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t3, 0(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t2, 8(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t1, 16(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 24(sp) ; LP64-LP64F-LP64D-WITHFP-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2548,19 +2548,16 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill ; LP64E-FPELIM-NEXT: li a0, 17 +; LP64E-FPELIM-NEXT: li a1, 16 +; LP64E-FPELIM-NEXT: sd a1, 32(sp) ; LP64E-FPELIM-NEXT: sd a0, 40(sp) -; LP64E-FPELIM-NEXT: li a0, 16 -; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0) -; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; LP64E-FPELIM-NEXT: sd a0, 32(sp) -; LP64E-FPELIM-NEXT: li a0, 15 -; LP64E-FPELIM-NEXT: sd a0, 24(sp) -; LP64E-FPELIM-NEXT: sd a1, 16(sp) -; LP64E-FPELIM-NEXT: li a0, 14 -; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: li a6, 15 +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_0) +; LP64E-FPELIM-NEXT: ld a7, %lo(.LCPI11_0)(a0) +; LP64E-FPELIM-NEXT: li t0, 14 ; LP64E-FPELIM-NEXT: lui a0, 2384 ; LP64E-FPELIM-NEXT: addiw a0, a0, 761 -; LP64E-FPELIM-NEXT: slli a6, a0, 11 +; LP64E-FPELIM-NEXT: slli 
t1, a0, 11 ; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) ; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) ; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) @@ -2569,7 +2566,10 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-FPELIM-NEXT: li a1, 11 ; LP64E-FPELIM-NEXT: li a4, 12 ; LP64E-FPELIM-NEXT: li a5, 13 -; LP64E-FPELIM-NEXT: sd a6, 0(sp) +; LP64E-FPELIM-NEXT: sd t1, 0(sp) +; LP64E-FPELIM-NEXT: sd t0, 8(sp) +; LP64E-FPELIM-NEXT: sd a7, 16(sp) +; LP64E-FPELIM-NEXT: sd a6, 24(sp) ; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee ; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -2582,19 +2582,16 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: addi s0, sp, 64 ; LP64E-WITHFP-NEXT: li a0, 17 +; LP64E-WITHFP-NEXT: li a1, 16 +; LP64E-WITHFP-NEXT: sd a1, 32(sp) ; LP64E-WITHFP-NEXT: sd a0, 40(sp) -; LP64E-WITHFP-NEXT: li a0, 16 -; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0) -; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; LP64E-WITHFP-NEXT: sd a0, 32(sp) -; LP64E-WITHFP-NEXT: li a0, 15 -; LP64E-WITHFP-NEXT: sd a0, 24(sp) -; LP64E-WITHFP-NEXT: sd a1, 16(sp) -; LP64E-WITHFP-NEXT: li a0, 14 -; LP64E-WITHFP-NEXT: sd a0, 8(sp) +; LP64E-WITHFP-NEXT: li a6, 15 +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_0) +; LP64E-WITHFP-NEXT: ld a7, %lo(.LCPI11_0)(a0) +; LP64E-WITHFP-NEXT: li t0, 14 ; LP64E-WITHFP-NEXT: lui a0, 2384 ; LP64E-WITHFP-NEXT: addiw a0, a0, 761 -; LP64E-WITHFP-NEXT: slli a6, a0, 11 +; LP64E-WITHFP-NEXT: slli t1, a0, 11 ; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) ; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) ; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) @@ -2603,7 +2600,10 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-WITHFP-NEXT: li a1, 11 ; LP64E-WITHFP-NEXT: li a4, 12 ; LP64E-WITHFP-NEXT: li a5, 13 -; LP64E-WITHFP-NEXT: sd a6, 0(sp) +; LP64E-WITHFP-NEXT: sd t1, 0(sp) +; LP64E-WITHFP-NEXT: sd t0, 8(sp) +; LP64E-WITHFP-NEXT: sd a7, 16(sp) +; LP64E-WITHFP-NEXT: sd a6, 24(sp) ; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee ; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2623,14 +2623,14 @@ define i32 @va6_no_fixed_args(...) nounwind { ; ILP32-ILP32F-FPELIM-LABEL: va6_no_fixed_args: ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 20 ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 @@ -2642,14 +2642,14 @@ define i32 @va6_no_fixed_args(...) 
nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2660,14 +2660,14 @@ define i32 @va6_no_fixed_args(...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va6_no_fixed_args: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 20 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 @@ -2676,12 +2676,12 @@ define i32 @va6_no_fixed_args(...) nounwind { ; ILP32E-FPELIM-LABEL: va6_no_fixed_args: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: addi sp, sp, -28 -; ILP32E-FPELIM-NEXT: sw a5, 24(sp) ; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 16(sp) -; ILP32E-FPELIM-NEXT: sw a2, 12(sp) -; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a5, 24(sp) ; ILP32E-FPELIM-NEXT: sw a0, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a2, 12(sp) +; ILP32E-FPELIM-NEXT: sw a3, 16(sp) ; ILP32E-FPELIM-NEXT: addi a1, sp, 8 ; ILP32E-FPELIM-NEXT: sw a1, 0(sp) ; ILP32E-FPELIM-NEXT: addi sp, sp, 28 @@ -2693,12 +2693,12 @@ define i32 @va6_no_fixed_args(...) nounwind { ; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 12 -; ILP32E-WITHFP-NEXT: sw a5, 20(s0) ; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) -; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a5, 20(s0) ; ILP32E-WITHFP-NEXT: sw a0, 0(s0) +; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) ; ILP32E-WITHFP-NEXT: addi a1, s0, 4 ; ILP32E-WITHFP-NEXT: sw a1, -12(s0) ; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload @@ -2709,14 +2709,14 @@ define i32 @va6_no_fixed_args(...) 
nounwind { ; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 24 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 @@ -2728,14 +2728,14 @@ define i32 @va6_no_fixed_args(...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2746,12 +2746,12 @@ define i32 @va6_no_fixed_args(...) nounwind { ; LP64E-FPELIM-LABEL: va6_no_fixed_args: ; LP64E-FPELIM: # %bb.0: ; LP64E-FPELIM-NEXT: addi sp, sp, -56 -; LP64E-FPELIM-NEXT: sd a5, 48(sp) ; LP64E-FPELIM-NEXT: sd a4, 40(sp) -; LP64E-FPELIM-NEXT: sd a3, 32(sp) -; LP64E-FPELIM-NEXT: sd a2, 24(sp) -; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: sd a5, 48(sp) ; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: sd a2, 24(sp) +; LP64E-FPELIM-NEXT: sd a3, 32(sp) ; LP64E-FPELIM-NEXT: addi a1, sp, 16 ; LP64E-FPELIM-NEXT: sd a1, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -2763,12 +2763,12 @@ define i32 @va6_no_fixed_args(...) nounwind { ; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; LP64E-WITHFP-NEXT: addi s0, sp, 24 -; LP64E-WITHFP-NEXT: sd a5, 40(s0) ; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) -; LP64E-WITHFP-NEXT: sd a2, 16(s0) -; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a5, 40(s0) ; LP64E-WITHFP-NEXT: sd a0, 0(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) ; LP64E-WITHFP-NEXT: addi a1, s0, 8 ; LP64E-WITHFP-NEXT: sd a1, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload @@ -2794,25 +2794,25 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1 ; ILP32-ILP32F-FPELIM-NEXT: lui t0, 24414 ; ILP32-ILP32F-FPELIM-NEXT: add t0, sp, t0 -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 300(t0) -; ILP32-ILP32F-FPELIM-NEXT: lui a7, 24414 -; ILP32-ILP32F-FPELIM-NEXT: add a7, sp, a7 -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 296(a7) -; ILP32-ILP32F-FPELIM-NEXT: lui a6, 24414 -; ILP32-ILP32F-FPELIM-NEXT: add a6, sp, a6 -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 292(a6) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 292(t0) +; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414 +; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5 +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 296(a5) +; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414 +; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5 +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 300(a5) ; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414 ; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5 -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 288(a5) -; ILP32-ILP32F-FPELIM-NEXT: lui a4, 24414 -; ILP32-ILP32F-FPELIM-NEXT: add a4, sp, a4 -; ILP32-ILP32F-FPELIM-NEXT: sw a3, 284(a4) -; ILP32-ILP32F-FPELIM-NEXT: lui a3, 24414 -; ILP32-ILP32F-FPELIM-NEXT: add a3, sp, a3 -; ILP32-ILP32F-FPELIM-NEXT: sw a2, 280(a3) -; ILP32-ILP32F-FPELIM-NEXT: lui a2, 24414 -; ILP32-ILP32F-FPELIM-NEXT: add a2, sp, a2 -; ILP32-ILP32F-FPELIM-NEXT: sw a1, 276(a2) +; ILP32-ILP32F-FPELIM-NEXT: sw a1, 276(a5) +; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414 +; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1 +; ILP32-ILP32F-FPELIM-NEXT: sw a2, 280(a1) +; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414 +; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1 +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 284(a1) +; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414 +; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1 +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 288(a1) ; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414 ; ILP32-ILP32F-FPELIM-NEXT: addi a1, a1, 280 ; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1 @@ -2836,13 +2836,13 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1728 ; ILP32-ILP32F-WITHFP-NEXT: sub sp, sp, a0 ; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1 -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8 ; ILP32-ILP32F-WITHFP-NEXT: lui a2, 24414 ; ILP32-ILP32F-WITHFP-NEXT: sub a2, s0, a2 @@ -2864,25 +2864,25 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t0, 24414 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add t0, sp, t0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 300(t0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a7, 24414 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a7, sp, a7 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 296(a7) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a6, 24414 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a6, sp, a6 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 292(a6) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 292(t0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 296(a5) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 300(a5) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 288(a5) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a4, 24414 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a4, sp, a4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 284(a4) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a3, 24414 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a3, sp, a3 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 280(a3) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a2, 24414 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, sp, a2 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 276(a2) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 276(a5) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 280(a1) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 284(a1) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 288(a1) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a1, 280 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1 @@ -2904,16 +2904,16 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; ILP32E-FPELIM-NEXT: sw a5, 284(a6) ; ILP32E-FPELIM-NEXT: lui a5, 24414 ; ILP32E-FPELIM-NEXT: add a5, sp, a5 -; ILP32E-FPELIM-NEXT: sw a4, 280(a5) -; ILP32E-FPELIM-NEXT: lui a4, 24414 -; ILP32E-FPELIM-NEXT: add a4, sp, a4 -; ILP32E-FPELIM-NEXT: sw a3, 276(a4) -; ILP32E-FPELIM-NEXT: lui a3, 24414 -; ILP32E-FPELIM-NEXT: add a3, sp, a3 -; ILP32E-FPELIM-NEXT: sw a2, 272(a3) -; ILP32E-FPELIM-NEXT: lui a2, 24414 -; ILP32E-FPELIM-NEXT: add a2, sp, a2 -; ILP32E-FPELIM-NEXT: sw a1, 268(a2) +; ILP32E-FPELIM-NEXT: sw a1, 268(a5) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: add a1, sp, a1 +; ILP32E-FPELIM-NEXT: sw a2, 272(a1) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: add a1, sp, a1 +; ILP32E-FPELIM-NEXT: sw a3, 276(a1) +; ILP32E-FPELIM-NEXT: lui a1, 24414 +; ILP32E-FPELIM-NEXT: add a1, sp, a1 +; ILP32E-FPELIM-NEXT: sw a4, 280(a1) ; ILP32E-FPELIM-NEXT: lui a1, 24414 ; ILP32E-FPELIM-NEXT: addi a1, a1, 272 ; ILP32E-FPELIM-NEXT: add a1, sp, a1 @@ -2938,10 +2938,10 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; ILP32E-WITHFP-NEXT: sub sp, sp, a0 ; ILP32E-WITHFP-NEXT: mv a0, a1 ; ILP32E-WITHFP-NEXT: sw a5, 20(s0) -; ILP32E-WITHFP-NEXT: sw a4, 16(s0) -; ILP32E-WITHFP-NEXT: sw a3, 12(s0) -; ILP32E-WITHFP-NEXT: sw a2, 8(s0) ; ILP32E-WITHFP-NEXT: sw a1, 4(s0) +; ILP32E-WITHFP-NEXT: sw a2, 8(s0) +; ILP32E-WITHFP-NEXT: sw a3, 12(s0) +; ILP32E-WITHFP-NEXT: sw a4, 16(s0) ; ILP32E-WITHFP-NEXT: addi a1, s0, 8 ; ILP32E-WITHFP-NEXT: lui a2, 24414 ; ILP32E-WITHFP-NEXT: sub a2, s0, a2 @@ -2964,12 +2964,6 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 280(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a0) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a0) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 284 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) @@ -2981,14 +2975,20 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 312(a1) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a1) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a1) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a1) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a1) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 288(a1) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a1) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a1) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 336 ; LP64-LP64F-LP64D-FPELIM-NEXT: add sp, sp, a1 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret @@ -3007,17 +3007,17 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1680 ; LP64-LP64F-LP64D-WITHFP-NEXT: sub sp, sp, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-WITHFP-NEXT: sub a1, s0, a1 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -288(a1) ; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a1, a1, -1680 ; LP64-LP64F-LP64D-WITHFP-NEXT: add sp, sp, a1 @@ -3033,27 +3033,27 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; LP64E-FPELIM-NEXT: sub sp, sp, a0 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064 ; LP64E-FPELIM-NEXT: lui a0, 24414 -; LP64E-FPELIM-NEXT: add a0, sp, a0 -; LP64E-FPELIM-NEXT: sd a1, 280(a0) -; LP64E-FPELIM-NEXT: lui a0, 24414 ; LP64E-FPELIM-NEXT: addiw a0, a0, 284 ; LP64E-FPELIM-NEXT: add a0, sp, a0 ; LP64E-FPELIM-NEXT: sd a0, 8(sp) ; LP64E-FPELIM-NEXT: lui a0, 24414 ; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a1, 280(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 ; LP64E-FPELIM-NEXT: lw a0, 280(a0) ; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: add a1, sp, a1 ; LP64E-FPELIM-NEXT: sd a5, 312(a1) ; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a4, 304(a1) +; LP64E-FPELIM-NEXT: sd a2, 288(a1) ; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: add a1, sp, a1 ; LP64E-FPELIM-NEXT: sd a3, 296(a1) ; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a2, 288(a1) +; LP64E-FPELIM-NEXT: sd a4, 304(a1) ; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: addiw a1, a1, 320 ; LP64E-FPELIM-NEXT: add sp, sp, a1 @@ -3072,16 +3072,16 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64E-WITHFP-NEXT: lui a0, 24414 ; LP64E-WITHFP-NEXT: addiw a0, a0, -1704 ; LP64E-WITHFP-NEXT: sub sp, sp, a0 -; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: addi a0, s0, 12 -; LP64E-WITHFP-NEXT: lui a1, 24414 -; LP64E-WITHFP-NEXT: sub a1, s0, a1 -; LP64E-WITHFP-NEXT: sd a0, -288(a1) +; LP64E-WITHFP-NEXT: lui a6, 24414 +; LP64E-WITHFP-NEXT: sub a6, s0, a6 +; LP64E-WITHFP-NEXT: sd a0, -288(a6) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: sd a5, 40(s0) -; LP64E-WITHFP-NEXT: sd a4, 32(s0) -; LP64E-WITHFP-NEXT: sd a3, 24(s0) ; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) ; LP64E-WITHFP-NEXT: lui a1, 24414 ; LP64E-WITHFP-NEXT: addiw a1, a1, -1704 ; LP64E-WITHFP-NEXT: add sp, sp, a1 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll index 3e14317a0047458..cae59c79aaaa8ad 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -18,13 +18,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: srlw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_4bytes: @@ -51,13 +51,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = 
load i32, ptr %src.ptr, align 1 %byteOff = load i32, ptr %byteOff.ptr, align 1 @@ -82,13 +82,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sllw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_4bytes: @@ -115,13 +115,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sll a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 %byteOff = load i32, ptr %byteOff.ptr, align 1 @@ -146,13 +146,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sraw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_4bytes: @@ -179,13 +179,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sra a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 %byteOff = load i32, ptr %byteOff.ptr, align 1 @@ -244,21 +244,21 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: srl a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_8bytes: @@ -309,20 +309,20 @@ define void 
@lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a1, a4, a1 -; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -380,21 +380,21 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: sll a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_8bytes: @@ -445,20 +445,20 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a1, a4, a1 -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) ; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) ; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a3, 7(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -516,21 +516,21 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a3, a3, 35 ; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: sra a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; 
RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_8bytes: @@ -581,20 +581,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a3, a3, a4 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: .LBB5_3: -; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -689,36 +689,36 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes: @@ -774,14 +774,14 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 
24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 @@ -805,34 +805,34 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a7, t1, a7 ; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli t1, a0, 24 +; RV32I-NEXT: srli t2, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb t2, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 13(a2) +; RV32I-NEXT: sb t1, 15(a2) ; RV32I-NEXT: srli a0, a7, 16 +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a7, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a7, 8 -; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: sb a1, 11(a2) ; RV32I-NEXT: srli a0, a4, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a4, 24 -; RV32I-NEXT: sb a0, 3(a2) +; RV32I-NEXT: srli a1, a4, 24 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a3, 0(a2) ; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a0, 2(a2) +; RV32I-NEXT: sb a1, 3(a2) ; RV32I-NEXT: srli a0, t0, 16 +; RV32I-NEXT: srli a1, t0, 24 +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a3, 5(a2) ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, t0, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, t0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -928,36 +928,36 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: .LBB7_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, 
a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes_wordOff: @@ -1004,50 +1004,50 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: or a0, a0, a6 ; RV32I-NEXT: or a0, a0, a7 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: lw a3, 12(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sb a1, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw a0, 12(a0) ; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: srli a6, a1, 24 +; RV32I-NEXT: srli a7, a1, 8 +; RV32I-NEXT: sb a1, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: srli a5, a1, 24 -; RV32I-NEXT: sb a5, 11(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: srli a6, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a3, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: srli a5, a4, 8 +; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a0, 2(a2) ; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 1(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a0, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1143,36 +1143,36 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB8_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) ; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 
+; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes: @@ -1228,14 +1228,14 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a5, 24(sp) -; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: addi a3, sp, 16 ; RV32I-NEXT: sub a3, a3, a0 @@ -1259,34 +1259,34 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srl a4, a4, a7 ; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: sll a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a5, a5, 24 +; RV32I-NEXT: srli a1, a4, 16 +; RV32I-NEXT: srli a7, a4, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) ; RV32I-NEXT: sb a5, 11(a2) ; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: srli a1, t1, 16 +; RV32I-NEXT: srli a4, t1, 8 +; RV32I-NEXT: sb t1, 12(a2) +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) ; RV32I-NEXT: sb a3, 15(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb t1, 12(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: srli a3, t0, 8 ; RV32I-NEXT: sb t0, 4(a2) -; RV32I-NEXT: srli a0, a4, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: srli a0, t1, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, t1, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, t0, 16 -; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, t0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a3, 5(a2) +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: sb a0, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1382,36 +1382,36 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: .LBB9_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) ; RV64I-NEXT: 
sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes_wordOff: @@ -1458,50 +1458,50 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: or a0, a0, a6 ; RV32I-NEXT: or a0, a0, a7 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a5, 24(sp) -; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: lw a3, 12(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sb a1, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw a0, 12(a0) ; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: srli a6, a1, 24 +; RV32I-NEXT: srli a7, a1, 8 +; RV32I-NEXT: sb a1, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: srli a5, a1, 24 -; RV32I-NEXT: sb a5, 11(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: srli a6, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a3, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: srli a5, a4, 8 +; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a0, 2(a2) ; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 1(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; 
RV32I-NEXT: sb a0, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1598,36 +1598,36 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sll a3, a3, a4 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: .LBB10_3: -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes: @@ -1684,14 +1684,14 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: andi a0, a1, 12 ; RV32I-NEXT: mv a3, sp ; RV32I-NEXT: add a0, a3, a0 @@ -1715,34 +1715,34 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a7, t1, a7 ; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: sra a0, a0, a1 -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli t1, a0, 24 +; RV32I-NEXT: srli t2, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb t2, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 13(a2) +; RV32I-NEXT: sb t1, 15(a2) ; RV32I-NEXT: srli a0, a7, 16 +; RV32I-NEXT: srli a1, a7, 24 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a7, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a7, 8 -; RV32I-NEXT: sb a0, 9(a2) +; 
RV32I-NEXT: sb a1, 11(a2) ; RV32I-NEXT: srli a0, a4, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a4, 24 -; RV32I-NEXT: sb a0, 3(a2) +; RV32I-NEXT: srli a1, a4, 24 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a3, 0(a2) ; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a0, 2(a2) +; RV32I-NEXT: sb a1, 3(a2) ; RV32I-NEXT: srli a0, t0, 16 +; RV32I-NEXT: srli a1, t0, 24 +; RV32I-NEXT: srli a3, t0, 8 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a3, 5(a2) ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, t0, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, t0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1838,36 +1838,36 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sll a3, a3, a4 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: .LBB11_3: -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes_wordOff: @@ -1915,50 +1915,50 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: or a6, a6, a7 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: lw a1, 8(a0) -; RV32I-NEXT: lw a3, 12(a0) +; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a0, 4(a0) -; RV32I-NEXT: sb a1, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw a0, 12(a0) ; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: srli a6, a1, 24 +; RV32I-NEXT: srli 
a7, a1, 8 +; RV32I-NEXT: sb a1, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: srli a5, a1, 24 -; RV32I-NEXT: sb a5, 11(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: srli a6, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a3, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a0, a4, 16 ; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: srli a5, a4, 8 +; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a0, 2(a2) ; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 1(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a0, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -2083,97 +2083,97 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a0, 24(sp) ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: mv a3, sp ; RV64I-NEXT: add a0, a3, a0 ; RV64I-NEXT: ld a3, 0(a0) ; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a7, a4, a1 -; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: slli a0, a5, 1 -; RV64I-NEXT: sll a0, a0, t0 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: srl a3, a3, a1 +; RV64I-NEXT: slli a5, a1, 3 +; RV64I-NEXT: ld a6, 16(a0) +; RV64I-NEXT: ld a7, 24(a0) +; RV64I-NEXT: srl a0, a4, a5 +; RV64I-NEXT: andi a1, a5, 56 +; RV64I-NEXT: xori t0, a1, 63 +; RV64I-NEXT: slli a1, a6, 1 +; RV64I-NEXT: sll a1, a1, t0 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srl a3, a3, a5 ; RV64I-NEXT: slli a4, a4, 1 ; RV64I-NEXT: sll a4, a4, t0 ; RV64I-NEXT: or a4, a3, a4 -; RV64I-NEXT: srl a5, a5, a1 -; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: srl a6, a6, a5 +; RV64I-NEXT: slli t1, a7, 1 ; RV64I-NEXT: sll t0, t1, t0 -; RV64I-NEXT: or t0, a5, t0 -; RV64I-NEXT: srl a1, a6, a1 -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: or t0, a6, t0 +; RV64I-NEXT: srl a5, a7, a5 +; RV64I-NEXT: srli a7, a5, 56 +; RV64I-NEXT: srli t1, a5, 48 +; RV64I-NEXT: srli t2, a5, 40 +; RV64I-NEXT: srli t3, a5, 32 +; RV64I-NEXT: sb t3, 28(a2) +; RV64I-NEXT: sb t2, 29(a2) +; RV64I-NEXT: sb t1, 30(a2) +; RV64I-NEXT: sb a7, 31(a2) +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t1, a5, 16 +; RV64I-NEXT: srli t2, a5, 8 +; RV64I-NEXT: sb a5, 24(a2) +; RV64I-NEXT: sb t2, 25(a2) +; RV64I-NEXT: sb t1, 26(a2) +; RV64I-NEXT: sb a7, 27(a2) 
+; RV64I-NEXT: srli a5, t0, 56 +; RV64I-NEXT: srli a7, t0, 48 +; RV64I-NEXT: srli t1, t0, 40 +; RV64I-NEXT: srli t2, t0, 32 +; RV64I-NEXT: sb t2, 20(a2) +; RV64I-NEXT: sb t1, 21(a2) +; RV64I-NEXT: sb a7, 22(a2) +; RV64I-NEXT: sb a5, 23(a2) +; RV64I-NEXT: srli a5, t0, 24 +; RV64I-NEXT: srli a7, t0, 16 +; RV64I-NEXT: srli t0, t0, 8 +; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sb t0, 17(a2) +; RV64I-NEXT: sb a7, 18(a2) +; RV64I-NEXT: sb a5, 19(a2) +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a4, a4, 8 ; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a7, 8(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a6, 2(a2) +; RV64I-NEXT: sb a5, 3(a2) ; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 30(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 28(a2) +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: sb a3, 15(a2) ; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) +; RV64I-NEXT: srli a4, a1, 16 ; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, t0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, t0, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, t0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, t0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, t0, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, t0, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a1, t0, 8 -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a1, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a3, 11(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -2183,6 +2183,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) @@ -2273,124 +2274,125 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a1, 
a1, t2 -; RV32I-NEXT: sw zero, 60(sp) -; RV32I-NEXT: sw zero, 56(sp) -; RV32I-NEXT: sw zero, 52(sp) ; RV32I-NEXT: sw zero, 48(sp) -; RV32I-NEXT: sw zero, 44(sp) -; RV32I-NEXT: sw zero, 40(sp) -; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 32(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: mv a3, sp -; RV32I-NEXT: add a3, a3, a0 -; RV32I-NEXT: lw a6, 0(a3) -; RV32I-NEXT: lw a7, 4(a3) -; RV32I-NEXT: slli a5, a1, 3 -; RV32I-NEXT: lw t0, 8(a3) -; RV32I-NEXT: lw t1, 12(a3) -; RV32I-NEXT: srl a4, a7, a5 -; RV32I-NEXT: andi a0, a5, 24 -; RV32I-NEXT: xori t2, a0, 31 -; RV32I-NEXT: slli a0, t0, 1 -; RV32I-NEXT: sll a0, a0, t2 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srl a6, a6, a5 -; RV32I-NEXT: slli a7, a7, 1 -; RV32I-NEXT: sll a1, a7, t2 -; RV32I-NEXT: or a1, a6, a1 -; RV32I-NEXT: srl a7, t1, a5 -; RV32I-NEXT: lw t3, 16(a3) -; RV32I-NEXT: lw t4, 20(a3) -; RV32I-NEXT: lw t5, 24(a3) -; RV32I-NEXT: lw t6, 28(a3) -; RV32I-NEXT: slli a3, t3, 1 -; RV32I-NEXT: sll a3, a3, t2 -; RV32I-NEXT: or a3, a7, a3 -; RV32I-NEXT: srl t0, t0, a5 -; RV32I-NEXT: slli t1, t1, 1 -; RV32I-NEXT: sll t1, t1, t2 -; RV32I-NEXT: or t1, t0, t1 -; RV32I-NEXT: srl s0, t4, a5 +; RV32I-NEXT: add a6, a3, a0 +; RV32I-NEXT: lw a3, 0(a6) +; RV32I-NEXT: lw a4, 4(a6) +; RV32I-NEXT: slli t1, a1, 3 +; RV32I-NEXT: lw a7, 8(a6) +; RV32I-NEXT: lw t0, 12(a6) +; RV32I-NEXT: srl a0, a4, t1 +; RV32I-NEXT: andi a1, t1, 24 +; RV32I-NEXT: xori t2, a1, 31 +; RV32I-NEXT: slli a1, a7, 1 +; RV32I-NEXT: sll a1, a1, t2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: srl a3, a3, t1 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: srl a5, t0, t1 +; RV32I-NEXT: lw t3, 16(a6) +; RV32I-NEXT: lw t4, 20(a6) +; RV32I-NEXT: lw t5, 24(a6) +; RV32I-NEXT: lw t6, 28(a6) +; RV32I-NEXT: slli a6, t3, 1 +; RV32I-NEXT: sll a6, a6, t2 +; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: srl a7, a7, t1 +; RV32I-NEXT: slli t0, t0, 1 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: srl s0, t4, t1 ; RV32I-NEXT: slli s1, t5, 1 ; RV32I-NEXT: sll s1, s1, t2 ; RV32I-NEXT: or s1, s0, s1 -; RV32I-NEXT: srl t3, t3, a5 +; RV32I-NEXT: srl t3, t3, t1 ; RV32I-NEXT: slli t4, t4, 1 ; RV32I-NEXT: sll t4, t4, t2 ; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: srl t5, t5, a5 +; RV32I-NEXT: srl t5, t5, t1 ; RV32I-NEXT: slli s2, t6, 1 ; RV32I-NEXT: sll t2, s2, t2 ; RV32I-NEXT: or t2, t5, t2 -; RV32I-NEXT: srl a5, t6, a5 +; RV32I-NEXT: srl t1, t6, t1 +; RV32I-NEXT: srli t6, t1, 24 +; RV32I-NEXT: srli s2, t1, 16 +; RV32I-NEXT: srli s3, t1, 8 +; RV32I-NEXT: sb t1, 28(a2) +; RV32I-NEXT: sb s3, 29(a2) +; RV32I-NEXT: sb s2, 30(a2) +; RV32I-NEXT: sb t6, 31(a2) +; RV32I-NEXT: srli t1, t2, 24 +; RV32I-NEXT: srli t6, t2, 16 +; RV32I-NEXT: srli t2, t2, 8 ; RV32I-NEXT: sb t5, 24(a2) -; RV32I-NEXT: sb a5, 28(a2) +; RV32I-NEXT: sb t2, 25(a2) +; RV32I-NEXT: sb t6, 26(a2) +; RV32I-NEXT: sb t1, 27(a2) +; RV32I-NEXT: srli t1, t4, 24 +; RV32I-NEXT: srli t2, t4, 16 +; 
RV32I-NEXT: srli t4, t4, 8 ; RV32I-NEXT: sb t3, 16(a2) -; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a7, 12(a2) -; RV32I-NEXT: sb a6, 0(a2) -; RV32I-NEXT: sb a4, 4(a2) -; RV32I-NEXT: srli a4, a5, 24 -; RV32I-NEXT: sb a4, 31(a2) -; RV32I-NEXT: srli a4, a5, 16 -; RV32I-NEXT: sb a4, 30(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a4, t2, 24 -; RV32I-NEXT: sb a4, 27(a2) -; RV32I-NEXT: srli a4, t2, 16 -; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t2, 8 -; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t4, 24 -; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t4, 16 -; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t4, 8 -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, s1, 24 -; RV32I-NEXT: sb a4, 23(a2) -; RV32I-NEXT: srli a4, s1, 16 -; RV32I-NEXT: sb a4, 22(a2) +; RV32I-NEXT: sb t4, 17(a2) +; RV32I-NEXT: sb t2, 18(a2) +; RV32I-NEXT: sb t1, 19(a2) +; RV32I-NEXT: srli t1, s1, 24 +; RV32I-NEXT: srli t2, s1, 16 ; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s0, 20(a2) ; RV32I-NEXT: sb s1, 21(a2) -; RV32I-NEXT: srli a4, t1, 24 -; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, t1, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, t1, 8 -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb t2, 22(a2) +; RV32I-NEXT: sb t1, 23(a2) +; RV32I-NEXT: srli t1, t0, 24 +; RV32I-NEXT: srli t2, t0, 16 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb a7, 8(a2) +; RV32I-NEXT: sb t0, 9(a2) +; RV32I-NEXT: sb t2, 10(a2) +; RV32I-NEXT: sb t1, 11(a2) +; RV32I-NEXT: srli a7, a6, 24 +; RV32I-NEXT: srli t0, a6, 16 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: sb a7, 15(a2) +; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a6, a4, 16 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: sb a5, 3(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: srli a4, a1, 16 ; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a3, 7(a2) ; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -2515,98 +2517,98 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a0, 24(sp) ; RV64I-NEXT: slli a0, a1, 2 ; 
RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: mv a3, sp ; RV64I-NEXT: add a0, a3, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: slli a1, a1, 5 -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a7, a4, a1 -; RV64I-NEXT: andi a0, a1, 32 -; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: slli a0, a5, 1 -; RV64I-NEXT: sll a0, a0, t0 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: srl a3, a3, a1 -; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: sll a4, a4, t0 -; RV64I-NEXT: or a4, a3, a4 -; RV64I-NEXT: srl a5, a5, a1 -; RV64I-NEXT: slli t1, a6, 1 -; RV64I-NEXT: sll t0, t1, t0 -; RV64I-NEXT: or t0, a5, t0 -; RV64I-NEXT: srl a1, a6, a1 +; RV64I-NEXT: ld a4, 0(a0) +; RV64I-NEXT: ld a5, 8(a0) +; RV64I-NEXT: slli a6, a1, 5 +; RV64I-NEXT: ld a7, 16(a0) +; RV64I-NEXT: ld t0, 24(a0) +; RV64I-NEXT: srl a3, a5, a6 +; RV64I-NEXT: andi a0, a6, 32 +; RV64I-NEXT: xori t1, a0, 63 +; RV64I-NEXT: slli a0, a7, 1 +; RV64I-NEXT: sll a0, a0, t1 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: srl t2, a4, a6 +; RV64I-NEXT: slli a5, a5, 1 +; RV64I-NEXT: sll a1, a5, t1 +; RV64I-NEXT: or a1, t2, a1 +; RV64I-NEXT: srl a5, a7, a6 +; RV64I-NEXT: slli a4, t0, 1 +; RV64I-NEXT: sll a4, a4, t1 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: srl a6, t0, a6 +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t0, a5, 16 +; RV64I-NEXT: srli t1, a5, 8 ; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a7, 8(a2) -; RV64I-NEXT: srli a6, a5, 24 -; RV64I-NEXT: sb a6, 19(a2) -; RV64I-NEXT: srli a6, a5, 16 -; RV64I-NEXT: sb a6, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: sb t1, 17(a2) +; RV64I-NEXT: sb t0, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: srli a5, a6, 56 +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: srli t0, a6, 40 +; RV64I-NEXT: srli t1, a6, 32 +; RV64I-NEXT: sb t1, 28(a2) +; RV64I-NEXT: sb t0, 29(a2) +; RV64I-NEXT: sb a7, 30(a2) ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a6, 24 +; RV64I-NEXT: srli a7, a6, 16 +; RV64I-NEXT: srli t0, a6, 8 +; RV64I-NEXT: sb a6, 24(a2) +; RV64I-NEXT: sb t0, 25(a2) +; RV64I-NEXT: sb a7, 26(a2) ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: srli a1, a7, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a7, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a1, a7, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: srli a1, t0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, t0, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, t0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, t0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a5, t2, 24 +; RV64I-NEXT: srli a6, t2, 16 +; RV64I-NEXT: srli a7, t2, 8 +; RV64I-NEXT: sb t2, 0(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a6, 2(a2) +; RV64I-NEXT: sb a5, 3(a2) +; RV64I-NEXT: srli a5, a3, 24 +; 
RV64I-NEXT: srli a6, a3, 16 +; RV64I-NEXT: srli a7, a3, 8 +; RV64I-NEXT: sb a3, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb a5, 11(a2) +; RV64I-NEXT: srli a3, a4, 56 +; RV64I-NEXT: srli a5, a4, 48 +; RV64I-NEXT: srli a6, a4, 40 ; RV64I-NEXT: srli a4, a4, 32 -; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a3, 23(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -2694,90 +2696,90 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: or a0, a0, t2 ; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 60(sp) -; RV32I-NEXT: sw zero, 56(sp) -; RV32I-NEXT: sw zero, 52(sp) ; RV32I-NEXT: sw zero, 48(sp) -; RV32I-NEXT: sw zero, 44(sp) -; RV32I-NEXT: sw zero, 40(sp) -; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 32(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: add a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 
30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -2881,82 +2883,82 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a1, 0(a1) -; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a0, 24(sp) ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: mv a0, sp -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ld a1, 16(a0) -; RV64I-NEXT: ld a3, 24(a0) -; RV64I-NEXT: ld a4, 0(a0) -; RV64I-NEXT: ld a0, 8(a0) -; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: add a3, a0, a1 +; RV64I-NEXT: ld a4, 16(a3) +; RV64I-NEXT: ld a0, 8(a3) +; RV64I-NEXT: ld a1, 0(a3) +; RV64I-NEXT: ld a3, 24(a3) +; RV64I-NEXT: srli a5, a4, 56 +; 
RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 20(a2) +; RV64I-NEXT: sb a7, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) ; RV64I-NEXT: sb a5, 23(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 22(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 21(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 20(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a7, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) ; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: srli a1, a3, 56 -; RV64I-NEXT: sb a1, 31(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; RV64I-NEXT: sb a7, 28(a2) +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: sb a5, 30(a2) +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 24(a2) +; RV64I-NEXT: sb a6, 25(a2) +; RV64I-NEXT: sb a5, 26(a2) +; RV64I-NEXT: sb a4, 27(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -3044,90 +3046,90 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: or a0, a0, t2 ; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 60(sp) -; RV32I-NEXT: sw zero, 56(sp) -; RV32I-NEXT: sw zero, 52(sp) ; RV32I-NEXT: sw 
zero, 48(sp) -; RV32I-NEXT: sw zero, 44(sp) -; RV32I-NEXT: sw zero, 40(sp) -; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 32(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: add a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: 
sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -3252,97 +3254,97 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 0(sp) -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a5, 48(sp) -; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd a5, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: addi a3, sp, 32 ; RV64I-NEXT: sub a3, a3, a0 -; RV64I-NEXT: ld a4, 0(a3) -; RV64I-NEXT: ld a5, 8(a3) -; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: ld a6, 16(a3) -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a7, a5, a1 -; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: srli a0, a4, 1 -; RV64I-NEXT: srl a0, a0, t0 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: sll a3, a3, a1 -; RV64I-NEXT: srli t1, a6, 1 -; RV64I-NEXT: srl t1, t1, t0 -; RV64I-NEXT: or t1, a3, t1 -; RV64I-NEXT: sll a6, a6, a1 -; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: srl a5, a5, t0 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: sll a1, a4, a1 -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a4, a6, 56 -; RV64I-NEXT: sb a4, 23(a2) -; RV64I-NEXT: srli a3, a3, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: ld a5, 0(a3) +; RV64I-NEXT: ld a6, 8(a3) +; RV64I-NEXT: slli a7, a1, 3 +; RV64I-NEXT: ld t0, 16(a3) +; RV64I-NEXT: ld a1, 24(a3) +; RV64I-NEXT: sll a4, a6, a7 +; RV64I-NEXT: andi a0, a7, 56 +; RV64I-NEXT: xori a3, a0, 63 +; RV64I-NEXT: srli a0, a5, 1 +; RV64I-NEXT: srl a0, a0, a3 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: sll t1, a1, a7 +; RV64I-NEXT: srli a1, t0, 1 +; RV64I-NEXT: srl a1, a1, a3 +; RV64I-NEXT: or a1, t1, a1 +; RV64I-NEXT: sll t0, t0, a7 +; RV64I-NEXT: srli a6, a6, 1 +; RV64I-NEXT: srl a3, a6, a3 +; RV64I-NEXT: or a3, t0, a3 +; RV64I-NEXT: sll a5, a5, a7 +; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: srli a7, a3, 48 +; RV64I-NEXT: srli t0, a3, 40 +; RV64I-NEXT: srli t2, a3, 32 +; RV64I-NEXT: sb t2, 20(a2) +; RV64I-NEXT: sb t0, 21(a2) +; RV64I-NEXT: sb a7, 22(a2) +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a6, t1, 56 +; RV64I-NEXT: srli a7, a1, 48 +; 
RV64I-NEXT: srli t0, a1, 40 +; RV64I-NEXT: srli t1, a1, 32 +; RV64I-NEXT: sb t1, 28(a2) +; RV64I-NEXT: sb t0, 29(a2) +; RV64I-NEXT: sb a7, 30(a2) +; RV64I-NEXT: sb a6, 31(a2) +; RV64I-NEXT: srli a6, a5, 56 +; RV64I-NEXT: srli a7, a5, 48 +; RV64I-NEXT: srli t0, a5, 40 +; RV64I-NEXT: srli t1, a5, 32 +; RV64I-NEXT: sb t1, 4(a2) +; RV64I-NEXT: sb t0, 5(a2) +; RV64I-NEXT: sb a7, 6(a2) +; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: srli a6, a5, 24 +; RV64I-NEXT: srli a7, a5, 16 +; RV64I-NEXT: srli t0, a5, 8 +; RV64I-NEXT: sb a5, 0(a2) +; RV64I-NEXT: sb t0, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb a6, 3(a2) +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: srli a5, a0, 48 +; RV64I-NEXT: srli a6, a0, 40 +; RV64I-NEXT: srli a7, a0, 32 +; RV64I-NEXT: sb a7, 12(a2) +; RV64I-NEXT: sb a6, 13(a2) +; RV64I-NEXT: sb a5, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 16(a2) +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a4, 19(a2) ; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: srli a1, a7, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb t1, 24(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a5, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a5, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a5, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a5, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a5, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a1, t1, 48 -; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, t1, 40 -; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, t1, 32 -; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, t1, 24 -; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, t1, 16 -; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a1, t1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a3, 27(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -3352,6 +3354,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) @@ -3442,124 +3445,125 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a1, a1, t2 -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 
12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw t1, 56(sp) -; RV32I-NEXT: sw t0, 52(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a7, 48(sp) -; RV32I-NEXT: sw a6, 44(sp) -; RV32I-NEXT: sw a5, 40(sp) -; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw t0, 52(sp) +; RV32I-NEXT: sw t1, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a3, 32(sp) +; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw a5, 40(sp) +; RV32I-NEXT: sw a6, 44(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: addi a3, sp, 32 -; RV32I-NEXT: sub a5, a3, a0 -; RV32I-NEXT: lw a6, 0(a5) -; RV32I-NEXT: lw a3, 4(a5) -; RV32I-NEXT: slli a7, a1, 3 -; RV32I-NEXT: lw t0, 8(a5) -; RV32I-NEXT: lw t1, 12(a5) -; RV32I-NEXT: sll a4, a3, a7 -; RV32I-NEXT: andi a0, a7, 24 -; RV32I-NEXT: xori t2, a0, 31 -; RV32I-NEXT: srli a0, a6, 1 -; RV32I-NEXT: srl a0, a0, t2 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: sll t3, t1, a7 -; RV32I-NEXT: srli a1, t0, 1 -; RV32I-NEXT: srl a1, a1, t2 -; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: sll t0, t0, a7 -; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: srl a3, a3, t2 -; RV32I-NEXT: lw t4, 16(a5) -; RV32I-NEXT: lw t5, 20(a5) -; RV32I-NEXT: or a3, t0, a3 -; RV32I-NEXT: lw t6, 24(a5) -; RV32I-NEXT: lw a5, 28(a5) -; RV32I-NEXT: sll s0, t5, a7 -; RV32I-NEXT: srli s1, t4, 1 -; RV32I-NEXT: srl s1, s1, t2 -; RV32I-NEXT: or s1, s0, s1 -; RV32I-NEXT: sll t4, t4, a7 -; RV32I-NEXT: srli t1, t1, 1 -; RV32I-NEXT: srl t1, t1, t2 -; RV32I-NEXT: or t1, t4, t1 -; RV32I-NEXT: sll a5, a5, a7 -; RV32I-NEXT: srli s2, t6, 1 -; RV32I-NEXT: srl s2, s2, t2 -; RV32I-NEXT: or s2, a5, s2 -; RV32I-NEXT: sll t6, t6, a7 -; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t2, t5, t2 +; RV32I-NEXT: sub a7, a3, a0 +; RV32I-NEXT: lw t3, 0(a7) +; RV32I-NEXT: lw a6, 4(a7) +; RV32I-NEXT: slli t4, a1, 3 +; RV32I-NEXT: lw a5, 8(a7) +; RV32I-NEXT: lw t2, 12(a7) +; RV32I-NEXT: sll a0, a6, t4 +; RV32I-NEXT: andi a1, t4, 24 +; RV32I-NEXT: xori t5, a1, 31 +; RV32I-NEXT: srli a1, t3, 1 +; RV32I-NEXT: srl a1, a1, t5 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: sll a4, t2, t4 +; RV32I-NEXT: srli a3, a5, 1 +; RV32I-NEXT: srl a3, a3, t5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: sll a5, a5, t4 +; RV32I-NEXT: srli a6, a6, 1 +; RV32I-NEXT: srl a6, a6, t5 +; RV32I-NEXT: lw t6, 16(a7) +; RV32I-NEXT: lw s0, 20(a7) +; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: lw s1, 24(a7) +; RV32I-NEXT: lw a7, 28(a7) +; RV32I-NEXT: sll t1, s0, t4 +; RV32I-NEXT: srli t0, t6, 1 +; RV32I-NEXT: srl t0, t0, t5 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: sll t6, t6, t4 +; RV32I-NEXT: srli t2, t2, 1 +; RV32I-NEXT: srl t2, t2, t5 ; RV32I-NEXT: or t2, t6, t2 -; RV32I-NEXT: sll a6, a6, a7 -; RV32I-NEXT: sb a6, 0(a2) -; RV32I-NEXT: srli a7, t6, 24 -; RV32I-NEXT: sb a7, 27(a2) +; RV32I-NEXT: sll s2, a7, t4 +; RV32I-NEXT: srli a7, s1, 1 +; RV32I-NEXT: srl a7, a7, t5 +; RV32I-NEXT: or s3, s2, a7 +; RV32I-NEXT: sll s1, s1, t4 +; RV32I-NEXT: srli s0, s0, 1 +; RV32I-NEXT: srl a7, s0, t5 +; RV32I-NEXT: or t5, s1, a7 +; RV32I-NEXT: sll a7, t3, t4 +; RV32I-NEXT: srli s1, s1, 24 +; RV32I-NEXT: srli t3, t5, 16 +; RV32I-NEXT: srli t4, t5, 8 +; RV32I-NEXT: sb t5, 24(a2) +; RV32I-NEXT: sb t4, 25(a2) +; RV32I-NEXT: sb t3, 26(a2) +; RV32I-NEXT: sb s1, 27(a2) +; RV32I-NEXT: srli t3, s2, 24 +; RV32I-NEXT: srli t4, s3, 16 +; RV32I-NEXT: srli t5, s3, 8 +; RV32I-NEXT: sb s3, 
28(a2) +; RV32I-NEXT: sb t5, 29(a2) +; RV32I-NEXT: sb t4, 30(a2) +; RV32I-NEXT: sb t3, 31(a2) +; RV32I-NEXT: srli t3, t6, 24 +; RV32I-NEXT: srli t4, t2, 16 +; RV32I-NEXT: srli t5, t2, 8 +; RV32I-NEXT: sb t2, 16(a2) +; RV32I-NEXT: sb t5, 17(a2) +; RV32I-NEXT: sb t4, 18(a2) +; RV32I-NEXT: sb t3, 19(a2) +; RV32I-NEXT: srli t1, t1, 24 +; RV32I-NEXT: srli t2, t0, 16 +; RV32I-NEXT: srli t3, t0, 8 +; RV32I-NEXT: sb t0, 20(a2) +; RV32I-NEXT: sb t3, 21(a2) +; RV32I-NEXT: sb t2, 22(a2) +; RV32I-NEXT: sb t1, 23(a2) ; RV32I-NEXT: srli a5, a5, 24 -; RV32I-NEXT: sb a5, 31(a2) -; RV32I-NEXT: srli a5, t4, 24 -; RV32I-NEXT: sb a5, 19(a2) -; RV32I-NEXT: srli s0, s0, 24 -; RV32I-NEXT: sb s0, 23(a2) -; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: srli t0, a6, 16 +; RV32I-NEXT: srli t1, a6, 8 +; RV32I-NEXT: sb a6, 8(a2) +; RV32I-NEXT: sb t1, 9(a2) +; RV32I-NEXT: sb t0, 10(a2) ; RV32I-NEXT: sb a5, 11(a2) -; RV32I-NEXT: srli a5, t3, 24 -; RV32I-NEXT: sb a5, 15(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 3(a2) -; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 1(a2) ; RV32I-NEXT: srli a4, a4, 24 -; RV32I-NEXT: sb a4, 7(a2) -; RV32I-NEXT: sb t2, 24(a2) -; RV32I-NEXT: sb s2, 28(a2) -; RV32I-NEXT: sb t1, 16(a2) -; RV32I-NEXT: sb s1, 20(a2) -; RV32I-NEXT: sb a3, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a4, t2, 16 -; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t2, 8 -; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, s2, 16 -; RV32I-NEXT: sb a4, 30(a2) -; RV32I-NEXT: srli a4, s2, 8 -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: srli a4, t1, 16 -; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t1, 8 -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, s1, 16 -; RV32I-NEXT: sb a4, 22(a2) -; RV32I-NEXT: srli s1, s1, 8 -; RV32I-NEXT: sb s1, 21(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) +; RV32I-NEXT: srli a3, a7, 24 +; RV32I-NEXT: srli a4, a7, 16 +; RV32I-NEXT: srli a5, a7, 8 +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: sb a0, 7(a2) ; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -3684,27 +3688,27 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 0(sp) -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a5, 48(sp) -; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 
24(sp) ; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd a5, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: slli a0, a1, 2 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: addi a3, sp, 32 ; RV64I-NEXT: sub a3, a3, a0 -; RV64I-NEXT: ld a4, 0(a3) -; RV64I-NEXT: ld a5, 8(a3) +; RV64I-NEXT: ld a5, 0(a3) +; RV64I-NEXT: ld a4, 8(a3) ; RV64I-NEXT: slli a6, a1, 5 ; RV64I-NEXT: ld a7, 16(a3) ; RV64I-NEXT: ld a1, 24(a3) -; RV64I-NEXT: sll a3, a5, a6 +; RV64I-NEXT: sll a3, a4, a6 ; RV64I-NEXT: andi a0, a6, 32 ; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: srli a0, a4, 1 +; RV64I-NEXT: srli a0, a5, 1 ; RV64I-NEXT: srl a0, a0, t0 ; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: sll t1, a1, a6 @@ -3712,70 +3716,70 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: srl a1, a1, t0 ; RV64I-NEXT: or a1, t1, a1 ; RV64I-NEXT: sll a7, a7, a6 -; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: srl a5, a5, t0 -; RV64I-NEXT: or a5, a7, a5 -; RV64I-NEXT: sll a4, a4, a6 -; RV64I-NEXT: sb a4, 0(a2) +; RV64I-NEXT: srli a4, a4, 1 +; RV64I-NEXT: srl a4, a4, t0 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: sll a5, a5, a6 ; RV64I-NEXT: srli a6, a7, 56 +; RV64I-NEXT: srli t0, a7, 48 +; RV64I-NEXT: srli t2, a7, 40 +; RV64I-NEXT: srli a7, a7, 32 +; RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: sb t2, 21(a2) +; RV64I-NEXT: sb t0, 22(a2) ; RV64I-NEXT: sb a6, 23(a2) -; RV64I-NEXT: srli a6, a7, 48 -; RV64I-NEXT: sb a6, 22(a2) -; RV64I-NEXT: srli a6, a7, 40 -; RV64I-NEXT: sb a6, 21(a2) -; RV64I-NEXT: srli a6, a7, 32 -; RV64I-NEXT: sb a6, 20(a2) ; RV64I-NEXT: srli a6, t1, 56 +; RV64I-NEXT: srli a7, t1, 48 +; RV64I-NEXT: srli t0, t1, 40 +; RV64I-NEXT: srli t1, t1, 32 +; RV64I-NEXT: sb t1, 28(a2) +; RV64I-NEXT: sb t0, 29(a2) +; RV64I-NEXT: sb a7, 30(a2) ; RV64I-NEXT: sb a6, 31(a2) -; RV64I-NEXT: srli a6, t1, 48 -; RV64I-NEXT: sb a6, 30(a2) -; RV64I-NEXT: srli a6, t1, 40 -; RV64I-NEXT: sb a6, 29(a2) -; RV64I-NEXT: srli a6, t1, 32 -; RV64I-NEXT: sb a6, 28(a2) -; RV64I-NEXT: srli a6, a4, 56 +; RV64I-NEXT: srli a6, a5, 56 +; RV64I-NEXT: srli a7, a5, 48 +; RV64I-NEXT: srli t0, a5, 40 +; RV64I-NEXT: srli t1, a5, 32 +; RV64I-NEXT: sb t1, 4(a2) +; RV64I-NEXT: sb t0, 5(a2) +; RV64I-NEXT: sb a7, 6(a2) ; RV64I-NEXT: sb a6, 7(a2) -; RV64I-NEXT: srli a6, a4, 48 -; RV64I-NEXT: sb a6, 6(a2) -; RV64I-NEXT: srli a6, a4, 40 -; RV64I-NEXT: sb a6, 5(a2) -; RV64I-NEXT: srli a6, a4, 32 -; RV64I-NEXT: sb a6, 4(a2) -; RV64I-NEXT: srli a6, a4, 24 +; RV64I-NEXT: srli a6, a5, 24 +; RV64I-NEXT: srli a7, a5, 16 +; RV64I-NEXT: srli t0, a5, 8 +; RV64I-NEXT: sb a5, 0(a2) +; RV64I-NEXT: sb t0, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: srli a6, a4, 16 -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) -; RV64I-NEXT: srli a4, a3, 56 -; RV64I-NEXT: sb a4, 15(a2) -; RV64I-NEXT: srli a4, a3, 48 -; RV64I-NEXT: sb a4, 14(a2) -; RV64I-NEXT: srli a4, a3, 40 -; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: srli a5, a3, 56 +; RV64I-NEXT: srli a6, a3, 48 +; RV64I-NEXT: srli a7, a3, 40 ; RV64I-NEXT: srli a3, a3, 32 ; RV64I-NEXT: sb a3, 12(a2) -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a3, a5, 24 +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a5, 15(a2) +; RV64I-NEXT: srli a3, a4, 24 +; RV64I-NEXT: srli a5, a4, 16 +; RV64I-NEXT: srli a6, a4, 8 +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) ; RV64I-NEXT: sb a3, 19(a2) -; RV64I-NEXT: srli 
a3, a5, 16 -; RV64I-NEXT: sb a3, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) ; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -3863,90 +3867,90 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: or a0, a0, t2 ; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw t1, 56(sp) -; RV32I-NEXT: sw t0, 52(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a7, 48(sp) -; RV32I-NEXT: sw a6, 44(sp) -; RV32I-NEXT: sw a5, 40(sp) -; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw t0, 52(sp) +; RV32I-NEXT: sw t1, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a3, 32(sp) +; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw a5, 40(sp) +; RV32I-NEXT: sw a6, 44(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: addi a0, sp, 32 -; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: sub a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; 
RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -4050,82 +4054,82 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a1, 0(a1) -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 0(sp) -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a5, 48(sp) -; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd a5, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: addi a0, sp, 32 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: ld a1, 16(a0) -; RV64I-NEXT: ld a3, 24(a0) -; RV64I-NEXT: ld a4, 0(a0) -; RV64I-NEXT: ld a0, 8(a0) -; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: sub a3, a0, a1 +; RV64I-NEXT: ld a4, 16(a3) +; RV64I-NEXT: ld a0, 8(a3) +; RV64I-NEXT: ld a1, 0(a3) +; RV64I-NEXT: ld a3, 24(a3) +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 20(a2) +; RV64I-NEXT: sb a7, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) ; RV64I-NEXT: sb a5, 23(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 22(a2) -; RV64I-NEXT: srli a5, a1, 40 -; 
RV64I-NEXT: sb a5, 21(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 20(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a7, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) ; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: srli a1, a3, 56 -; RV64I-NEXT: sb a1, 31(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; RV64I-NEXT: sb a7, 28(a2) +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: sb a5, 30(a2) +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 24(a2) +; RV64I-NEXT: sb a6, 25(a2) +; RV64I-NEXT: sb a5, 26(a2) +; RV64I-NEXT: sb a4, 27(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -4213,90 +4217,90 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: or a0, a0, t2 ; RV32I-NEXT: or a0, a0, t3 ; RV32I-NEXT: lbu a1, 0(a1) -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw t1, 56(sp) -; RV32I-NEXT: sw 
t0, 52(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a7, 48(sp) -; RV32I-NEXT: sw a6, 44(sp) -; RV32I-NEXT: sw a5, 40(sp) -; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw t0, 52(sp) +; RV32I-NEXT: sw t1, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a3, 32(sp) +; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw a5, 40(sp) +; RV32I-NEXT: sw a6, 44(sp) ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: addi a0, sp, 32 -; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: sub a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: 
srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -4422,97 +4426,97 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: sraiw a0, a0, 31 -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: sd a0, 32(sp) -; RV64I-NEXT: sd a6, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a6, 24(sp) ; RV64I-NEXT: andi a0, a1, 24 ; RV64I-NEXT: mv a3, sp ; RV64I-NEXT: add a0, a3, a0 ; RV64I-NEXT: ld a3, 0(a0) ; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a7, a4, a1 -; RV64I-NEXT: andi a0, a1, 56 -; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: slli a0, a5, 1 -; RV64I-NEXT: sll a0, a0, t0 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: srl a3, a3, a1 +; RV64I-NEXT: slli a5, a1, 3 +; RV64I-NEXT: ld a6, 16(a0) +; RV64I-NEXT: ld a7, 24(a0) +; RV64I-NEXT: srl a0, a4, a5 +; RV64I-NEXT: andi a1, a5, 56 +; RV64I-NEXT: xori t0, a1, 63 +; RV64I-NEXT: slli a1, a6, 1 +; RV64I-NEXT: sll a1, a1, t0 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srl a3, a3, a5 ; RV64I-NEXT: slli a4, a4, 1 ; RV64I-NEXT: sll a4, a4, t0 ; RV64I-NEXT: or a4, a3, a4 -; RV64I-NEXT: srl a5, a5, a1 -; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: srl a6, a6, a5 +; RV64I-NEXT: slli t1, a7, 1 ; RV64I-NEXT: sll t0, t1, t0 -; RV64I-NEXT: or t0, a5, t0 -; RV64I-NEXT: sra a1, a6, a1 -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: or t0, a6, t0 +; RV64I-NEXT: sra a5, a7, a5 +; RV64I-NEXT: srli a7, a5, 56 +; RV64I-NEXT: srli t1, a5, 48 +; RV64I-NEXT: srli t2, a5, 40 +; RV64I-NEXT: srli t3, a5, 32 +; RV64I-NEXT: sb t3, 28(a2) +; RV64I-NEXT: sb t2, 29(a2) +; RV64I-NEXT: sb t1, 30(a2) +; RV64I-NEXT: sb a7, 31(a2) +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t1, a5, 16 +; RV64I-NEXT: srli t2, a5, 8 +; RV64I-NEXT: sb a5, 24(a2) +; RV64I-NEXT: sb t2, 25(a2) +; RV64I-NEXT: sb t1, 26(a2) +; RV64I-NEXT: sb a7, 27(a2) +; RV64I-NEXT: srli a5, t0, 56 +; RV64I-NEXT: srli a7, t0, 48 +; RV64I-NEXT: srli t1, t0, 40 +; RV64I-NEXT: srli t2, t0, 32 +; RV64I-NEXT: sb t2, 20(a2) +; RV64I-NEXT: sb t1, 21(a2) +; RV64I-NEXT: sb a7, 22(a2) +; RV64I-NEXT: sb a5, 23(a2) +; RV64I-NEXT: srli a5, t0, 24 +; RV64I-NEXT: srli a7, t0, 16 +; RV64I-NEXT: srli t0, t0, 8 +; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sb t0, 17(a2) +; RV64I-NEXT: sb a7, 18(a2) +; RV64I-NEXT: sb a5, 19(a2) +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) 
+; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a4, a4, 8 ; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a7, 8(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a6, 2(a2) +; RV64I-NEXT: sb a5, 3(a2) ; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 30(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 28(a2) +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: sb a3, 15(a2) ; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) +; RV64I-NEXT: srli a4, a1, 16 ; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, t0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, t0, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, t0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, t0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, t0, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, t0, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a1, t0, 8 -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a1, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a3, 11(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -4522,6 +4526,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) @@ -4613,124 +4618,125 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a1, a1, t4 ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw a0, 56(sp) -; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: sw a0, 48(sp) -; RV32I-NEXT: sw a0, 44(sp) -; RV32I-NEXT: sw a0, 40(sp) -; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 52(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: sw t2, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a0, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: 
sw t2, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: andi a0, a1, 28 ; RV32I-NEXT: mv a3, sp -; RV32I-NEXT: add a3, a3, a0 -; RV32I-NEXT: lw a6, 0(a3) -; RV32I-NEXT: lw a7, 4(a3) -; RV32I-NEXT: slli a5, a1, 3 -; RV32I-NEXT: lw t0, 8(a3) -; RV32I-NEXT: lw t1, 12(a3) -; RV32I-NEXT: srl a4, a7, a5 -; RV32I-NEXT: andi a0, a5, 24 -; RV32I-NEXT: xori t2, a0, 31 -; RV32I-NEXT: slli a0, t0, 1 -; RV32I-NEXT: sll a0, a0, t2 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srl a6, a6, a5 -; RV32I-NEXT: slli a7, a7, 1 -; RV32I-NEXT: sll a1, a7, t2 -; RV32I-NEXT: or a1, a6, a1 -; RV32I-NEXT: srl a7, t1, a5 -; RV32I-NEXT: lw t3, 16(a3) -; RV32I-NEXT: lw t4, 20(a3) -; RV32I-NEXT: lw t5, 24(a3) -; RV32I-NEXT: lw t6, 28(a3) -; RV32I-NEXT: slli a3, t3, 1 -; RV32I-NEXT: sll a3, a3, t2 -; RV32I-NEXT: or a3, a7, a3 -; RV32I-NEXT: srl t0, t0, a5 -; RV32I-NEXT: slli t1, t1, 1 -; RV32I-NEXT: sll t1, t1, t2 -; RV32I-NEXT: or t1, t0, t1 -; RV32I-NEXT: srl s0, t4, a5 +; RV32I-NEXT: add a6, a3, a0 +; RV32I-NEXT: lw a3, 0(a6) +; RV32I-NEXT: lw a4, 4(a6) +; RV32I-NEXT: slli t1, a1, 3 +; RV32I-NEXT: lw a7, 8(a6) +; RV32I-NEXT: lw t0, 12(a6) +; RV32I-NEXT: srl a0, a4, t1 +; RV32I-NEXT: andi a1, t1, 24 +; RV32I-NEXT: xori t2, a1, 31 +; RV32I-NEXT: slli a1, a7, 1 +; RV32I-NEXT: sll a1, a1, t2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: srl a3, a3, t1 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: srl a5, t0, t1 +; RV32I-NEXT: lw t3, 16(a6) +; RV32I-NEXT: lw t4, 20(a6) +; RV32I-NEXT: lw t5, 24(a6) +; RV32I-NEXT: lw t6, 28(a6) +; RV32I-NEXT: slli a6, t3, 1 +; RV32I-NEXT: sll a6, a6, t2 +; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: srl a7, a7, t1 +; RV32I-NEXT: slli t0, t0, 1 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: srl s0, t4, t1 ; RV32I-NEXT: slli s1, t5, 1 ; RV32I-NEXT: sll s1, s1, t2 ; RV32I-NEXT: or s1, s0, s1 -; RV32I-NEXT: srl t3, t3, a5 +; RV32I-NEXT: srl t3, t3, t1 ; RV32I-NEXT: slli t4, t4, 1 ; RV32I-NEXT: sll t4, t4, t2 ; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: srl t5, t5, a5 +; RV32I-NEXT: srl t5, t5, t1 ; RV32I-NEXT: slli s2, t6, 1 ; RV32I-NEXT: sll t2, s2, t2 ; RV32I-NEXT: or t2, t5, t2 -; RV32I-NEXT: sra a5, t6, a5 +; RV32I-NEXT: sra t1, t6, t1 +; RV32I-NEXT: srli t6, t1, 24 +; RV32I-NEXT: srli s2, t1, 16 +; RV32I-NEXT: srli s3, t1, 8 +; RV32I-NEXT: sb t1, 28(a2) +; RV32I-NEXT: sb s3, 29(a2) +; RV32I-NEXT: sb s2, 30(a2) +; RV32I-NEXT: sb t6, 31(a2) +; RV32I-NEXT: srli t1, t2, 24 +; RV32I-NEXT: srli t6, t2, 16 +; RV32I-NEXT: srli t2, t2, 8 ; RV32I-NEXT: sb t5, 24(a2) -; RV32I-NEXT: sb a5, 28(a2) +; RV32I-NEXT: sb t2, 25(a2) +; RV32I-NEXT: sb t6, 26(a2) +; RV32I-NEXT: sb t1, 27(a2) +; RV32I-NEXT: srli t1, t4, 24 +; RV32I-NEXT: srli t2, t4, 16 +; RV32I-NEXT: srli t4, t4, 8 ; RV32I-NEXT: sb t3, 16(a2) -; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a7, 12(a2) -; RV32I-NEXT: sb a6, 0(a2) -; RV32I-NEXT: sb a4, 4(a2) -; RV32I-NEXT: srli a4, a5, 24 -; RV32I-NEXT: sb a4, 31(a2) -; RV32I-NEXT: srli a4, a5, 16 -; RV32I-NEXT: sb a4, 30(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a4, t2, 24 -; RV32I-NEXT: sb a4, 27(a2) -; RV32I-NEXT: srli a4, t2, 16 -; RV32I-NEXT: sb a4, 26(a2) -; RV32I-NEXT: srli a4, t2, 8 -; RV32I-NEXT: sb a4, 25(a2) -; RV32I-NEXT: srli a4, t4, 24 -; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t4, 16 -; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t4, 8 -; 
RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, s1, 24 -; RV32I-NEXT: sb a4, 23(a2) -; RV32I-NEXT: srli a4, s1, 16 -; RV32I-NEXT: sb a4, 22(a2) +; RV32I-NEXT: sb t4, 17(a2) +; RV32I-NEXT: sb t2, 18(a2) +; RV32I-NEXT: sb t1, 19(a2) +; RV32I-NEXT: srli t1, s1, 24 +; RV32I-NEXT: srli t2, s1, 16 ; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s0, 20(a2) ; RV32I-NEXT: sb s1, 21(a2) -; RV32I-NEXT: srli a4, t1, 24 -; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, t1, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, t1, 8 -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb t2, 22(a2) +; RV32I-NEXT: sb t1, 23(a2) +; RV32I-NEXT: srli t1, t0, 24 +; RV32I-NEXT: srli t2, t0, 16 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb a7, 8(a2) +; RV32I-NEXT: sb t0, 9(a2) +; RV32I-NEXT: sb t2, 10(a2) +; RV32I-NEXT: sb t1, 11(a2) +; RV32I-NEXT: srli a7, a6, 24 +; RV32I-NEXT: srli t0, a6, 16 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: sb a7, 15(a2) +; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a6, a4, 16 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: sb a5, 3(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: srli a4, a1, 16 ; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a3, 7(a2) ; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -4856,98 +4862,98 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: sraiw a0, a0, 31 -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: sd a0, 32(sp) -; RV64I-NEXT: sd a6, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a6, 24(sp) ; RV64I-NEXT: slli a0, a1, 2 ; RV64I-NEXT: andi a0, a0, 24 ; RV64I-NEXT: mv a3, sp ; RV64I-NEXT: add a0, a3, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: ld a4, 8(a0) -; RV64I-NEXT: slli a1, a1, 5 -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a7, a4, a1 -; RV64I-NEXT: andi a0, a1, 32 -; RV64I-NEXT: xori t0, a0, 63 -; RV64I-NEXT: slli a0, a5, 1 -; RV64I-NEXT: sll a0, a0, t0 -; RV64I-NEXT: or a0, a7, a0 -; RV64I-NEXT: srl a3, a3, a1 -; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: sll a4, a4, t0 -; RV64I-NEXT: or a4, a3, a4 -; RV64I-NEXT: srl a5, a5, a1 -; RV64I-NEXT: slli t1, a6, 1 -; RV64I-NEXT: sll t0, t1, t0 -; RV64I-NEXT: or t0, a5, t0 -; RV64I-NEXT: sra a1, a6, a1 +; RV64I-NEXT: ld a4, 0(a0) +; RV64I-NEXT: ld a5, 
8(a0) +; RV64I-NEXT: slli a6, a1, 5 +; RV64I-NEXT: ld a7, 16(a0) +; RV64I-NEXT: ld t0, 24(a0) +; RV64I-NEXT: srl a3, a5, a6 +; RV64I-NEXT: andi a0, a6, 32 +; RV64I-NEXT: xori t1, a0, 63 +; RV64I-NEXT: slli a0, a7, 1 +; RV64I-NEXT: sll a0, a0, t1 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: srl t2, a4, a6 +; RV64I-NEXT: slli a5, a5, 1 +; RV64I-NEXT: sll a1, a5, t1 +; RV64I-NEXT: or a1, t2, a1 +; RV64I-NEXT: srl a5, a7, a6 +; RV64I-NEXT: slli a4, t0, 1 +; RV64I-NEXT: sll a4, a4, t1 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: sra a6, t0, a6 +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t0, a5, 16 +; RV64I-NEXT: srli t1, a5, 8 ; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a7, 8(a2) -; RV64I-NEXT: srli a6, a5, 24 -; RV64I-NEXT: sb a6, 19(a2) -; RV64I-NEXT: srli a6, a5, 16 -; RV64I-NEXT: sb a6, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: sb t1, 17(a2) +; RV64I-NEXT: sb t0, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: srli a5, a6, 56 +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: srli t0, a6, 40 +; RV64I-NEXT: srli t1, a6, 32 +; RV64I-NEXT: sb t1, 28(a2) +; RV64I-NEXT: sb t0, 29(a2) +; RV64I-NEXT: sb a7, 30(a2) ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a6, 24 +; RV64I-NEXT: srli a7, a6, 16 +; RV64I-NEXT: srli t0, a6, 8 +; RV64I-NEXT: sb a6, 24(a2) +; RV64I-NEXT: sb t0, 25(a2) +; RV64I-NEXT: sb a7, 26(a2) ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: srli a1, a7, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a7, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a1, a7, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: srli a1, t0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, t0, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, t0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, t0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: srli a5, t2, 24 +; RV64I-NEXT: srli a6, t2, 16 +; RV64I-NEXT: srli a7, t2, 8 +; RV64I-NEXT: sb t2, 0(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a6, 2(a2) +; RV64I-NEXT: sb a5, 3(a2) +; RV64I-NEXT: srli a5, a3, 24 +; RV64I-NEXT: srli a6, a3, 16 +; RV64I-NEXT: srli a7, a3, 8 +; RV64I-NEXT: sb a3, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb a5, 11(a2) +; RV64I-NEXT: srli a3, a4, 56 +; RV64I-NEXT: srli a5, a4, 48 +; RV64I-NEXT: srli a6, a4, 40 ; RV64I-NEXT: srli a4, a4, 32 -; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a3, 23(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sb a1, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb 
a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -5036,90 +5042,90 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: or t2, t2, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw a0, 56(sp) -; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: sw a0, 48(sp) -; RV32I-NEXT: sw a0, 44(sp) -; RV32I-NEXT: sw a0, 40(sp) -; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 52(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: sw t2, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a0, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw t2, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: andi a1, a1, 28 ; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: add a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: add a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; 
RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -5224,82 +5230,82 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: sraiw a0, a0, 31 -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: sd a0, 32(sp) -; RV64I-NEXT: sd a6, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a6, 24(sp) ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: mv a0, sp -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: ld a1, 16(a0) -; RV64I-NEXT: ld a3, 24(a0) -; RV64I-NEXT: ld a4, 0(a0) -; RV64I-NEXT: ld a0, 8(a0) -; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: add a3, a0, a1 +; RV64I-NEXT: ld a4, 16(a3) +; RV64I-NEXT: ld a0, 8(a3) +; RV64I-NEXT: ld a1, 0(a3) +; RV64I-NEXT: ld a3, 24(a3) +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 20(a2) +; RV64I-NEXT: sb a7, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) ; RV64I-NEXT: sb a5, 23(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 22(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 21(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 20(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a7, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) ; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: srli a1, a3, 
56 -; RV64I-NEXT: sb a1, 31(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; RV64I-NEXT: sb a7, 28(a2) +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: sb a5, 30(a2) +; RV64I-NEXT: sb a4, 31(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 24(a2) +; RV64I-NEXT: sb a6, 25(a2) +; RV64I-NEXT: sb a5, 26(a2) +; RV64I-NEXT: sb a4, 27(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -5388,90 +5394,90 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: or t2, t2, t3 ; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw a0, 56(sp) -; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: sw a0, 48(sp) -; RV32I-NEXT: sw a0, 44(sp) -; RV32I-NEXT: sw a0, 40(sp) -; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 52(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: sw t2, 28(sp) -; RV32I-NEXT: sw t1, 24(sp) -; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a0, 44(sp) ; RV32I-NEXT: sw a7, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw t2, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: andi a1, a1, 24 ; RV32I-NEXT: mv a0, 
sp -; RV32I-NEXT: add a1, a0, a1 -; RV32I-NEXT: lw a3, 16(a1) -; RV32I-NEXT: lw a4, 20(a1) -; RV32I-NEXT: lw a5, 24(a1) -; RV32I-NEXT: lw a6, 28(a1) -; RV32I-NEXT: lw a7, 0(a1) -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: sb a5, 24(a2) -; RV32I-NEXT: sb a6, 28(a2) -; RV32I-NEXT: sb a3, 16(a2) -; RV32I-NEXT: sb a4, 20(a2) -; RV32I-NEXT: sb t0, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a7, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli t1, a5, 24 +; RV32I-NEXT: add a4, a0, a1 +; RV32I-NEXT: lw a5, 16(a4) +; RV32I-NEXT: lw a6, 20(a4) +; RV32I-NEXT: lw a7, 24(a4) +; RV32I-NEXT: lw a1, 0(a4) +; RV32I-NEXT: lw a0, 4(a4) +; RV32I-NEXT: lw t0, 8(a4) +; RV32I-NEXT: lw a3, 12(a4) +; RV32I-NEXT: lw a4, 28(a4) +; RV32I-NEXT: srli t1, a7, 24 +; RV32I-NEXT: srli t2, a7, 16 +; RV32I-NEXT: srli t3, a7, 8 +; RV32I-NEXT: sb a7, 24(a2) +; RV32I-NEXT: sb t3, 25(a2) +; RV32I-NEXT: sb t2, 26(a2) ; RV32I-NEXT: sb t1, 27(a2) -; RV32I-NEXT: srli t1, a5, 16 -; RV32I-NEXT: sb t1, 26(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a6, 24 -; RV32I-NEXT: sb a5, 31(a2) +; RV32I-NEXT: srli a7, a4, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli t2, a4, 8 +; RV32I-NEXT: sb a4, 28(a2) +; RV32I-NEXT: sb t2, 29(a2) +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: sb a7, 31(a2) +; RV32I-NEXT: srli a4, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t1, a5, 8 +; RV32I-NEXT: sb a5, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb a7, 18(a2) +; RV32I-NEXT: sb a4, 19(a2) +; RV32I-NEXT: srli a4, a6, 24 ; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 29(a2) -; RV32I-NEXT: srli a5, a3, 24 -; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a7, a6, 8 +; RV32I-NEXT: sb a6, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: sb a4, 23(a2) +; RV32I-NEXT: srli a4, t0, 24 +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: srli a6, t0, 8 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb a4, 11(a2) +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: sb a5, 18(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: srli a3, a4, 24 -; RV32I-NEXT: sb a3, 23(a2) -; RV32I-NEXT: srli a3, a4, 16 -; RV32I-NEXT: sb a3, 22(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 21(a2) -; RV32I-NEXT: srli a3, t0, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a3, t0, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, t0, 8 -; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 
16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll index 190d67a5d8c1188..d36c660b3b14214 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -17,13 +17,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: srlw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_4bytes: @@ -49,13 +49,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 %bitOff = load i32, ptr %bitOff.ptr, align 1 @@ -78,13 +78,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sllw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_4bytes: @@ -110,13 +110,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sll a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 %bitOff = load i32, ptr %bitOff.ptr, align 1 @@ -139,13 +139,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sraw a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_4bytes: @@ -171,13 +171,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr 
%bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sra a0, a0, a1 -; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 %bitOff = load i32, ptr %bitOff.ptr, align 1 @@ -234,21 +234,21 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: srl a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_8bytes: @@ -298,20 +298,20 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a1, a4, a1 -; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %bitOff = load i64, ptr %bitOff.ptr, align 1 @@ -367,21 +367,21 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: sll a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_8bytes: @@ -431,20 +431,20 @@ 
define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a1, a4, a1 -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) ; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) ; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a3, 7(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %bitOff = load i64, ptr %bitOff.ptr, align 1 @@ -500,21 +500,21 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: sra a0, a0, a1 -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: srli a5, a0, 40 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) ; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a3, 7(a2) ; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) ; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_8bytes: @@ -564,20 +564,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a3, a3, a4 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: .LBB5_3: -; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 3(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %bitOff = load i64, ptr %bitOff.ptr, align 1 @@ -670,36 +670,36 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; 
RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes: @@ -755,14 +755,14 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: srli a0, a1, 3 ; RV32I-NEXT: andi a0, a0, 12 ; RV32I-NEXT: mv a3, sp @@ -786,34 +786,34 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a5, a5, a7 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: sb a0, 12(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: srli a7, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb a7, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a5, 15(a2) ; RV32I-NEXT: srli a0, a4, 16 +; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: srli a5, a4, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a4, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a1, 11(a2) ; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: sb a1, 3(a2) ; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: srli a1, a6, 24 +; RV32I-NEXT: srli a3, a6, 8 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a3, 5(a2) ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -906,36 +906,36 @@ define void @shl_16bytes(ptr %src.ptr, ptr 
%bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB7_3: ; RV64I-NEXT: srai a4, a4, 63 ; RV64I-NEXT: and a1, a4, a1 -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) ; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes: @@ -991,14 +991,14 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a5, 24(sp) -; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: srli a0, a1, 3 ; RV32I-NEXT: andi a0, a0, 12 ; RV32I-NEXT: addi a3, sp, 16 @@ -1022,34 +1022,34 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srl a0, a0, a7 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: sll a1, a4, a1 -; RV32I-NEXT: sb a1, 0(a2) ; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 24 +; RV32I-NEXT: srli a7, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a7, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: srli a4, a1, 24 -; RV32I-NEXT: sb a4, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a5, 3(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: srli a5, a0, 8 +; RV32I-NEXT: sb a0, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) ; RV32I-NEXT: sb a1, 10(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 11(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: sb a4, 11(a2) ; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a4, 13(a2) ; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 15(a2) 
-; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 15(a2) ; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: srli a1, a6, 24 +; RV32I-NEXT: srli a3, a6, 8 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a3, 5(a2) ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1142,36 +1142,36 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sll a3, a3, a4 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: .LBB8_3: -; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 12(a2) +; RV64I-NEXT: sb a5, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 12(a2) ; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a3, 6(a2) ; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 4(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 0(a2) +; RV64I-NEXT: sb a4, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 1(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes: @@ -1228,14 +1228,14 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: srli a0, a1, 3 ; RV32I-NEXT: andi a0, a0, 12 ; RV32I-NEXT: mv a3, sp @@ -1259,34 +1259,34 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a5, a5, a7 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: sra a0, a0, a1 -; RV32I-NEXT: sb a0, 12(a2) ; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: srli a7, a0, 8 +; RV32I-NEXT: sb a0, 12(a2) +; RV32I-NEXT: sb a7, 13(a2) ; RV32I-NEXT: sb a1, 14(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 15(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a5, 15(a2) ; 
RV32I-NEXT: srli a0, a4, 16 +; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: srli a5, a4, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) ; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, a4, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a1, 11(a2) ; RV32I-NEXT: srli a0, a3, 16 +; RV32I-NEXT: srli a1, a3, 24 +; RV32I-NEXT: srli a4, a3, 8 +; RV32I-NEXT: sb a3, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) ; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: sb a1, 3(a2) ; RV32I-NEXT: srli a0, a6, 16 +; RV32I-NEXT: srli a1, a6, 24 +; RV32I-NEXT: srli a3, a6, 8 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a3, 5(a2) ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1409,98 +1409,98 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 56(sp) -; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) -; RV64I-NEXT: srli a0, a1, 3 +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a0, 24(sp) +; RV64I-NEXT: srli a0, a6, 3 ; RV64I-NEXT: andi a0, a0, 24 -; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a0, a3, a0 -; RV64I-NEXT: ld a3, 8(a0) -; RV64I-NEXT: ld a4, 0(a0) -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a0, a3, a1 -; RV64I-NEXT: andi a7, a1, 63 +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ld a1, 8(a0) +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 16(a0) +; RV64I-NEXT: ld a5, 24(a0) +; RV64I-NEXT: srl a0, a1, a6 +; RV64I-NEXT: andi a7, a6, 63 ; RV64I-NEXT: xori a7, a7, 63 -; RV64I-NEXT: slli t0, a5, 1 +; RV64I-NEXT: slli t0, a4, 1 ; RV64I-NEXT: sll t0, t0, a7 ; RV64I-NEXT: or a0, a0, t0 -; RV64I-NEXT: srl a4, a4, a1 -; RV64I-NEXT: slli a3, a3, 1 -; RV64I-NEXT: sll a3, a3, a7 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: srl a4, a5, a1 -; RV64I-NEXT: slli a5, a6, 1 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: srl a1, a6, a1 -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: srl a3, a3, a6 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: sll a1, a1, a7 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srl a3, a4, a6 +; RV64I-NEXT: slli a4, a5, 1 +; RV64I-NEXT: sll a4, a4, a7 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: srl a4, a5, a6 +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 28(a2) +; RV64I-NEXT: sb a7, 29(a2) +; RV64I-NEXT: sb a6, 30(a2) ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; 
RV64I-NEXT: sb a4, 24(a2) +; RV64I-NEXT: sb a7, 25(a2) +; RV64I-NEXT: sb a6, 26(a2) ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) -; RV64I-NEXT: srli a1, a3, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a4, 23(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 16(a2) +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -1597,22 +1597,22 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a6, a1, a6 -; RV32I-NEXT: sw zero, 60(sp) -; RV32I-NEXT: sw zero, 56(sp) -; RV32I-NEXT: sw zero, 52(sp) ; RV32I-NEXT: sw zero, 48(sp) -; RV32I-NEXT: sw zero, 44(sp) -; RV32I-NEXT: sw zero, 40(sp) -; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 60(sp) ; RV32I-NEXT: sw zero, 32(sp) -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw t2, 24(sp) -; RV32I-NEXT: sw t1, 20(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 44(sp) ; RV32I-NEXT: sw t0, 
16(sp) -; RV32I-NEXT: sw a7, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t1, 20(sp) +; RV32I-NEXT: sw t2, 24(sp) +; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a7, 12(sp) ; RV32I-NEXT: srli a0, a6, 3 ; RV32I-NEXT: andi a0, a0, 28 ; RV32I-NEXT: mv a1, sp @@ -1656,62 +1656,62 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll t0, t2, t0 ; RV32I-NEXT: or t0, t1, t0 ; RV32I-NEXT: srl a6, t4, a6 -; RV32I-NEXT: sb a6, 28(a2) ; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t2, a6, 16 +; RV32I-NEXT: srli t3, a6, 8 +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb t3, 29(a2) +; RV32I-NEXT: sb t2, 30(a2) ; RV32I-NEXT: sb t1, 31(a2) -; RV32I-NEXT: srli t1, a6, 16 -; RV32I-NEXT: sb t1, 30(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: sb t0, 24(a2) -; RV32I-NEXT: sb a7, 16(a2) -; RV32I-NEXT: sb a5, 20(a2) -; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) ; RV32I-NEXT: srli a6, t0, 24 +; RV32I-NEXT: srli t1, t0, 16 +; RV32I-NEXT: srli t2, t0, 8 +; RV32I-NEXT: sb t0, 24(a2) +; RV32I-NEXT: sb t2, 25(a2) +; RV32I-NEXT: sb t1, 26(a2) ; RV32I-NEXT: sb a6, 27(a2) -; RV32I-NEXT: srli a6, t0, 16 -; RV32I-NEXT: sb a6, 26(a2) -; RV32I-NEXT: srli a6, t0, 8 -; RV32I-NEXT: sb a6, 25(a2) ; RV32I-NEXT: srli a6, a7, 24 +; RV32I-NEXT: srli t0, a7, 16 +; RV32I-NEXT: srli t1, a7, 8 +; RV32I-NEXT: sb a7, 16(a2) +; RV32I-NEXT: sb t1, 17(a2) +; RV32I-NEXT: sb t0, 18(a2) ; RV32I-NEXT: sb a6, 19(a2) -; RV32I-NEXT: srli a6, a7, 16 -; RV32I-NEXT: sb a6, 18(a2) -; RV32I-NEXT: srli a6, a7, 8 -; RV32I-NEXT: sb a6, 17(a2) ; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: srli t0, a5, 8 +; RV32I-NEXT: sb a5, 20(a2) +; RV32I-NEXT: sb t0, 21(a2) +; RV32I-NEXT: sb a7, 22(a2) ; RV32I-NEXT: sb a6, 23(a2) -; RV32I-NEXT: srli a6, a5, 16 -; RV32I-NEXT: sb a6, 22(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 21(a2) ; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a6, a4, 16 +; RV32I-NEXT: srli a7, a4, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a7, 9(a2) +; RV32I-NEXT: sb a6, 10(a2) ; RV32I-NEXT: sb a5, 11(a2) -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 9(a2) ; RV32I-NEXT: srli a4, a3, 24 +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a6, 13(a2) +; RV32I-NEXT: sb a5, 14(a2) ; RV32I-NEXT: sb a4, 15(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) ; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) ; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -1833,98 +1833,98 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) 
nounwind { ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: sd zero, 0(sp) -; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: sd a5, 48(sp) -; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd a3, 32(sp) -; RV64I-NEXT: srli a0, a1, 3 +; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd a5, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) +; RV64I-NEXT: srli a0, a6, 3 ; RV64I-NEXT: andi a0, a0, 24 -; RV64I-NEXT: addi a3, sp, 32 -; RV64I-NEXT: sub a3, a3, a0 -; RV64I-NEXT: ld a4, 8(a3) -; RV64I-NEXT: ld a5, 0(a3) -; RV64I-NEXT: ld a6, 16(a3) -; RV64I-NEXT: ld a3, 24(a3) -; RV64I-NEXT: sll a0, a4, a1 -; RV64I-NEXT: andi a7, a1, 63 +; RV64I-NEXT: addi a1, sp, 32 +; RV64I-NEXT: sub a1, a1, a0 +; RV64I-NEXT: ld a3, 8(a1) +; RV64I-NEXT: ld a4, 0(a1) +; RV64I-NEXT: ld a5, 16(a1) +; RV64I-NEXT: ld a1, 24(a1) +; RV64I-NEXT: sll a0, a3, a6 +; RV64I-NEXT: andi a7, a6, 63 ; RV64I-NEXT: xori a7, a7, 63 -; RV64I-NEXT: srli t0, a5, 1 +; RV64I-NEXT: srli t0, a4, 1 ; RV64I-NEXT: srl t0, t0, a7 ; RV64I-NEXT: or a0, a0, t0 -; RV64I-NEXT: sll a3, a3, a1 -; RV64I-NEXT: srli t0, a6, 1 +; RV64I-NEXT: sll a1, a1, a6 +; RV64I-NEXT: srli t0, a5, 1 ; RV64I-NEXT: srl t0, t0, a7 -; RV64I-NEXT: or a3, a3, t0 -; RV64I-NEXT: sll a6, a6, a1 -; RV64I-NEXT: srli a4, a4, 1 -; RV64I-NEXT: srl a4, a4, a7 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: sll a1, a5, a1 -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: sll a5, a5, a6 +; RV64I-NEXT: srli a3, a3, 1 +; RV64I-NEXT: srl a3, a3, a7 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: sll a4, a4, a6 +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) ; RV64I-NEXT: sb a5, 7(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 6(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 5(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; RV64I-NEXT: sb a4, 0(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a6, 2(a2) ; RV64I-NEXT: sb a5, 3(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) -; RV64I-NEXT: srli a1, a3, 56 -; RV64I-NEXT: sb a1, 31(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 30(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 29(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 28(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 27(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) +; RV64I-NEXT: srli a4, 
a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a4, 23(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 16(a2) +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 28(a2) +; RV64I-NEXT: sb a5, 29(a2) +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a3, 31(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 24(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a3, 27(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -2021,22 +2021,22 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: or a7, a1, a7 -; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: sw zero, 24(sp) -; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: sw zero, 8(sp) -; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 28(sp) ; RV32I-NEXT: sw zero, 0(sp) -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw t2, 56(sp) -; RV32I-NEXT: sw t1, 52(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw t0, 48(sp) -; RV32I-NEXT: sw a6, 44(sp) -; RV32I-NEXT: sw a5, 40(sp) -; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw t1, 52(sp) +; RV32I-NEXT: sw t2, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a3, 32(sp) +; RV32I-NEXT: sw a4, 36(sp) +; RV32I-NEXT: sw a5, 40(sp) +; RV32I-NEXT: sw a6, 44(sp) ; RV32I-NEXT: srli a0, a7, 3 ; RV32I-NEXT: andi a0, a0, 28 ; RV32I-NEXT: addi a1, sp, 32 @@ -2062,80 +2062,80 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lw t3, 20(a4) ; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lw a6, 24(a4) -; RV32I-NEXT: lw a4, 28(a4) -; RV32I-NEXT: sll t4, t3, a7 +; RV32I-NEXT: lw t4, 28(a4) +; RV32I-NEXT: sll a4, t3, a7 ; RV32I-NEXT: srli t5, t2, 1 ; RV32I-NEXT: srl t5, t5, t1 -; RV32I-NEXT: or t4, t4, t5 +; RV32I-NEXT: or a4, a4, t5 ; RV32I-NEXT: sll t2, t2, a7 ; RV32I-NEXT: srli t0, t0, 1 ; RV32I-NEXT: srl t0, t0, t1 ; RV32I-NEXT: or t0, t2, t0 -; RV32I-NEXT: sll a4, a4, a7 -; RV32I-NEXT: srli t2, a6, 1 -; RV32I-NEXT: srl t2, t2, t1 -; RV32I-NEXT: or a4, a4, t2 +; RV32I-NEXT: sll t2, t4, a7 +; RV32I-NEXT: srli t4, a6, 1 +; RV32I-NEXT: srl t4, t4, t1 +; RV32I-NEXT: or t2, t2, t4 ; RV32I-NEXT: 
sll a6, a6, a7 -; RV32I-NEXT: srli t2, t3, 1 -; RV32I-NEXT: srl t1, t2, t1 +; RV32I-NEXT: srli t3, t3, 1 +; RV32I-NEXT: srl t1, t3, t1 ; RV32I-NEXT: or a6, a6, t1 ; RV32I-NEXT: sll a5, a5, a7 -; RV32I-NEXT: sb a5, 0(a2) ; RV32I-NEXT: srli a7, a5, 24 +; RV32I-NEXT: srli t1, a5, 16 +; RV32I-NEXT: srli t3, a5, 8 +; RV32I-NEXT: sb a5, 0(a2) +; RV32I-NEXT: sb t3, 1(a2) +; RV32I-NEXT: sb t1, 2(a2) ; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: srli a7, a5, 16 -; RV32I-NEXT: sb a7, 2(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 1(a2) -; RV32I-NEXT: sb a6, 24(a2) -; RV32I-NEXT: sb a4, 28(a2) -; RV32I-NEXT: sb t0, 16(a2) -; RV32I-NEXT: sb t4, 20(a2) -; RV32I-NEXT: sb a3, 8(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a0, 4(a2) ; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: srli a7, a6, 16 +; RV32I-NEXT: srli t1, a6, 8 +; RV32I-NEXT: sb a6, 24(a2) +; RV32I-NEXT: sb t1, 25(a2) +; RV32I-NEXT: sb a7, 26(a2) ; RV32I-NEXT: sb a5, 27(a2) -; RV32I-NEXT: srli a5, a6, 16 -; RV32I-NEXT: sb a5, 26(a2) -; RV32I-NEXT: srli a5, a6, 8 -; RV32I-NEXT: sb a5, 25(a2) -; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a5, t2, 24 +; RV32I-NEXT: srli a6, t2, 16 +; RV32I-NEXT: srli a7, t2, 8 +; RV32I-NEXT: sb t2, 28(a2) +; RV32I-NEXT: sb a7, 29(a2) +; RV32I-NEXT: sb a6, 30(a2) ; RV32I-NEXT: sb a5, 31(a2) -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: sb a5, 30(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: srli a4, t0, 24 -; RV32I-NEXT: sb a4, 19(a2) -; RV32I-NEXT: srli a4, t0, 16 -; RV32I-NEXT: sb a4, 18(a2) -; RV32I-NEXT: srli a4, t0, 8 -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: srli a4, t4, 24 -; RV32I-NEXT: sb a4, 23(a2) -; RV32I-NEXT: srli a4, t4, 16 -; RV32I-NEXT: sb a4, 22(a2) -; RV32I-NEXT: srli a4, t4, 8 -; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: srli a6, t0, 16 +; RV32I-NEXT: srli a7, t0, 8 +; RV32I-NEXT: sb t0, 16(a2) +; RV32I-NEXT: sb a7, 17(a2) +; RV32I-NEXT: sb a6, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a5, a4, 24 +; RV32I-NEXT: srli a6, a4, 16 +; RV32I-NEXT: srli a7, a4, 8 +; RV32I-NEXT: sb a4, 20(a2) +; RV32I-NEXT: sb a7, 21(a2) +; RV32I-NEXT: sb a6, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) ; RV32I-NEXT: srli a4, a3, 24 +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: srli a6, a3, 8 +; RV32I-NEXT: sb a3, 8(a2) +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) ; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 9(a2) ; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: srli a5, a1, 8 +; RV32I-NEXT: sb a1, 12(a2) +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: sb a4, 14(a2) ; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) ; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 @@ -2257,99 +2257,99 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: or a1, a1, t1 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a7, a1, a7 ; RV64I-NEXT: sraiw a0, a0, 31 -; RV64I-NEXT: sd a0, 
56(sp) -; RV64I-NEXT: sd a0, 48(sp) -; RV64I-NEXT: sd a0, 40(sp) ; RV64I-NEXT: sd a0, 32(sp) -; RV64I-NEXT: sd a6, 24(sp) -; RV64I-NEXT: sd a5, 16(sp) -; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a0, 56(sp) ; RV64I-NEXT: sd a3, 0(sp) -; RV64I-NEXT: srli a0, a1, 3 +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a6, 24(sp) +; RV64I-NEXT: srli a0, a7, 3 ; RV64I-NEXT: andi a0, a0, 24 -; RV64I-NEXT: mv a3, sp -; RV64I-NEXT: add a0, a3, a0 -; RV64I-NEXT: ld a3, 8(a0) -; RV64I-NEXT: ld a4, 0(a0) -; RV64I-NEXT: ld a5, 16(a0) -; RV64I-NEXT: ld a6, 24(a0) -; RV64I-NEXT: srl a0, a3, a1 -; RV64I-NEXT: andi a7, a1, 63 -; RV64I-NEXT: xori a7, a7, 63 -; RV64I-NEXT: slli t0, a5, 1 -; RV64I-NEXT: sll t0, t0, a7 +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ld a1, 8(a0) +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 16(a0) +; RV64I-NEXT: ld a5, 24(a0) +; RV64I-NEXT: srl a0, a1, a7 +; RV64I-NEXT: andi a6, a7, 63 +; RV64I-NEXT: xori a6, a6, 63 +; RV64I-NEXT: slli t0, a4, 1 +; RV64I-NEXT: sll t0, t0, a6 ; RV64I-NEXT: or a0, a0, t0 -; RV64I-NEXT: srl a4, a4, a1 -; RV64I-NEXT: slli a3, a3, 1 -; RV64I-NEXT: sll a3, a3, a7 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: srl a4, a5, a1 -; RV64I-NEXT: slli a5, a6, 1 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: sra a1, a6, a1 -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: srli a5, a1, 56 +; RV64I-NEXT: srl a3, a3, a7 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: sll a1, a1, a6 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srl a3, a4, a7 +; RV64I-NEXT: slli a4, a5, 1 +; RV64I-NEXT: sll a4, a4, a6 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: sra a4, a5, a7 +; RV64I-NEXT: srli a5, a4, 56 +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: srli a7, a4, 40 +; RV64I-NEXT: srli t0, a4, 32 +; RV64I-NEXT: sb t0, 28(a2) +; RV64I-NEXT: sb a7, 29(a2) +; RV64I-NEXT: sb a6, 30(a2) ; RV64I-NEXT: sb a5, 31(a2) -; RV64I-NEXT: srli a5, a1, 48 -; RV64I-NEXT: sb a5, 30(a2) -; RV64I-NEXT: srli a5, a1, 40 -; RV64I-NEXT: sb a5, 29(a2) -; RV64I-NEXT: srli a5, a1, 32 -; RV64I-NEXT: sb a5, 28(a2) -; RV64I-NEXT: srli a5, a1, 24 +; RV64I-NEXT: srli a5, a4, 24 +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a7, a4, 8 +; RV64I-NEXT: sb a4, 24(a2) +; RV64I-NEXT: sb a7, 25(a2) +; RV64I-NEXT: sb a6, 26(a2) ; RV64I-NEXT: sb a5, 27(a2) -; RV64I-NEXT: srli a5, a1, 16 -; RV64I-NEXT: sb a5, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a4, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a4, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a4, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a4, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a4, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a4, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) -; RV64I-NEXT: srli a1, a3, 56 -; RV64I-NEXT: sb a1, 7(a2) -; RV64I-NEXT: srli a1, a3, 48 -; RV64I-NEXT: sb a1, 6(a2) -; RV64I-NEXT: srli a1, a3, 40 -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: srli a1, a3, 32 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: srli a1, a3, 24 -; RV64I-NEXT: sb a1, 3(a2) -; RV64I-NEXT: srli a1, a3, 16 -; RV64I-NEXT: sb a1, 2(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 1(a2) +; RV64I-NEXT: srli a4, a3, 56 +; RV64I-NEXT: srli a5, a3, 48 +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: srli a7, a3, 32 +; 
RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a4, 23(a2) +; RV64I-NEXT: srli a4, a3, 24 +; RV64I-NEXT: srli a5, a3, 16 +; RV64I-NEXT: srli a6, a3, 8 +; RV64I-NEXT: sb a3, 16(a2) +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: srli a3, a1, 56 +; RV64I-NEXT: srli a4, a1, 48 +; RV64I-NEXT: srli a5, a1, 40 +; RV64I-NEXT: srli a6, a1, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a3, a1, 24 +; RV64I-NEXT: srli a4, a1, 16 +; RV64I-NEXT: srli a5, a1, 8 +; RV64I-NEXT: sb a1, 0(a2) +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a3, 3(a2) ; RV64I-NEXT: srli a1, a0, 56 +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: srli a5, a0, 32 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) ; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) ; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: srli a3, a0, 16 +; RV64I-NEXT: srli a4, a0, 8 +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: sb a4, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) ; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 9(a2) ; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; @@ -2447,22 +2447,22 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: or a6, a1, t4 ; RV32I-NEXT: srai a0, a0, 31 -; RV32I-NEXT: sw a0, 60(sp) -; RV32I-NEXT: sw a0, 56(sp) -; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: sw a0, 48(sp) -; RV32I-NEXT: sw a0, 44(sp) -; RV32I-NEXT: sw a0, 40(sp) -; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 52(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw a0, 60(sp) ; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: sw t3, 28(sp) -; RV32I-NEXT: sw t2, 24(sp) -; RV32I-NEXT: sw t1, 20(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a0, 44(sp) ; RV32I-NEXT: sw t0, 16(sp) -; RV32I-NEXT: sw a7, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw t1, 20(sp) +; RV32I-NEXT: sw t2, 24(sp) +; RV32I-NEXT: sw t3, 28(sp) ; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a7, 12(sp) ; RV32I-NEXT: srli a0, a6, 3 ; RV32I-NEXT: andi a0, a0, 28 ; RV32I-NEXT: mv a1, sp @@ -2506,62 +2506,62 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll t0, t2, t0 ; RV32I-NEXT: or t0, t1, t0 ; RV32I-NEXT: sra a6, t4, a6 -; RV32I-NEXT: sb a6, 28(a2) ; RV32I-NEXT: srli t1, a6, 24 +; RV32I-NEXT: srli t2, a6, 16 +; RV32I-NEXT: srli t3, a6, 8 +; RV32I-NEXT: sb a6, 28(a2) +; RV32I-NEXT: sb t3, 29(a2) +; RV32I-NEXT: sb t2, 30(a2) ; RV32I-NEXT: sb t1, 31(a2) -; RV32I-NEXT: srli t1, a6, 16 -; RV32I-NEXT: sb t1, 30(a2) -; RV32I-NEXT: srli a6, a6, 8 -; RV32I-NEXT: sb a6, 29(a2) -; RV32I-NEXT: sb t0, 24(a2) -; RV32I-NEXT: sb a7, 16(a2) -; RV32I-NEXT: sb a5, 20(a2) -; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb a0, 4(a2) ; RV32I-NEXT: srli a6, t0, 24 +; RV32I-NEXT: srli t1, t0, 16 +; RV32I-NEXT: srli t2, t0, 8 +; RV32I-NEXT: sb t0, 24(a2) +; RV32I-NEXT: sb t2, 25(a2) +; RV32I-NEXT: sb t1, 26(a2) ; 
RV32I-NEXT: sb a6, 27(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 26(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 25(a2)
 ; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: srli t0, a7, 16
+; RV32I-NEXT: srli t1, a7, 8
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb t0, 18(a2)
 ; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 17(a2)
 ; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t0, a5, 8
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb t0, 21(a2)
+; RV32I-NEXT: sb a7, 22(a2)
 ; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a5, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 21(a2)
 ; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
+; RV32I-NEXT: sb a6, 10(a2)
 ; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a5, a4, 16
-; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
 ; RV32I-NEXT: srli a4, a3, 24
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
 ; RV32I-NEXT: sb a4, 15(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 14(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
 ; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
 ; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
 ; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
 ; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
 ; RV32I-NEXT: addi sp, sp, 64
 ; RV32I-NEXT: ret
 %src = load i256, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
index 3a74bb66d9ec252..3525c40026064a4 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmempair.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -174,10 +174,10 @@ define void @swd(ptr %a, i32 %b, i32%c) {
 define void @sdd(ptr %a, i64 %b, i64%c) {
 ; RV32XTHEADMEMPAIR-LABEL: sdd:
 ; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
 ; RV32XTHEADMEMPAIR-NEXT: sw a1, 32(a0)
-; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
 ; RV32XTHEADMEMPAIR-NEXT: sw a3, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
 ; RV32XTHEADMEMPAIR-NEXT: ret
 ;
 ; RV64XTHEADMEMPAIR-LABEL: sdd: