From 2bbd5fb2f0c89d87f13863b6c17a6ca0205291be Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 4 Jan 2021 11:12:18 -0500 Subject: [PATCH] add patches for PPC --- deps/llvm.mk | 4 +- .../llvm-11-D93092-ppc-knownbits.patch | 51 +- deps/patches/llvm-11-ppc-half-ctr.patch | 96 +++ deps/patches/llvm-11-ppc-sp-from-bp.patch | 621 ++++++++++++++++++ 4 files changed, 750 insertions(+), 22 deletions(-) create mode 100644 deps/patches/llvm-11-ppc-half-ctr.patch create mode 100644 deps/patches/llvm-11-ppc-sp-from-bp.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index f5a4060070320..794e340283d55 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -531,8 +531,10 @@ $(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LL $(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210 $(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12 $(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12 -$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) +$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12 $(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as)) +$(eval $(call LLVM_PATCH,llvm-11-ppc-half-ctr)) # remove for LLVM 12 +$(eval $(call LLVM_PATCH,llvm-11-ppc-sp-from-bp)) # remove for LLVM 12 endif # LLVM_VER 11.0 diff --git a/deps/patches/llvm-11-D93092-ppc-knownbits.patch b/deps/patches/llvm-11-D93092-ppc-knownbits.patch index a4ebecafc82a9..47e6f743ddefd 100644 --- a/deps/patches/llvm-11-D93092-ppc-knownbits.patch +++ b/deps/patches/llvm-11-D93092-ppc-knownbits.patch @@ -1,8 +1,8 @@ -From 8bec64e2c0386934d4e38344907f0f4b0de4d8a3 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Tue, 15 Dec 2020 09:59:18 -0500 -Subject: [PATCH] [PowerPC] KnownBits should be constant when performing - non-sign comparison +From b5a0e6ca2b0c6367b082dd9a77b02c26607c8d7d Mon Sep 17 00:00:00 2001 +From: Kai Luo +Date: Tue, 29 Dec 2020 12:11:55 +0000 +Subject: [PATCH 2/4] 
[PowerPC] Remaining KnownBits should be constant when + performing non-sign comparison In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says ``` @@ -23,20 +23,30 @@ Bit 4, besides bit 0, is still unknown and affects the final result. This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388. +Reviewed By: nemanjai, #powerpc + Differential Revision: https://reviews.llvm.org/D93092 --- - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- - llvm/test/CodeGen/PowerPC/pr48388.ll | 42 +++++++++++++++++++++ - 2 files changed, 44 insertions(+), 1 deletion(-) + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 +++-- + llvm/test/CodeGen/PowerPC/pr48388.ll | 41 +++++++++++++++++++++ + 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp -index f54f1673526d..76b32db44656 100644 +index f54f1673526d..38dbff4197b9 100644 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -13291,7 +13291,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, - Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); - Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); +@@ -13287,11 +13287,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, + KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); + + // We don't really care about what is known about the first bit (if +- // anything), so clear it in all masks prior to comparing them. +- Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); +- Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); ++ // anything), so pretend that it is known zero for both to ensure they can ++ // be compared as constants. 
++ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); ++ Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); - if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) + if (!Op1Known.isConstant() || !Op2Known.isConstant() || @@ -46,10 +56,10 @@ index f54f1673526d..76b32db44656 100644 } diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll new file mode 100644 -index 000000000000..138fb6147832 +index 000000000000..822e5d852317 --- /dev/null +++ llvm/test/CodeGen/PowerPC/pr48388.ll -@@ -0,0 +1,42 @@ +@@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s @@ -58,13 +68,12 @@ index 000000000000..138fb6147832 +; CHECK-LABEL: julia_div_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: divd r6, r3, r4 -+; CHECK-NEXT: li r5, 32767 -+; CHECK-NEXT: sldi r5, r5, 32 -+; CHECK-NEXT: oris r7, r5, 40069 -+; CHECK-NEXT: oris r5, r5, 40079 ++; CHECK-NEXT: lis r5, -1592 ++; CHECK-NEXT: ori r7, r5, 21321 ++; CHECK-NEXT: ori r5, r5, 65519 +; CHECK-NEXT: cmpdi r3, 0 -+; CHECK-NEXT: ori r7, r7, 13456 -+; CHECK-NEXT: ori r5, r5, 65264 ++; CHECK-NEXT: rldic r7, r7, 4, 17 ++; CHECK-NEXT: rldic r5, r5, 4, 17 +; CHECK-NEXT: iselgt r9, r5, r7 +; CHECK-NEXT: cmpdi r4, 0 +; CHECK-NEXT: mulld r8, r6, r4 @@ -93,5 +102,5 @@ index 000000000000..138fb6147832 + ret i64 %12 +} -- -2.29.2 +2.30.0 diff --git a/deps/patches/llvm-11-ppc-half-ctr.patch b/deps/patches/llvm-11-ppc-half-ctr.patch new file mode 100644 index 0000000000000..e9a9b9a4d5f86 --- /dev/null +++ b/deps/patches/llvm-11-ppc-half-ctr.patch @@ -0,0 +1,96 @@ +From 79a73d6388790cfec9bd76b1790f0f5551a9df8c Mon Sep 17 00:00:00 2001 +From: Nemanja Ivanovic +Date: Mon, 28 Dec 2020 22:51:51 -0600 +Subject: [PATCH 1/4] [PowerPC] Disable CTR loops containing operations on + half-precision + +On subtargets prior to Power9, conversions to/from half precision +are 
lowered to libcalls. This makes loops containing such operations +invalid candidates for HW loops. + +Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519 +--- + .../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++ + llvm/test/CodeGen/PowerPC/pr48519.ll | 55 +++++++++++++++++++ + 2 files changed, 59 insertions(+) + create mode 100644 llvm/test/CodeGen/PowerPC/pr48519.ll + +diff --git llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +index 53556ffc267d..49c10fdf8898 100644 +--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp ++++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +@@ -441,6 +441,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, + isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) + return true; ++ if (!ST->isISA3_0() && ++ (CI->getSrcTy()->getScalarType()->isHalfTy() || ++ CI->getDestTy()->getScalarType()->isHalfTy())) ++ return true; + } else if (isLargeIntegerTy(!TM.isPPC64(), + J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::UDiv || +diff --git llvm/test/CodeGen/PowerPC/pr48519.ll llvm/test/CodeGen/PowerPC/pr48519.ll +new file mode 100644 +index 000000000000..777874e91c26 +--- /dev/null ++++ llvm/test/CodeGen/PowerPC/pr48519.ll +@@ -0,0 +1,55 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ++; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s ++define void @julia__typed_vcat_20() #0 { ++; CHECK-LABEL: julia__typed_vcat_20: ++; CHECK: # %bb.0: # %top ++; CHECK-NEXT: mflr r0 ++; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ++; CHECK-NEXT: std r0, 16(r1) ++; CHECK-NEXT: stdu r1, -48(r1) ++; CHECK-NEXT: li r3, 1 ++; CHECK-NEXT: li r30, 0 ++; CHECK-NEXT: .p2align 4 ++; CHECK-NEXT: .LBB0_1: # %L139 ++; CHECK-NEXT: # ++; CHECK-NEXT: addi r3, r3, 
-1 ++; CHECK-NEXT: mtfprd f0, r3 ++; CHECK-NEXT: xscvsxdsp f1, f0 ++; CHECK-NEXT: bl __gnu_f2h_ieee ++; CHECK-NEXT: nop ++; CHECK-NEXT: bl __gnu_h2f_ieee ++; CHECK-NEXT: nop ++; CHECK-NEXT: addi r30, r30, -1 ++; CHECK-NEXT: li r3, 0 ++; CHECK-NEXT: cmpldi r30, 0 ++; CHECK-NEXT: bne+ cr0, .LBB0_1 ++; CHECK-NEXT: # %bb.2: # %pass.1 ++; CHECK-NEXT: bl __gnu_f2h_ieee ++; CHECK-NEXT: nop ++; CHECK-NEXT: sth r3, 0(r3) ++top: ++ %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8 ++ %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0) ++ %1 = extractvalue { i64, i1 } %0, 0 ++ br label %L139 ++ ++L139: ; preds = %L139, %top ++ %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ] ++ %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ] ++ %2 = add nsw i64 %value_phi23, -1 ++ %3 = add i64 %2, 0 ++ %4 = sitofp i64 %3 to half ++ store half %4, half addrspace(13)* undef, align 2 ++ %.not101.not = icmp eq i64 %value_phi21, 0 ++ %5 = add i64 %value_phi21, 1 ++ br i1 %.not101.not, label %pass.1, label %L139 ++ ++pass.1: ; preds = %L139 ++ unreachable ++} ++ ++; Function Attrs: nounwind readnone speculatable willreturn ++declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0 ++ ++attributes #0 = { nounwind } +-- +2.30.0 + diff --git a/deps/patches/llvm-11-ppc-sp-from-bp.patch b/deps/patches/llvm-11-ppc-sp-from-bp.patch new file mode 100644 index 0000000000000..014cfb237a303 --- /dev/null +++ b/deps/patches/llvm-11-ppc-sp-from-bp.patch @@ -0,0 +1,621 @@ +From 646760460fa06f8577d35282cde5faf8f0ed8499 Mon Sep 17 00:00:00 2001 +From: Nemanja Ivanovic +Date: Tue, 22 Dec 2020 05:43:33 -0600 +Subject: [PATCH 4/4] [PowerPC] Restore stack ptr from base ptr when available + +On subtargets that have a red zone, we will copy the stack pointer to the base +pointer in the prologue prior to updating the stack pointer. There are no other +updates to the base pointer after that. 
This suggests that we should be able to +restore the stack pointer from the base pointer rather than loading it from the +back chain or adding the frame size back to either the stack pointer or the +frame pointer. +This came about because functions that call setjmp need to restore the SP from +the FP because the back chain might have been clobbered +(see https://reviews.llvm.org/D92906). However, if the stack is realigned, the +restored SP might be incorrect (which is what caused the failures in the two +ASan test cases). + +This patch was tested quite extensively both with sanitizer runtimes and +general code. + +Differential revision: https://reviews.llvm.org/D93327 +--- + llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 9 +- + llvm/test/CodeGen/PowerPC/aix-base-pointer.ll | 8 +- + llvm/test/CodeGen/PowerPC/pr46759.ll | 2 +- + .../CodeGen/PowerPC/stack-clash-prologue.ll | 498 ++++++++++++++++++ + llvm/test/CodeGen/PowerPC/stack-realign.ll | 4 +- + 5 files changed, 513 insertions(+), 8 deletions(-) + +diff --git llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +index 66db0f199e15..80cbaa475184 100644 +--- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp ++++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +@@ -1704,11 +1704,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, + // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red + // zone add this offset back now. + ++ // If the function has a base pointer, the stack pointer has been copied ++ // to it so we can restore it by copying in the other direction. ++ if (HasRedZone && HasBP) { ++ BuildMI(MBB, MBBI, dl, OrInst, RBReg). ++ addReg(BPReg). ++ addReg(BPReg); ++ } + // If this function contained a fastcc call and GuaranteedTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case. 
Similar situation exists with setjmp. +- if (FI->hasFastCall() || MF.exposesReturnsTwice()) { ++ else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { + assert(HasFP && "Expecting a valid frame pointer."); + if (!HasRedZone) + RBReg = FPReg; +diff --git llvm/test/CodeGen/PowerPC/aix-base-pointer.ll llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +index 2566e31c025d..5141fd9e4222 100644 +--- llvm/test/CodeGen/PowerPC/aix-base-pointer.ll ++++ llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +@@ -27,8 +27,8 @@ declare void @callee(i32*) + ; 32BIT: stwux 1, 1, 0 + ; 32BIT: addi 3, 1, 64 + ; 32BIT: bl .callee +-; 32BIT: lwz 1, 0(1) +-; 32BIT: lwz 30, -8(1) ++; 32BIT: mr 1, 30 ++; 32BIT: lwz 30, -16(1) + + ; 64BIT-LABEL: .caller: + ; 64BIT: std 30, -16(1) +@@ -38,5 +38,5 @@ declare void @callee(i32*) + ; 64BIT: stdux 1, 1, 0 + ; 64BIT: addi 3, 1, 128 + ; 64BIT: bl .callee +-; 64BIT: ld 1, 0(1) +-; 64BIT: ld 30, -16(1) ++; 64BIT: mr 1, 30 ++; 64BIT: ld 30, -24(1) +diff --git llvm/test/CodeGen/PowerPC/pr46759.ll llvm/test/CodeGen/PowerPC/pr46759.ll +index d1d68a5db7e3..92f2c64bc06a 100644 +--- llvm/test/CodeGen/PowerPC/pr46759.ll ++++ llvm/test/CodeGen/PowerPC/pr46759.ll +@@ -45,7 +45,7 @@ define void @foo(i32 %vla_size) #0 { + ; CHECK-LE-NEXT: .LBB0_2: # %entry + ; CHECK-LE-NEXT: addi r3, r1, 2048 + ; CHECK-LE-NEXT: lbz r3, 0(r3) +-; CHECK-LE-NEXT: ld r1, 0(r1) ++; CHECK-LE-NEXT: mr r1, r30 + ; CHECK-LE-NEXT: ld r31, -8(r1) + ; CHECK-LE-NEXT: ld r30, -16(r1) + ; CHECK-LE-NEXT: blr +diff --git llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +index cb513be9128c..6443059c9704 100644 +--- llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll ++++ llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +@@ -528,4 +528,502 @@ entry: + ret i8 %c + } + ++; alloca + align < probe_size ++define i32 @f8(i64 %i) local_unnamed_addr #0 { ++; CHECK-LE-LABEL: f8: ++; CHECK-LE: # %bb.0: ++; CHECK-LE-NEXT: clrldi r0, r1, 58 ++; CHECK-LE-NEXT: std 
r30, -16(r1) ++; CHECK-LE-NEXT: mr r30, r1 ++; CHECK-LE-NEXT: subfic r0, r0, -896 ++; CHECK-LE-NEXT: stdux r1, r1, r0 ++; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-LE-NEXT: .cfi_offset r30, -16 ++; CHECK-LE-NEXT: addi r4, r1, 64 ++; CHECK-LE-NEXT: sldi r3, r3, 2 ++; CHECK-LE-NEXT: li r5, 1 ++; CHECK-LE-NEXT: stwx r5, r4, r3 ++; CHECK-LE-NEXT: lwz r3, 64(r1) ++; CHECK-LE-NEXT: mr r1, r30 ++; CHECK-LE-NEXT: ld r30, -16(r1) ++; CHECK-LE-NEXT: blr ++; ++; CHECK-BE-LABEL: f8: ++; CHECK-BE: # %bb.0: ++; CHECK-BE-NEXT: clrldi r0, r1, 58 ++; CHECK-BE-NEXT: std r30, -16(r1) ++; CHECK-BE-NEXT: mr r30, r1 ++; CHECK-BE-NEXT: subfic r0, r0, -896 ++; CHECK-BE-NEXT: stdux r1, r1, r0 ++; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-BE-NEXT: .cfi_offset r30, -16 ++; CHECK-BE-NEXT: addi r4, r1, 64 ++; CHECK-BE-NEXT: li r5, 1 ++; CHECK-BE-NEXT: sldi r3, r3, 2 ++; CHECK-BE-NEXT: stwx r5, r4, r3 ++; CHECK-BE-NEXT: lwz r3, 64(r1) ++; CHECK-BE-NEXT: mr r1, r30 ++; CHECK-BE-NEXT: ld r30, -16(r1) ++; CHECK-BE-NEXT: blr ++; ++; CHECK-32-LABEL: f8: ++; CHECK-32: # %bb.0: ++; CHECK-32-NEXT: clrlwi r0, r1, 26 ++; CHECK-32-NEXT: subfic r0, r0, -896 ++; CHECK-32-NEXT: stwux r1, r1, r0 ++; CHECK-32-NEXT: sub r0, r1, r0 ++; CHECK-32-NEXT: addic r0, r0, -8 ++; CHECK-32-NEXT: stwx r30, 0, r0 ++; CHECK-32-NEXT: addic r30, r0, 8 ++; CHECK-32-NEXT: .cfi_def_cfa_register r30 ++; CHECK-32-NEXT: .cfi_offset r30, -8 ++; CHECK-32-NEXT: addi r3, r1, 64 ++; CHECK-32-NEXT: li r5, 1 ++; CHECK-32-NEXT: slwi r4, r4, 2 ++; CHECK-32-NEXT: stwx r5, r3, r4 ++; CHECK-32-NEXT: mr r0, r31 ++; CHECK-32-NEXT: lwz r3, 64(r1) ++; CHECK-32-NEXT: lwz r31, 0(r1) ++; CHECK-32-NEXT: lwz r30, -8(r31) ++; CHECK-32-NEXT: mr r1, r31 ++; CHECK-32-NEXT: mr r31, r0 ++; CHECK-32-NEXT: blr ++ %a = alloca i32, i32 200, align 64 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++; alloca > probe_size, align > probe_size ++define i32 
@f9(i64 %i) local_unnamed_addr #0 { ++; CHECK-LE-LABEL: f9: ++; CHECK-LE: # %bb.0: ++; CHECK-LE-NEXT: std r30, -16(r1) ++; CHECK-LE-NEXT: mr r30, r1 ++; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-LE-NEXT: clrldi r0, r30, 53 ++; CHECK-LE-NEXT: subc r12, r30, r0 ++; CHECK-LE-NEXT: clrldi r0, r0, 52 ++; CHECK-LE-NEXT: cmpdi r0, 0 ++; CHECK-LE-NEXT: beq cr0, .LBB9_2 ++; CHECK-LE-NEXT: # %bb.1: ++; CHECK-LE-NEXT: neg r0, r0 ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: .LBB9_2: ++; CHECK-LE-NEXT: li r0, -4096 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: beq cr0, .LBB9_4 ++; CHECK-LE-NEXT: .LBB9_3: ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: bne cr0, .LBB9_3 ++; CHECK-LE-NEXT: .LBB9_4: ++; CHECK-LE-NEXT: mr r12, r30 ++; CHECK-LE-NEXT: stdu r12, -2048(r1) ++; CHECK-LE-NEXT: stdu r12, -4096(r1) ++; CHECK-LE-NEXT: stdu r12, -4096(r1) ++; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-LE-NEXT: .cfi_offset r30, -16 ++; CHECK-LE-NEXT: addi r4, r1, 2048 ++; CHECK-LE-NEXT: sldi r3, r3, 2 ++; CHECK-LE-NEXT: li r5, 1 ++; CHECK-LE-NEXT: stwx r5, r4, r3 ++; CHECK-LE-NEXT: lwz r3, 2048(r1) ++; CHECK-LE-NEXT: mr r1, r30 ++; CHECK-LE-NEXT: ld r30, -16(r1) ++; CHECK-LE-NEXT: blr ++; ++; CHECK-BE-LABEL: f9: ++; CHECK-BE: # %bb.0: ++; CHECK-BE-NEXT: std r30, -16(r1) ++; CHECK-BE-NEXT: mr r30, r1 ++; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-BE-NEXT: clrldi r0, r30, 53 ++; CHECK-BE-NEXT: subc r12, r30, r0 ++; CHECK-BE-NEXT: clrldi r0, r0, 52 ++; CHECK-BE-NEXT: cmpdi r0, 0 ++; CHECK-BE-NEXT: beq cr0, .LBB9_2 ++; CHECK-BE-NEXT: # %bb.1: ++; CHECK-BE-NEXT: neg r0, r0 ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: .LBB9_2: ++; CHECK-BE-NEXT: li r0, -4096 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: beq cr0, .LBB9_4 ++; CHECK-BE-NEXT: .LBB9_3: ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: bne cr0, .LBB9_3 ++; CHECK-BE-NEXT: 
.LBB9_4: ++; CHECK-BE-NEXT: mr r12, r30 ++; CHECK-BE-NEXT: stdu r12, -2048(r1) ++; CHECK-BE-NEXT: stdu r12, -4096(r1) ++; CHECK-BE-NEXT: stdu r12, -4096(r1) ++; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-BE-NEXT: .cfi_offset r30, -16 ++; CHECK-BE-NEXT: addi r4, r1, 2048 ++; CHECK-BE-NEXT: li r5, 1 ++; CHECK-BE-NEXT: sldi r3, r3, 2 ++; CHECK-BE-NEXT: stwx r5, r4, r3 ++; CHECK-BE-NEXT: lwz r3, 2048(r1) ++; CHECK-BE-NEXT: mr r1, r30 ++; CHECK-BE-NEXT: ld r30, -16(r1) ++; CHECK-BE-NEXT: blr ++; ++; CHECK-32-LABEL: f9: ++; CHECK-32: # %bb.0: ++; CHECK-32-NEXT: mr r12, r1 ++; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ++; CHECK-32-NEXT: clrlwi r0, r12, 21 ++; CHECK-32-NEXT: subc r1, r1, r0 ++; CHECK-32-NEXT: stwu r12, -2048(r1) ++; CHECK-32-NEXT: stwu r12, -4096(r1) ++; CHECK-32-NEXT: stwu r12, -4096(r1) ++; CHECK-32-NEXT: .cfi_def_cfa_register r1 ++; CHECK-32-NEXT: sub r0, r1, r12 ++; CHECK-32-NEXT: sub r0, r1, r0 ++; CHECK-32-NEXT: addic r0, r0, -8 ++; CHECK-32-NEXT: stwx r30, 0, r0 ++; CHECK-32-NEXT: addic r30, r0, 8 ++; CHECK-32-NEXT: .cfi_def_cfa_register r30 ++; CHECK-32-NEXT: .cfi_offset r30, -8 ++; CHECK-32-NEXT: addi r3, r1, 2048 ++; CHECK-32-NEXT: li r5, 1 ++; CHECK-32-NEXT: slwi r4, r4, 2 ++; CHECK-32-NEXT: stwx r5, r3, r4 ++; CHECK-32-NEXT: mr r0, r31 ++; CHECK-32-NEXT: lwz r3, 2048(r1) ++; CHECK-32-NEXT: lwz r31, 0(r1) ++; CHECK-32-NEXT: lwz r30, -8(r31) ++; CHECK-32-NEXT: mr r1, r31 ++; CHECK-32-NEXT: mr r31, r0 ++; CHECK-32-NEXT: blr ++ %a = alloca i32, i32 2000, align 2048 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++; alloca < probe_size, align < probe_size, alloca + align > probe_size ++define i32 @f10(i64 %i) local_unnamed_addr #0 { ++; CHECK-LE-LABEL: f10: ++; CHECK-LE: # %bb.0: ++; CHECK-LE-NEXT: std r30, -16(r1) ++; CHECK-LE-NEXT: mr r30, r1 ++; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-LE-NEXT: clrldi r0, 
r30, 54 ++; CHECK-LE-NEXT: subc r12, r30, r0 ++; CHECK-LE-NEXT: clrldi r0, r0, 52 ++; CHECK-LE-NEXT: cmpdi r0, 0 ++; CHECK-LE-NEXT: beq cr0, .LBB10_2 ++; CHECK-LE-NEXT: # %bb.1: ++; CHECK-LE-NEXT: neg r0, r0 ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: .LBB10_2: ++; CHECK-LE-NEXT: li r0, -4096 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: beq cr0, .LBB10_4 ++; CHECK-LE-NEXT: .LBB10_3: ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: bne cr0, .LBB10_3 ++; CHECK-LE-NEXT: .LBB10_4: ++; CHECK-LE-NEXT: mr r12, r30 ++; CHECK-LE-NEXT: stdu r12, -1024(r1) ++; CHECK-LE-NEXT: stdu r12, -4096(r1) ++; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-LE-NEXT: .cfi_offset r30, -16 ++; CHECK-LE-NEXT: addi r4, r1, 1024 ++; CHECK-LE-NEXT: sldi r3, r3, 2 ++; CHECK-LE-NEXT: li r5, 1 ++; CHECK-LE-NEXT: stwx r5, r4, r3 ++; CHECK-LE-NEXT: lwz r3, 1024(r1) ++; CHECK-LE-NEXT: mr r1, r30 ++; CHECK-LE-NEXT: ld r30, -16(r1) ++; CHECK-LE-NEXT: blr ++; ++; CHECK-BE-LABEL: f10: ++; CHECK-BE: # %bb.0: ++; CHECK-BE-NEXT: std r30, -16(r1) ++; CHECK-BE-NEXT: mr r30, r1 ++; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-BE-NEXT: clrldi r0, r30, 54 ++; CHECK-BE-NEXT: subc r12, r30, r0 ++; CHECK-BE-NEXT: clrldi r0, r0, 52 ++; CHECK-BE-NEXT: cmpdi r0, 0 ++; CHECK-BE-NEXT: beq cr0, .LBB10_2 ++; CHECK-BE-NEXT: # %bb.1: ++; CHECK-BE-NEXT: neg r0, r0 ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: .LBB10_2: ++; CHECK-BE-NEXT: li r0, -4096 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: beq cr0, .LBB10_4 ++; CHECK-BE-NEXT: .LBB10_3: ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: bne cr0, .LBB10_3 ++; CHECK-BE-NEXT: .LBB10_4: ++; CHECK-BE-NEXT: mr r12, r30 ++; CHECK-BE-NEXT: stdu r12, -1024(r1) ++; CHECK-BE-NEXT: stdu r12, -4096(r1) ++; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-BE-NEXT: .cfi_offset r30, -16 
++; CHECK-BE-NEXT: addi r4, r1, 1024 ++; CHECK-BE-NEXT: li r5, 1 ++; CHECK-BE-NEXT: sldi r3, r3, 2 ++; CHECK-BE-NEXT: stwx r5, r4, r3 ++; CHECK-BE-NEXT: lwz r3, 1024(r1) ++; CHECK-BE-NEXT: mr r1, r30 ++; CHECK-BE-NEXT: ld r30, -16(r1) ++; CHECK-BE-NEXT: blr ++; ++; CHECK-32-LABEL: f10: ++; CHECK-32: # %bb.0: ++; CHECK-32-NEXT: mr r12, r1 ++; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ++; CHECK-32-NEXT: clrlwi r0, r12, 22 ++; CHECK-32-NEXT: subc r1, r1, r0 ++; CHECK-32-NEXT: stwu r12, -1024(r1) ++; CHECK-32-NEXT: stwu r12, -4096(r1) ++; CHECK-32-NEXT: .cfi_def_cfa_register r1 ++; CHECK-32-NEXT: sub r0, r1, r12 ++; CHECK-32-NEXT: sub r0, r1, r0 ++; CHECK-32-NEXT: addic r0, r0, -8 ++; CHECK-32-NEXT: stwx r30, 0, r0 ++; CHECK-32-NEXT: addic r30, r0, 8 ++; CHECK-32-NEXT: .cfi_def_cfa_register r30 ++; CHECK-32-NEXT: .cfi_offset r30, -8 ++; CHECK-32-NEXT: addi r3, r1, 1024 ++; CHECK-32-NEXT: li r5, 1 ++; CHECK-32-NEXT: slwi r4, r4, 2 ++; CHECK-32-NEXT: stwx r5, r3, r4 ++; CHECK-32-NEXT: mr r0, r31 ++; CHECK-32-NEXT: lwz r3, 1024(r1) ++; CHECK-32-NEXT: lwz r31, 0(r1) ++; CHECK-32-NEXT: lwz r30, -8(r31) ++; CHECK-32-NEXT: mr r1, r31 ++; CHECK-32-NEXT: mr r31, r0 ++; CHECK-32-NEXT: blr ++ %a = alloca i32, i32 1000, align 1024 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %c = load volatile i32, i32* %a ++ ret i32 %c ++} ++ ++define void @f11(i32 %vla_size, i64 %i) #0 { ++; CHECK-LE-LABEL: f11: ++; CHECK-LE: # %bb.0: ++; CHECK-LE-NEXT: std r31, -8(r1) ++; CHECK-LE-NEXT: std r30, -16(r1) ++; CHECK-LE-NEXT: mr r30, r1 ++; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-LE-NEXT: clrldi r0, r30, 49 ++; CHECK-LE-NEXT: subc r12, r30, r0 ++; CHECK-LE-NEXT: clrldi r0, r0, 52 ++; CHECK-LE-NEXT: cmpdi r0, 0 ++; CHECK-LE-NEXT: beq cr0, .LBB11_2 ++; CHECK-LE-NEXT: # %bb.1: ++; CHECK-LE-NEXT: neg r0, r0 ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: .LBB11_2: ++; CHECK-LE-NEXT: li r0, -4096 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: beq cr0, 
.LBB11_4 ++; CHECK-LE-NEXT: .LBB11_3: ++; CHECK-LE-NEXT: stdux r30, r1, r0 ++; CHECK-LE-NEXT: cmpd r1, r12 ++; CHECK-LE-NEXT: bne cr0, .LBB11_3 ++; CHECK-LE-NEXT: .LBB11_4: ++; CHECK-LE-NEXT: mr r12, r30 ++; CHECK-LE-NEXT: li r0, 24 ++; CHECK-LE-NEXT: mtctr r0 ++; CHECK-LE-NEXT: .LBB11_5: ++; CHECK-LE-NEXT: stdu r12, -4096(r1) ++; CHECK-LE-NEXT: bdnz .LBB11_5 ++; CHECK-LE-NEXT: # %bb.6: ++; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-LE-NEXT: .cfi_offset r31, -8 ++; CHECK-LE-NEXT: .cfi_offset r30, -16 ++; CHECK-LE-NEXT: clrldi r3, r3, 32 ++; CHECK-LE-NEXT: lis r5, 1 ++; CHECK-LE-NEXT: mr r31, r1 ++; CHECK-LE-NEXT: li r6, 1 ++; CHECK-LE-NEXT: addi r3, r3, 15 ++; CHECK-LE-NEXT: ori r5, r5, 0 ++; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ++; CHECK-LE-NEXT: sldi r4, r4, 2 ++; CHECK-LE-NEXT: add r5, r31, r5 ++; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 ++; CHECK-LE-NEXT: stwx r6, r5, r4 ++; CHECK-LE-NEXT: li r4, -32768 ++; CHECK-LE-NEXT: neg r7, r3 ++; CHECK-LE-NEXT: ld r3, 0(r1) ++; CHECK-LE-NEXT: and r4, r7, r4 ++; CHECK-LE-NEXT: mr r7, r4 ++; CHECK-LE-NEXT: li r4, -4096 ++; CHECK-LE-NEXT: divd r5, r7, r4 ++; CHECK-LE-NEXT: mulld r4, r5, r4 ++; CHECK-LE-NEXT: sub r5, r7, r4 ++; CHECK-LE-NEXT: add r4, r1, r7 ++; CHECK-LE-NEXT: stdux r3, r1, r5 ++; CHECK-LE-NEXT: cmpd r1, r4 ++; CHECK-LE-NEXT: beq cr0, .LBB11_8 ++; CHECK-LE-NEXT: .LBB11_7: ++; CHECK-LE-NEXT: stdu r3, -4096(r1) ++; CHECK-LE-NEXT: cmpd r1, r4 ++; CHECK-LE-NEXT: bne cr0, .LBB11_7 ++; CHECK-LE-NEXT: .LBB11_8: ++; CHECK-LE-NEXT: addi r3, r1, -32768 ++; CHECK-LE-NEXT: lbz r3, 0(r3) ++; CHECK-LE-NEXT: mr r1, r30 ++; CHECK-LE-NEXT: ld r31, -8(r1) ++; CHECK-LE-NEXT: ld r30, -16(r1) ++; CHECK-LE-NEXT: blr ++; ++; CHECK-BE-LABEL: f11: ++; CHECK-BE: # %bb.0: ++; CHECK-BE-NEXT: std r31, -8(r1) ++; CHECK-BE-NEXT: std r30, -16(r1) ++; CHECK-BE-NEXT: mr r30, r1 ++; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ++; CHECK-BE-NEXT: clrldi r0, r30, 49 ++; CHECK-BE-NEXT: subc r12, r30, r0 ++; 
CHECK-BE-NEXT: clrldi r0, r0, 52 ++; CHECK-BE-NEXT: cmpdi r0, 0 ++; CHECK-BE-NEXT: beq cr0, .LBB11_2 ++; CHECK-BE-NEXT: # %bb.1: ++; CHECK-BE-NEXT: neg r0, r0 ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: .LBB11_2: ++; CHECK-BE-NEXT: li r0, -4096 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: beq cr0, .LBB11_4 ++; CHECK-BE-NEXT: .LBB11_3: ++; CHECK-BE-NEXT: stdux r30, r1, r0 ++; CHECK-BE-NEXT: cmpd r1, r12 ++; CHECK-BE-NEXT: bne cr0, .LBB11_3 ++; CHECK-BE-NEXT: .LBB11_4: ++; CHECK-BE-NEXT: mr r12, r30 ++; CHECK-BE-NEXT: li r0, 24 ++; CHECK-BE-NEXT: mtctr r0 ++; CHECK-BE-NEXT: .LBB11_5: ++; CHECK-BE-NEXT: stdu r12, -4096(r1) ++; CHECK-BE-NEXT: bdnz .LBB11_5 ++; CHECK-BE-NEXT: # %bb.6: ++; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ++; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ++; CHECK-BE-NEXT: .cfi_offset r31, -8 ++; CHECK-BE-NEXT: .cfi_offset r30, -16 ++; CHECK-BE-NEXT: clrldi r3, r3, 32 ++; CHECK-BE-NEXT: lis r5, 1 ++; CHECK-BE-NEXT: addi r3, r3, 15 ++; CHECK-BE-NEXT: mr r31, r1 ++; CHECK-BE-NEXT: ori r5, r5, 0 ++; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 ++; CHECK-BE-NEXT: add r5, r31, r5 ++; CHECK-BE-NEXT: sldi r4, r4, 2 ++; CHECK-BE-NEXT: li r6, 1 ++; CHECK-BE-NEXT: rldicl r3, r3, 4, 31 ++; CHECK-BE-NEXT: stwx r6, r5, r4 ++; CHECK-BE-NEXT: neg r7, r3 ++; CHECK-BE-NEXT: li r4, -32768 ++; CHECK-BE-NEXT: and r4, r7, r4 ++; CHECK-BE-NEXT: ld r3, 0(r1) ++; CHECK-BE-NEXT: mr r7, r4 ++; CHECK-BE-NEXT: li r4, -4096 ++; CHECK-BE-NEXT: divd r5, r7, r4 ++; CHECK-BE-NEXT: mulld r4, r5, r4 ++; CHECK-BE-NEXT: sub r5, r7, r4 ++; CHECK-BE-NEXT: add r4, r1, r7 ++; CHECK-BE-NEXT: stdux r3, r1, r5 ++; CHECK-BE-NEXT: cmpd r1, r4 ++; CHECK-BE-NEXT: beq cr0, .LBB11_8 ++; CHECK-BE-NEXT: .LBB11_7: ++; CHECK-BE-NEXT: stdu r3, -4096(r1) ++; CHECK-BE-NEXT: cmpd r1, r4 ++; CHECK-BE-NEXT: bne cr0, .LBB11_7 ++; CHECK-BE-NEXT: .LBB11_8: ++; CHECK-BE-NEXT: addi r3, r1, -32768 ++; CHECK-BE-NEXT: lbz r3, 0(r3) ++; CHECK-BE-NEXT: mr r1, r30 ++; CHECK-BE-NEXT: ld r31, -8(r1) ++; 
CHECK-BE-NEXT: ld r30, -16(r1) ++; CHECK-BE-NEXT: blr ++; ++; CHECK-32-LABEL: f11: ++; CHECK-32: # %bb.0: ++; CHECK-32-NEXT: mr r12, r1 ++; CHECK-32-NEXT: .cfi_def_cfa r12, 0 ++; CHECK-32-NEXT: clrlwi r0, r12, 17 ++; CHECK-32-NEXT: subc r1, r1, r0 ++; CHECK-32-NEXT: li r0, 24 ++; CHECK-32-NEXT: mtctr r0 ++; CHECK-32-NEXT: .LBB11_1: ++; CHECK-32-NEXT: stwu r12, -4096(r1) ++; CHECK-32-NEXT: bdnz .LBB11_1 ++; CHECK-32-NEXT: # %bb.2: ++; CHECK-32-NEXT: .cfi_def_cfa_register r1 ++; CHECK-32-NEXT: sub r0, r1, r12 ++; CHECK-32-NEXT: sub r0, r1, r0 ++; CHECK-32-NEXT: addic r0, r0, -4 ++; CHECK-32-NEXT: stwx r31, 0, r0 ++; CHECK-32-NEXT: addic r0, r0, -4 ++; CHECK-32-NEXT: stwx r30, 0, r0 ++; CHECK-32-NEXT: addic r30, r0, 8 ++; CHECK-32-NEXT: .cfi_def_cfa_register r30 ++; CHECK-32-NEXT: .cfi_offset r31, -4 ++; CHECK-32-NEXT: .cfi_offset r30, -8 ++; CHECK-32-NEXT: lis r4, 1 ++; CHECK-32-NEXT: mr r31, r1 ++; CHECK-32-NEXT: ori r4, r4, 0 ++; CHECK-32-NEXT: addi r3, r3, 15 ++; CHECK-32-NEXT: add r4, r31, r4 ++; CHECK-32-NEXT: li r5, 1 ++; CHECK-32-NEXT: slwi r6, r6, 2 ++; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 ++; CHECK-32-NEXT: neg r7, r3 ++; CHECK-32-NEXT: stwx r5, r4, r6 ++; CHECK-32-NEXT: li r4, -32768 ++; CHECK-32-NEXT: and r4, r7, r4 ++; CHECK-32-NEXT: lwz r3, 0(r1) ++; CHECK-32-NEXT: mr r7, r4 ++; CHECK-32-NEXT: li r4, -4096 ++; CHECK-32-NEXT: divw r5, r7, r4 ++; CHECK-32-NEXT: mullw r4, r5, r4 ++; CHECK-32-NEXT: sub r5, r7, r4 ++; CHECK-32-NEXT: add r4, r1, r7 ++; CHECK-32-NEXT: stwux r3, r1, r5 ++; CHECK-32-NEXT: cmpw r1, r4 ++; CHECK-32-NEXT: beq cr0, .LBB11_4 ++; CHECK-32-NEXT: .LBB11_3: ++; CHECK-32-NEXT: stwu r3, -4096(r1) ++; CHECK-32-NEXT: cmpw r1, r4 ++; CHECK-32-NEXT: bne cr0, .LBB11_3 ++; CHECK-32-NEXT: .LBB11_4: ++; CHECK-32-NEXT: addi r3, r1, -32768 ++; CHECK-32-NEXT: lbz r3, 0(r3) ++; CHECK-32-NEXT: lwz r31, 0(r1) ++; CHECK-32-NEXT: lwz r0, -4(r31) ++; CHECK-32-NEXT: lwz r30, -8(r31) ++; CHECK-32-NEXT: mr r1, r31 ++; CHECK-32-NEXT: mr r31, r0 ++; 
CHECK-32-NEXT: blr ++ %a = alloca i32, i32 4096, align 32768 ++ %b = getelementptr inbounds i32, i32* %a, i64 %i ++ store volatile i32 1, i32* %b ++ %1 = zext i32 %vla_size to i64 ++ %vla = alloca i8, i64 %1, align 2048 ++ %2 = load volatile i8, i8* %vla, align 2048 ++ ret void ++} ++ + attributes #0 = { "probe-stack"="inline-asm" } +diff --git llvm/test/CodeGen/PowerPC/stack-realign.ll llvm/test/CodeGen/PowerPC/stack-realign.ll +index ea3603b9ce20..640bfb81709a 100644 +--- llvm/test/CodeGen/PowerPC/stack-realign.ll ++++ llvm/test/CodeGen/PowerPC/stack-realign.ll +@@ -43,7 +43,7 @@ entry: + + ; CHECK: std 3, 48(30) + +-; CHECK: ld 1, 0(1) ++; CHECK: mr 1, 30 + ; CHECK-DAG: ld [[SR:[0-9]+]], 16(1) + ; CHECK-DAG: ld 30, -16(1) + ; CHECK-DAG: mtlr [[SR]] +@@ -69,7 +69,7 @@ entry: + + ; CHECK-FP: std 3, 48(30) + +-; CHECK-FP: ld 1, 0(1) ++; CHECK-FP: mr 1, 30 + ; CHECK-FP-DAG: ld [[SR:[0-9]+]], 16(1) + ; CHECK-FP-DAG: ld 31, -8(1) + ; CHECK-FP-DAG: ld 30, -16(1) +-- +2.30.0 +