From f6896d14fb4cf03db8c94a402edbd22626d7a2dd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 14 Dec 2023 19:46:56 +0000 Subject: [PATCH 1/4] [ARM] Check all terms in emitPopInst when clearing Restored for LR. emitPopInst checks a single function exit MBB. If other paths also exit the function and any of there terminators uses LR implicitly, it is not save to clear the Restored bit. Check all terminators for the function before clearing Restored. --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 18 +++++++++++++++--- .../test/CodeGen/Thumb2/mve-float16regloops.ll | 1 + .../test/CodeGen/Thumb2/mve-float32regloops.ll | 1 + .../outlined-fn-may-clobber-lr-in-caller.ll | 12 ++++++++++-- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index a3a71a8ec09a45..79b70cab82f145 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1645,9 +1645,21 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // We 'restore' LR into PC so it is not live out of the return block: - // Clear Restored bit. - Info.setRestored(false); + // Check if all terminators do not implicitly use LR. Then we can + // 'restore' LR into PC so it is not live out of the return block: Clear + // Restored bit. + if (all_of(MF, [MI](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [MI](const MachineInstr &Term) { + // MI's terminator is to be re-written, don't check the old + // opcode. + if (&*MI == &Term) + return true; + return Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) + Info.setRestored(false); } // If NoGap is true, pop consecutive registers and then leave the rest diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 1c95d28b5eed1b..a75f445097f28b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -831,6 +831,7 @@ define void @arm_fir_f32_1_4_mve(ptr nocapture readonly %S, ptr nocapture readon ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r9, #3 +; CHECK-NEXT: @ implicit-def: $lr ; CHECK-NEXT: beq .LBB15_12 ; CHECK-NEXT: @ %bb.11: @ %if.then59 ; CHECK-NEXT: vldrw.u32 q0, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 808626d9a0aebe..c29653e6827263 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -822,6 +822,7 @@ define void @arm_fir_f32_1_4_mve(ptr nocapture readonly %S, ptr nocapture readon ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r10, #3 +; CHECK-NEXT: @ implicit-def: $lr ; CHECK-NEXT: beq .LBB15_12 ; CHECK-NEXT: @ %bb.11: @ %if.then59 ; CHECK-NEXT: vldrw.u32 q0, [r0] diff --git a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll index d81d008b44bed8..34d93c985e7204 100644 --- a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll +++ b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll @@ -22,11 +22,19 @@ define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 no ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: bne .LBB0_5 ; CHECK-NEXT: @ %bb.2: @ %bb4 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 ; CHECK-NEXT: ldr r2, .LCPI0_1 ; CHECK-NEXT: b .LBB0_4 ; CHECK-NEXT: .LBB0_3: @ %bb14 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 ; CHECK-NEXT: ldr r2, .LCPI0_0 ; CHECK-NEXT: .LBB0_4: @ %bb4 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 From cd4021fefaa294f974755eb926178b59ddbca42b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 19 Dec 2023 17:56:05 +0000 Subject: [PATCH 2/4] !fixup move setting IsRestored to processFunctionBeforeFrameFinalized --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 40 ++++++++++++------- llvm/lib/Target/ARM/ARMFrameLowering.h | 3 ++ .../CodeGen/Thumb2/mve-float16regloops.ll | 1 - .../CodeGen/Thumb2/mve-float32regloops.ll | 1 - 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 79b70cab82f145..27cffc4971400c 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1645,21 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // Check if all terminators do not implicitly use LR. Then we can - // 'restore' LR into PC so it is not live out of the return block: Clear - // Restored bit. - if (all_of(MF, [MI](const MachineBasicBlock &MBB) { - return all_of(MBB.terminators(), [MI](const MachineInstr &Term) { - // MI's terminator is to be re-written, don't check the old - // opcode. - if (&*MI == &Term) - return true; - return Term.getOpcode() == ARM::LDMIA_RET || - Term.getOpcode() == ARM::t2LDMIA_RET || - Term.getOpcode() == ARM::tPOP_RET; - }); - })) - Info.setRestored(false); } // If NoGap is true, pop consecutive registers and then leave the rest @@ -2797,6 +2782,31 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + // Check if all terminators do not implicitly use LR. Then we can 'restore' LR + // into PC so it is not live out of the return block: Clear the Restored bit + // in that case. + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() != ARM::LR) + continue; + if (all_of(MF, [](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [](const MachineInstr &Term) { + return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) + Info.setRestored(false); + } +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 16f2ce6bea6f1a..8d2b8beb9a58fb 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -59,6 +59,9 @@ class ARMFrameLowering : public TargetFrameLowering { void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index a75f445097f28b..1c95d28b5eed1b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -831,7 +831,6 @@ define void @arm_fir_f32_1_4_mve(ptr nocapture readonly %S, ptr nocapture readon ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r9, #3 -; CHECK-NEXT: @ implicit-def: $lr ; CHECK-NEXT: beq .LBB15_12 ; CHECK-NEXT: @ %bb.11: @ %if.then59 ; CHECK-NEXT: vldrw.u32 q0, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index c29653e6827263..808626d9a0aebe 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -822,7 +822,6 @@ define void @arm_fir_f32_1_4_mve(ptr nocapture readonly %S, ptr nocapture readon ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r10, #3 -; CHECK-NEXT: @ implicit-def: $lr ; CHECK-NEXT: beq .LBB15_12 ; CHECK-NEXT: @ %bb.11: @ %if.then59 ; CHECK-NEXT: vldrw.u32 q0, [r0] From 53d8fc4aa580dff25f9c0aafd3d2cacf9b243002 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 19 Dec 2023 18:05:22 +0000 Subject: [PATCH 3/4] !fixup remove FIXME. --- .../test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll index 34d93c985e7204..1dbb21f40a761c 100644 --- a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll +++ b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll @@ -11,8 +11,6 @@ target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" ; Test case to make sure calling an outlined function does not clobber LR used ; by a tail call in caller. -; FIXME: Currently bl OUTLINED_FUNCTION_0 clobbers LR, which in turn is used -; by the later call to memcpy to return to the caller. define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 noundef zeroext %arg2) unnamed_addr #0 { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %bb From 0f799ea878f997da3eed2aec532fcefc89463f59 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 20 Dec 2023 15:54:39 +0000 Subject: [PATCH 4/4] !fixup add break after setRestored. --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 27cffc4971400c..10d9c7f275bebb 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -2802,8 +2802,10 @@ void ARMFrameLowering::processFunctionBeforeFrameFinalized( Term.getOpcode() == ARM::t2LDMIA_RET || Term.getOpcode() == ARM::tPOP_RET; }); - })) + })) { Info.setRestored(false); + break; + } } }