Skip to content

Commit

Permalink
[ARM] Adjust the frame pointer when it's needed for SEH unwinding
Browse files Browse the repository at this point in the history
For functions that require restoring SP from FP (e.g. that need to
align the stack, or that have variable sized allocations), the prologue
and epilogue previously used to look like this:

    push {r4-r5, r11, lr}
    add r11, sp, #8
    ...
    sub r4, r11, #8
    mov sp, r4
    pop {r4-r5, r11, pc}

This is problematic, because this unwinding operation (restoring sp
from r11 - offset) can't be expressed with the SEH unwind opcodes
(probably because this unwind procedure doesn't map exactly to
individual instructions; note the detour via r4 in the epilogue too).

To make unwinding work, the GPR push is split into two: the first one
pushes all the other registers, and the second one pushes r11+lr, so that
r11 can be set to point at this spot on the stack:

    push {r4-r5}
    push {r11, lr}
    mov r11, sp
    ...
    mov sp, r11
    pop {r11, lr}
    pop {r4-r5}
    bx lr

For the same setup, MSVC generates code that uses two registers:
r11 still points at the {r11,lr} pair, but a separate register
(r7 in the example below) is used for restoring the stack at the end:

    push {r4-r5, r7, r11, lr}
    add r11, sp, #12
    mov r7, sp
    ...
    mov sp, r7
    pop {r4-r5, r7, r11, pc}

For cases with clobbered float/vector registers, they are pushed
after the GPRs, before the {r11,lr} pair.

Differential Revision: https://reviews.llvm.org/D125649
  • Loading branch information
mstorsjo committed Jun 2, 2022
1 parent d8e67c1 commit 2ab19bf
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 66 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
} else if (STI.splitFramePointerPush(*MF)) {
return CSR_Win_SplitFP_SaveList;
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,33 @@ static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
}
}

static inline bool isSplitFPArea1Register(unsigned Reg,
bool SplitFramePushPop) {
using namespace ARM;

switch (Reg) {
case R0: case R1: case R2: case R3:
case R4: case R5: case R6: case R7:
case R8: case R9: case R10: case R12:
case SP: case PC:
return true;
default:
return false;
}
}

static inline bool isSplitFPArea2Register(unsigned Reg,
bool SplitFramePushPop) {
using namespace ARM;

switch (Reg) {
case R11: case LR:
return true;
default:
return false;
}
}

static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
R11, R10, R9, R8,
(sequence "D%u", 15, 8))>;

// Callee-saved register list returned by
// ARMBaseRegisterInfo::getCalleeSavedRegs when
// STI.splitFramePointerPush(MF) holds. R4-R10 and D8-D15 are listed ahead of
// the LR/R11 pair, which comes last.
def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8),
LR, R11)>;

// R8 is used to pass swifterror, remove it from CSR.
def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
R8)>;
Expand Down
224 changes: 158 additions & 66 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
break;
case ARM::t2ADDri: // add.w r11, sp, #xx
case ARM::t2ADDri12: // add.w r11, sp, #xx
case ARM::t2SUBri: // sub.w r4, r11, #xx
case ARM::t2MOVTi16: // movt r4, #xx
case ARM::t2MOVi16: // movw r4, #xx
case ARM::tBL: // bl __chkstk
Expand Down Expand Up @@ -633,15 +632,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
const MachineFunction &MF) {
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
int MaxRegBytes = 8 * 4;
if (STI.splitFramePointerPush(MF)) {
// Here, r11 can be stored below all of r4-r15 (3 registers more than
// above), plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
}
int FPCXTSaveSize =
(STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}

void ARMFrameLowering::emitPrologue(MachineFunction &MF,
Expand Down Expand Up @@ -704,42 +711,80 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}

// Determine spill area sizes.
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.splitFramePushPop(MF)) {
if (STI.splitFramePointerPush(MF)) {
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R11:
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
GPRCS2Size += 4;
break;
case ARM::R0:
case ARM::R1:
case ARM::R2:
case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R12:
GPRCS1Size += 4;
break;
case ARM::FPCXTNS:
FPCXTSaveSize = 4;
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
}
}
} else {
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
case ARM::FPCXTNS:
FPCXTSaveSize = 4;
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
}
LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
case ARM::FPCXTNS:
FPCXTSaveSize = 4;
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
}
}

Expand Down Expand Up @@ -774,15 +819,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize =
(GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
DPRAlign.value();
unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
if (!STI.splitFramePointerPush(MF)) {
DPRGapSize += GPRCS2Size;
}
DPRGapSize %= DPRAlign.value();

unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
unsigned DPRCSOffset;
if (STI.splitFramePointerPush(MF)) {
DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
GPRCS2Offset = DPRCSOffset - GPRCS2Size;
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
int FramePtrOffsetInPush = 0;
if (HasFP) {
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
Expand All @@ -793,7 +846,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

// Move past area 2.
if (GPRCS2Size > 0) {
if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}
Expand Down Expand Up @@ -833,6 +886,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else
NumBytes = DPRCSOffset;

if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}

bool NeedsWinCFIStackAlloc = NeedsWinCFI;
if (STI.splitFramePointerPush(MF) && HasFP)
NeedsWinCFIStackAlloc = false;

if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 2;

Expand Down Expand Up @@ -888,7 +950,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
if (NeedsWinCFI) {
if (NeedsWinCFIStackAlloc) {
SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
.addImm(NumBytes)
.addImm(/*Wide=*/1)
Expand Down Expand Up @@ -927,13 +989,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
MachineBasicBlock::iterator AfterPush;
if (HasFP) {
MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
dl, TII, FramePtr, ARM::SP,
PushSize + FramePtrOffsetInPush,
MachineInstr::FrameSetup);
int FPOffset = PushSize + FramePtrOffsetInPush;
if (STI.splitFramePointerPush(MF)) {
AfterPush = std::next(GPRCS2Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
} else {
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, FPOffset,
MachineInstr::FrameSetup);
}
if (!NeedsWinCFI) {
if (FramePtrOffsetInPush + PushSize != 0) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
Expand All @@ -956,8 +1025,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Emit a SEH opcode indicating the prologue end. The rest of the prologue
// instructions below don't need to be replayed to unwind the stack.
if (NeedsWinCFI && MBBI != MBB.begin()) {
insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
MachineBasicBlock::iterator End = MBBI;
if (HasFP && STI.splitFramePointerPush(MF))
End = AfterPush;
insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
MF.setHasWinCFI(true);
}
Expand Down Expand Up @@ -1483,7 +1555,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
!STI.splitFramePointerPush(MF)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
Expand Down Expand Up @@ -1847,12 +1920,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
.addImm(-4)
.add(predOps(ARMCC::AL));
}
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
if (STI.splitFramePointerPush(MF)) {
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
&isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
&isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
} else {
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
0, MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
0, MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
}

// The code above does not insert spill code for the aligned DPRCS2 registers.
// The stack realignment code will be inserted between the push instructions
Expand Down Expand Up @@ -1880,14 +1962,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned LdrOpc =
AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isARMArea1Register, 0);
if (STI.splitFramePointerPush(MF)) {
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isSplitFPArea2Register, 0);
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isSplitFPArea1Register, 0);
} else {
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
&isARMArea1Register, 0);
}

return true;
}
Expand Down Expand Up @@ -2287,7 +2379,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
//
// We could do slightly better on Thumb1; in some cases, an sp-relative
// offset would be legal even though an fp-relative offset is not.
int MaxFPOffset = getMaxFPOffset(STI, *AFI);
int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
bool HasLargeArgumentList =
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

Expand Down
Loading

0 comments on commit 2ab19bf

Please sign in to comment.