Skip to content

Commit

Permalink
Enable fake hot/cold splitting on ARM64
Browse files Browse the repository at this point in the history
This commit contains fixes for various bugs exposed by enabling fake
hot/cold splitting on ARM64:
- Branches between hot/cold sections are now always long.
- The pseudoinstruction for loading a constant from the cold section
did not support loading 16-byte data into vector registers, as it
temporarily loaded the constant into an 8-byte integer register. Now,
16-byte constants are loaded directly into vector registers via an
`ld1` instruction.
- Tests involving loading 16-byte constants exposed that the data section
was not always aligned to its largest constant. Now, the data section
is always aligned to `emitConsDsc.alignment` when calling `eeAllocMem`.
- Asserts/NYIs blocking hot/cold splitting on ARM64 have been removed.

Fake hot/cold splitting requires that we fake the unwind info by treating
each split function as a single hot section. This is now done with a more
architecture-agnostic approach.
  • Loading branch information
Aman Khalid authored and amanasifkhalid committed Jun 14, 2022
1 parent 7989a93 commit d2bbed8
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 105 deletions.
4 changes: 0 additions & 4 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8000,10 +8000,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);

#ifdef DEBUG
void fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode);
#endif // DEBUG

#endif // TARGET_AMD64 || (TARGET_X86 && FEATURE_EH_FUNCLETS)

UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
Expand Down
9 changes: 2 additions & 7 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6045,13 +6045,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
// For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.

UNATIVE_OFFSET roDataAlignmentDelta = 0;
if (emitConsDsc.dsdOffs && (emitConsDsc.alignment == TARGET_POINTER_SIZE))
{
UNATIVE_OFFSET roDataAlignment = TARGET_POINTER_SIZE; // 8 Byte align by default.
roDataAlignmentDelta = (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, roDataAlignment) - emitTotalHotCodeSize;
assert((roDataAlignmentDelta == 0) || (roDataAlignmentDelta == 4));
}
const UNATIVE_OFFSET roDataAlignmentDelta =
(UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, emitConsDsc.alignment) - emitTotalHotCodeSize;

args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs;
args.coldCodeSize = emitTotalColdCodeSize;
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8422,7 +8422,8 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
{
case INS_bl_local:
idjShort = true;
// Fall through.
fmt = IF_BI_0A;
break;
case INS_b:
// Unconditional jump is a single form.
// Assume is long in case we cross hot/cold sections.
Expand Down Expand Up @@ -9825,6 +9826,9 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
// Special case: emit add + ld1 instructions for loading 16-byte data into vector register.
if (isVectorRegister(dstReg) && (opSize == EA_16BYTE))
{
// Low 4 bits should be 0 -- 16-byte JIT data should be aligned on 16 bytes.
assert((imm12 & 15) == 0);

const emitAttr elemSize = EA_1BYTE;
const insOpts opt = optMakeArrangement(opSize, elemSize);

Expand Down
18 changes: 17 additions & 1 deletion src/coreclr/jit/unwind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,16 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc* func,
// The hot section only goes up to the cold section
assert(fgFirstFuncletBB == nullptr);

*ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
*ppEndLoc = nullptr; // If fake-splitting, "trick" VM by pretending entire function is hot.
}
else
#endif // DEBUG
{
*ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
}
}
else
{
Expand Down Expand Up @@ -259,6 +268,13 @@ void Compiler::unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdC
DWORD unwindCodeBytes = 0;
BYTE* pUnwindBlock = nullptr;

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
pColdCode = nullptr;
}
#endif // DEBUG

if (func->startLoc == nullptr)
{
startOffset = 0;
Expand Down
67 changes: 25 additions & 42 deletions src/coreclr/jit/unwindamd64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,18 +656,17 @@ void Compiler::unwindReserve()
//
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.
unwindReserveFuncHelper(func, true);
}
else
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
}
else
#endif // DEBUG
{
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
unwindReserveFuncHelper(func, false);
}
Expand Down Expand Up @@ -859,7 +858,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo

if (isHotCode)
{
assert(endOffset <= info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
{
assert(endOffset <= info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= info.compTotalHotCodeSize);
}

pColdCode = nullptr;
}
else
Expand Down Expand Up @@ -890,43 +899,17 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode
static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr))
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
fakeUnwindEmitFuncHelper(func, pHotCode);
}
else
#ifdef DEBUG
if (!JitConfig.JitFakeProcedureSplitting())
#endif // DEBUG
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
}
}
}

#ifdef DEBUG
//------------------------------------------------------------------------
// fakeUnwindEmitFuncHelper: Report the unwind info of a fake-split function
// as one region spanning offsets [0, info.compNativeCodeSize), with no
// cold section reported.
//
// Arguments:
//    func     - the function whose unwind codes are reported; asserted to be
//               FUNC_ROOT.
//    pHotCode - pointer to the hot code, forwarded to eeAllocUnwindInfo.
//
// Notes:
//    Asserts that a cold block exists (fgFirstColdBlock != nullptr).
//    The definition is wrapped in #ifdef DEBUG.
//
void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode)
{
assert(fgFirstColdBlock != nullptr);
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.

// The reported range covers the whole method, not just the hot part.
const UNATIVE_OFFSET startOffset = 0;
const UNATIVE_OFFSET endOffset = info.compNativeCodeSize;
// The unwind block is the portion of func->unwindCodes from unwindCodeSlot to the end of the array.
const DWORD unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
BYTE* pUnwindBlock = &func->unwindCodes[func->unwindCodeSlot];

// Dump the unwind info when unwind display is enabled.
if (opts.dspUnwind)
{
DumpUnwindInfo(true, startOffset, endOffset, (const UNWIND_INFO* const)pUnwindBlock);
}

// Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section
eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, unwindCodeBytes, pUnwindBlock,
(CorJitFuncKind)func->funKind);
}
#endif // DEBUG

#endif // TARGET_AMD64
45 changes: 34 additions & 11 deletions src/coreclr/jit/unwindarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,13 +563,20 @@ void Compiler::unwindReserve()
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE;
bool funcHasColdSection = false;
bool funcHasColdSection = (fgFirstColdBlock != nullptr);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && funcHasColdSection)
{
funcHasColdSection = false; // "Trick" the VM into thinking we don't have a cold section.
}
#endif // DEBUG

#if defined(FEATURE_CFI_SUPPORT)
if (generateCFIUnwindCodes())
{
DWORD unwindCodeBytes = 0;
if (fgFirstColdBlock != nullptr)
if (funcHasColdSection)
{
eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes);
}
Expand All @@ -584,7 +591,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
// cold section. This needs to be done before we split into fragments, as each
// of the hot and cold sections can have multiple fragments.

if (fgFirstColdBlock != NULL)
if (funcHasColdSection)
{
assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH

Expand All @@ -595,8 +602,6 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo();
func->uwiCold->InitUnwindInfo(this, startLoc, endLoc);
func->uwiCold->HotColdSplitCodes(&func->uwi);

funcHasColdSection = true;
}

// First we need to split the function or funclet into fragments that are no larger
Expand Down Expand Up @@ -1604,11 +1609,19 @@ void UnwindFragmentInfo::Allocate(
UNATIVE_OFFSET endOffset;
UNATIVE_OFFSET codeSize;

// We don't support hot/cold splitting with EH, so if there is cold code, this
// better not be a funclet!
// TODO-CQ: support funclets in cold code

noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT);
// We don't support hot/cold splitting with EH, so if there is cold code, this
// better not be a funclet!
// TODO-CQ: support funclets in cold code
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL))
{
noway_assert(isHotCode && (funKind == CORJIT_FUNC_ROOT));
}
else
#endif // DEBUG
{
noway_assert(isHotCode || (funKind == CORJIT_FUNC_ROOT));
}

// Compute the final size, and start and end offsets of the fragment

Expand Down Expand Up @@ -1656,7 +1669,17 @@ void UnwindFragmentInfo::Allocate(

if (isHotCode)
{
assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL))
{
assert(endOffset <= uwiComp->info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
}

pColdCode = NULL;
}
else
Expand Down
63 changes: 25 additions & 38 deletions src/coreclr/jit/unwindx86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,17 @@ void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
//
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.
unwindReserveFuncHelper(func, true);
}
else
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
}
else
#endif // DEBUG
{
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
unwindReserveFuncHelper(func, false);
}
Expand Down Expand Up @@ -164,17 +163,13 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode
static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr))
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
fakeUnwindEmitFuncHelper(func, pHotCode);
}
else
#ifdef DEBUG
if (!JitConfig.JitFakeProcedureSplitting())
#endif // DEBUG
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
}
Expand Down Expand Up @@ -258,7 +253,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo

if (isHotCode)
{
assert(endOffset <= info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
{
assert(endOffset <= info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= info.compTotalHotCodeSize);
}

pColdCode = nullptr;
}
else
Expand All @@ -276,22 +281,4 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo
(BYTE*)&unwindInfo, (CorJitFuncKind)func->funKind);
}

#ifdef DEBUG
//------------------------------------------------------------------------
// fakeUnwindEmitFuncHelper: Report the unwind info of a fake-split function
// as one region spanning offsets [0, info.compNativeCodeSize), with no
// cold section reported.
//
// Arguments:
//    func     - the function being reported; asserted to be FUNC_ROOT.
//    pHotCode - pointer to the hot code, forwarded to eeAllocUnwindInfo.
//
// Notes:
//    Asserts that a cold block exists (fgFirstColdBlock != nullptr).
//    The definition is wrapped in #ifdef DEBUG.
//
void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode)
{
assert(fgFirstColdBlock != nullptr);
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.

// The reported range covers the whole method, not just the hot part.
const UNATIVE_OFFSET startOffset = 0;
const UNATIVE_OFFSET endOffset = info.compNativeCodeSize;

// Only FunctionLength is set here; it is the full native code size.
UNWIND_INFO unwindInfo;
unwindInfo.FunctionLength = (ULONG)(endOffset);

// Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section
eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, sizeof(UNWIND_INFO), (BYTE*)&unwindInfo,
(CorJitFuncKind)func->funKind);
}
#endif // DEBUG

#endif // FEATURE_EH_FUNCLETS
1 change: 0 additions & 1 deletion src/tests/Common/testenvironment.proj
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
COMPlus_EnableSSE42;
COMPlus_EnableSSSE3;
COMPlus_ForceRelocs;
COMPlus_GCgen0size;
COMPlus_GCStress;
COMPlus_GCName;
COMPlus_gcServer;
Expand Down

0 comments on commit d2bbed8

Please sign in to comment.