Skip to content

Commit

Permalink
Update fake-splitting implementation on ARM64
Browse files Browse the repository at this point in the history
To facilitate generating unwind info, fake-splitting now places the
read-only data section after the cold section. This allows the
hot/cold code sections to be truly contiguous.
  • Loading branch information
Aman Khalid committed Jun 21, 2022
1 parent d208b3a commit 8ed9046
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 50 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7646,7 +7646,7 @@ class Compiler

// ICorJitInfo wrappers

void eeAllocMem(AllocMemArgs* args);
void eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment);

void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize);

Expand Down
62 changes: 46 additions & 16 deletions src/coreclr/jit/ee_il_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,34 +1122,64 @@ void Compiler::eeDispLineInfos()
* (e.g., host AMD64, target ARM64), then the VM will get confused anyway.
*/

void Compiler::eeAllocMem(AllocMemArgs* args)
void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment)
{
#ifdef DEBUG
const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize;
const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize;

// Fake splitting implementation: place hot/cold code in contiguous section
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
// Fake splitting implementation: place hot/cold code in contiguous section.
UNATIVE_OFFSET coldCodeOffset = 0;
if (JitConfig.JitFakeProcedureSplitting() && (args->coldCodeSize > 0))
{
args->hotCodeSize = hotSizeRequest + coldSizeRequest;
coldCodeOffset = args->hotCodeSize;
assert(coldCodeOffset > 0);
args->hotCodeSize += args->coldCodeSize;
args->coldCodeSize = 0;
}
#endif

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

// For arm64/LoongArch64, we always want to allocate JIT data adjacent to the code, similar to what a native compiler does.
// This allows us to use a single `ldr` to access such data (e.g., float constants or jump tables).
// On LoongArch64, `pcaddi + ld` is used to access such data.

UNATIVE_OFFSET roDataAlignmentDelta = 0;
if (args->roDataSize > 0)
{
roDataAlignmentDelta = AlignmentPad(args->hotCodeSize, roDataSectionAlignment);
}

const UNATIVE_OFFSET roDataOffset = args->hotCodeSize + roDataAlignmentDelta;
args->hotCodeSize = roDataOffset + args->roDataSize;
args->roDataSize = 0;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

info.compCompHnd->allocMem(args);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
{
// Fix up hot/cold code pointers
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest;
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest;

// Reset args' hot/cold code sizes in case caller reads them later
args->hotCodeSize = hotSizeRequest;
args->coldCodeSize = coldSizeRequest;
if (JitConfig.JitFakeProcedureSplitting() && (coldCodeOffset > 0))
{
// Fix up cold code pointers. Cold section is adjacent to hot section.
assert(args->coldCodeBlock == nullptr);
assert(args->coldCodeBlockRW == nullptr);
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + coldCodeOffset;
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + coldCodeOffset;
}
#endif

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

// Fix up data section pointers.
assert(args->roDataBlock == nullptr);
assert(args->roDataBlockRW == nullptr);
args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset;
args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
}

void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
Expand Down
29 changes: 1 addition & 28 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6304,38 +6304,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
AllocMemArgs args;
memset(&args, 0, sizeof(args));

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
// For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.

const UNATIVE_OFFSET roDataAlignmentDelta =
(UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, emitConsDsc.alignment) - emitTotalHotCodeSize;

args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs;
args.coldCodeSize = emitTotalColdCodeSize;
args.roDataSize = 0;
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitComp->eeAllocMem(&args);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
coldCodeBlock = (BYTE*)args.coldCodeBlock;
coldCodeBlockRW = (BYTE*)args.coldCodeBlockRW;

consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta;
consBlockRW = codeBlockRW + emitTotalHotCodeSize + roDataAlignmentDelta;

#else

args.hotCodeSize = emitTotalHotCodeSize;
args.coldCodeSize = emitTotalColdCodeSize;
args.roDataSize = emitConsDsc.dsdOffs;
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitComp->eeAllocMem(&args);
emitComp->eeAllocMem(&args, emitConsDsc.alignment);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
Expand All @@ -6344,8 +6319,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
consBlock = (BYTE*)args.roDataBlock;
consBlockRW = (BYTE*)args.roDataBlockRW;

#endif

#ifdef DEBUG
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0)
{
Expand Down
6 changes: 1 addition & 5 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8437,8 +8437,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
{
case INS_bl_local:
idjShort = true;
fmt = IF_BI_0A;
break;
FALLTHROUGH;
case INS_b:
// Unconditional jump is a single form.
// Assume it is long in case we cross hot/cold sections.
Expand Down Expand Up @@ -9841,9 +9840,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
// Special case: emit add + ld1 instructions for loading 16-byte data into vector register.
if (isVectorRegister(dstReg) && (opSize == EA_16BYTE))
{
// Low 4 bits should be 0 -- 16-byte JIT data should be aligned on 16 bytes.
assert((imm12 & 15) == 0);

const emitAttr elemSize = EA_1BYTE;
const insOpts opt = optMakeArrangement(opSize, elemSize);

Expand Down

0 comments on commit 8ed9046

Please sign in to comment.