From 8ed9046e38fcad718feab82dbedcbfe625f903ce Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Mon, 20 Jun 2022 11:01:36 -0700 Subject: [PATCH] Update fake-splitting implementation on ARM64 To facilitate generating unwind info, fake-splitting now places the read-only data section after the cold section. This allows the hot/cold code sections to be truly contiguous. --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/ee_il_dll.cpp | 62 ++++++++++++++++++++++++++--------- src/coreclr/jit/emit.cpp | 29 +--------------- src/coreclr/jit/emitarm64.cpp | 6 +--- 4 files changed, 49 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fee93a619876f..29c6361cfcace 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7646,7 +7646,7 @@ class Compiler // ICorJitInfo wrappers - void eeAllocMem(AllocMemArgs* args); + void eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment); void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize); diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index f8c437e326694..d09bffa0a5e9a 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1122,34 +1122,64 @@ void Compiler::eeDispLineInfos() * (e.g., host AMD64, target ARM64), then VM will get confused anyway. */ -void Compiler::eeAllocMem(AllocMemArgs* args) +void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment) { #ifdef DEBUG - const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize; - const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize; - // Fake splitting implementation: place hot/cold code in contiguous section - if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) + // Fake splitting implementation: place hot/cold code in contiguous section. + UNATIVE_OFFSET coldCodeOffset = 0; + if (JitConfig.JitFakeProcedureSplitting() && (args->coldCodeSize > 0)) { - args->hotCodeSize = hotSizeRequest + coldSizeRequest; + coldCodeOffset = args->hotCodeSize; + assert(coldCodeOffset > 0); + args->hotCodeSize += args->coldCodeSize; args->coldCodeSize = 0; } -#endif + +#endif // DEBUG + +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + + // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does. + // This way allows us to use a single `ldr` to access such data like float constant/jmp table. + // For LoongArch64 using `pcaddi + ld` to access such data. + + UNATIVE_OFFSET roDataAlignmentDelta = 0; + if (args->roDataSize > 0) + { + roDataAlignmentDelta = AlignmentPad(args->hotCodeSize, roDataSectionAlignment); + } + + const UNATIVE_OFFSET roDataOffset = args->hotCodeSize + roDataAlignmentDelta; + args->hotCodeSize = roDataOffset + args->roDataSize; + args->roDataSize = 0; + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) info.compCompHnd->allocMem(args); #ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) - { - // Fix up hot/cold code pointers - args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest; - args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest; - // Reset args' hot/cold code sizes in case caller reads them later - args->hotCodeSize = hotSizeRequest; - args->coldCodeSize = coldSizeRequest; + if (JitConfig.JitFakeProcedureSplitting() && (coldCodeOffset > 0)) + { + // Fix up cold code pointers. Cold section is adjacent to hot section. 
+ assert(args->coldCodeBlock == nullptr); + assert(args->coldCodeBlockRW == nullptr); + args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + coldCodeOffset; + args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + coldCodeOffset; } -#endif + +#endif // DEBUG + +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + + // Fix up data section pointers. + assert(args->roDataBlock == nullptr); + assert(args->roDataBlockRW == nullptr); + args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset; + args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset; + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 10464c6c08ef7..6ecc7a32fcab0 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6304,38 +6304,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, AllocMemArgs args; memset(&args, 0, sizeof(args)); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does. - // This way allows us to use a single `ldr` to access such data like float constant/jmp table. - - const UNATIVE_OFFSET roDataAlignmentDelta = - (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, emitConsDsc.alignment) - emitTotalHotCodeSize; - - args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs; - args.coldCodeSize = emitTotalColdCodeSize; - args.roDataSize = 0; - args.xcptnsCount = xcptnsCount; - args.flag = allocMemFlag; - - emitComp->eeAllocMem(&args); - - codeBlock = (BYTE*)args.hotCodeBlock; - codeBlockRW = (BYTE*)args.hotCodeBlockRW; - coldCodeBlock = (BYTE*)args.coldCodeBlock; - coldCodeBlockRW = (BYTE*)args.coldCodeBlockRW; - - consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta; - consBlockRW = codeBlockRW + emitTotalHotCodeSize + roDataAlignmentDelta; - -#else - args.hotCodeSize = emitTotalHotCodeSize; args.coldCodeSize = emitTotalColdCodeSize; args.roDataSize = emitConsDsc.dsdOffs; args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args); + emitComp->eeAllocMem(&args, emitConsDsc.alignment); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; @@ -6344,8 +6319,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, consBlock = (BYTE*)args.roDataBlock; consBlockRW = (BYTE*)args.roDataBlockRW; -#endif - #ifdef DEBUG if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0) { diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3068c4dc74edf..7b55449e5aef2 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8437,8 +8437,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) { case INS_bl_local: idjShort = true; - fmt = IF_BI_0A; - break; + FALLTHROUGH; case INS_b: // Unconditional jump is a single form. // Assume is long in case we cross hot/cold sections. @@ -9841,9 +9840,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) // Special case: emit add + ld1 instructions for loading 16-byte data into vector register. if (isVectorRegister(dstReg) && (opSize == EA_16BYTE)) { - // Low 4 bits should be 0 -- 16-byte JIT data should be aligned on 16 bytes. - assert((imm12 & 15) == 0); - const emitAttr elemSize = EA_1BYTE; const insOpts opt = optMakeArrangement(opSize, elemSize);
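
A minimal standalone sketch of the layout logic introduced above, for reference: it shows how the single contiguous allocation is sized under fake splitting (cold code placed directly after hot code, read-only data placed after the cold section at the requested alignment) and how the cold/ro-data offsets fall out of it. All identifiers in the sketch (offset_t, Layout, computeLayout, alignPad, and the example sizes in main) are hypothetical stand-ins; the real implementation is Compiler::eeAllocMem using AllocMemArgs, UNATIVE_OFFSET, and AlignmentPad.

#include <cassert>
#include <cstdint>

typedef uint32_t offset_t; // hypothetical stand-in for UNATIVE_OFFSET

// Pad needed to round 'offset' up to 'alignment' (assumed to be a power of two).
static offset_t alignPad(offset_t offset, offset_t alignment)
{
    return ((offset + alignment - 1) & ~(alignment - 1)) - offset;
}

// Resulting memory image: [ hot code ][ cold code ][ pad ][ read-only data ]
struct Layout
{
    offset_t allocSize;      // total size requested from allocMem as "hot code"
    offset_t coldCodeOffset; // start of the cold section within the hot block
    offset_t roDataOffset;   // start of the read-only data section within the hot block
};

static Layout computeLayout(offset_t hotSize, offset_t coldSize, offset_t roDataSize, offset_t roDataAlignment)
{
    Layout l;
    l.coldCodeOffset = hotSize;             // fake splitting: cold code directly follows hot code
    offset_t codeSize = hotSize + coldSize; // hot and cold sections are truly contiguous
    offset_t pad = (roDataSize > 0) ? alignPad(codeSize, roDataAlignment) : 0;
    l.roDataOffset = codeSize + pad;        // ro-data now sits after the cold section
    l.allocSize = l.roDataOffset + roDataSize;
    return l;
}

int main()
{
    // Example request: 0x120 bytes hot, 0x40 bytes cold, 0x30 bytes ro-data, 16-byte data alignment.
    Layout l = computeLayout(0x120, 0x40, 0x30, 16);
    assert(l.coldCodeOffset == 0x120);
    assert(l.roDataOffset == 0x160); // 0x120 + 0x40 is already 16-byte aligned, so no pad
    assert(l.allocSize == 0x190);

    // After allocMem returns hotCodeBlock, the remaining pointers are derived by adding
    // these offsets, mirroring the fix-ups in eeAllocMem:
    //   coldCodeBlock = hotCodeBlock + l.coldCodeOffset
    //   roDataBlock   = hotCodeBlock + l.roDataOffset
    return 0;
}

Requesting everything as one "hot code" block is what keeps the hot and cold sections truly contiguous, which facilitates generating unwind info, while placing the read-only data after the cold section preserves the ARM64/LoongArch64 property that JIT data stays adjacent to the code that references it.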