-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Align arm64 data section as requested #71044
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6260,7 +6260,12 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, | |
|
||
coldCodeBlock = nullptr; | ||
|
||
CorJitAllocMemFlag allocMemFlag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN; | ||
// This restricts the data alignment to: 4, 8, 16, or 32 bytes | ||
// Alignments greater than 32 would require VM support in ICorJitInfo::allocMem | ||
uint32_t dataAlignment = emitConsDsc.alignment; | ||
assert((dataSection::MIN_DATA_ALIGN <= dataAlignment) && (dataAlignment <= dataSection::MAX_DATA_ALIGN) && isPow2(dataAlignment)); | ||
|
||
uint32_t codeAlignment = TARGET_POINTER_SIZE; | ||
|
||
#ifdef TARGET_X86 | ||
// | ||
|
@@ -6280,14 +6285,14 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, | |
const weight_t scenarioHotWeight = 256.0; | ||
if (emitComp->fgCalledCount > (scenarioHotWeight * emitComp->fgProfileRunsCount())) | ||
{ | ||
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN; | ||
codeAlignment = 16; | ||
} | ||
} | ||
else | ||
{ | ||
if (emitTotalHotCodeSize <= 16) | ||
{ | ||
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN; | ||
codeAlignment = 16; | ||
} | ||
} | ||
#endif | ||
|
@@ -6299,23 +6304,44 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, | |
if (emitComp->opts.OptimizationEnabled() && !emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && | ||
(emitTotalHotCodeSize > 16) && emitComp->fgHasLoops) | ||
{ | ||
allocMemFlag = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN; | ||
codeAlignment = 32; | ||
} | ||
#endif | ||
|
||
// This restricts the emitConsDsc.alignment to: 1, 2, 4, 8, 16, or 32 bytes | ||
// Alignments greater than 32 would require VM support in ICorJitInfo::allocMem | ||
assert(isPow2(emitConsDsc.alignment) && (emitConsDsc.alignment <= 32)); | ||
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) | ||
// For arm64/LoongArch64, we're going to put the data in the code section. So make sure the code section has | ||
// adequate alignment. | ||
if (emitConsDsc.dsdOffs > 0) | ||
{ | ||
codeAlignment = max(codeAlignment, dataAlignment); | ||
} | ||
#endif | ||
|
||
// Note that we don't support forcing code alignment of 8 bytes on 32-bit platforms; an omission? | ||
assert((TARGET_POINTER_SIZE <= codeAlignment) && (codeAlignment <= 32) && isPow2(codeAlignment)); | ||
|
||
if (emitConsDsc.alignment == 16) | ||
CorJitAllocMemFlag allocMemFlagCodeAlign = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN; | ||
if (codeAlignment == 32) | ||
{ | ||
allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlag | CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN); | ||
allocMemFlagCodeAlign = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN; | ||
} | ||
else if (emitConsDsc.alignment == 32) | ||
else if (codeAlignment == 16) | ||
{ | ||
allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlag | CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN); | ||
allocMemFlagCodeAlign = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN; | ||
} | ||
|
||
CorJitAllocMemFlag allocMemFlagDataAlign = static_cast<CorJitAllocMemFlag>(0); | ||
if (dataAlignment == 16) | ||
{ | ||
allocMemFlagDataAlign = CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN; | ||
} | ||
else if (dataAlignment == 32) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just wondering, is there a reason why this one is What is the default for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No reason; probably should have been more consistent, but it doesn't actually matter.
It's 8, or 4 for 32-bit platforms with <8 bytes of data. |
||
{ | ||
allocMemFlagDataAlign = CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN; | ||
} | ||
|
||
CorJitAllocMemFlag allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlagCodeAlign | allocMemFlagDataAlign); | ||
|
||
AllocMemArgs args; | ||
memset(&args, 0, sizeof(args)); | ||
|
||
|
@@ -6332,7 +6358,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, | |
UNATIVE_OFFSET roDataAlignmentDelta = 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated to your change here, but there is a comment above that reads:
I'm wondering why this is. In particular, x86/x64 explicitly say "don't do this" because it messes with the instruction decoder/cache and can lead to very poor speculative execution, etc. I would expect Arm64 to have similar limitations and for us to likewise want this data separate from the code. This also includes for other reasons like preventing users from trying to execute "data", etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's reasonable to reconsider. However, on arm64, we have limited addressing mode range for data load instructions. If we put the data in a "data section", we would either have to (1) generate pessimistic code to allow the largest possible range, (2) ensure that data section is "close enough" to the code, or (3) optimistically assume the data is "close enough" to the code, and allow a back-off/retry if not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps @TamarChristinaArm or our other friends at ARM could provide input here on what's the recommended/optimal approach and if Arm64 has similar considerations around having data/instructions close together. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Indeed we do have similar issues on Arm64 and the NX bits are of particular interest these days. What we try to do in these cases is to create an anchor to the data section, and then subsequent loads just use offsets from the anchor. Typically we also then consider the anchors cheap to re-materialize to avoid spilling them around call sites etc. If you're doing NX bits you'd have to allocate new pages for the constants anyway, so you could consider getting a page near the code. If you're within the range of an … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the insight here! I'll log an issue capturing this and ensuring we consider the potential impact longer term. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Logged #71155 |
||
if (emitConsDsc.dsdOffs > 0) | ||
{ | ||
roDataAlignmentDelta = AlignmentPad(emitTotalHotCodeSize, emitConsDsc.alignment); | ||
roDataAlignmentDelta = AlignmentPad(emitTotalHotCodeSize, dataAlignment); | ||
} | ||
|
||
args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs; | ||
|
@@ -6375,6 +6401,18 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, | |
{ | ||
assert(((size_t)codeBlock & 31) == 0); | ||
} | ||
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should this be `else if`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suppose that makes sense. There probably should be some asserts in places (including in the VM and crossgen) that don't allow setting multiple; I don't see those asserts today (at least they check 32 before 16). |
||
{ | ||
assert(((size_t)codeBlock & 15) == 0); | ||
} | ||
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0) | ||
{ | ||
assert(((size_t)consBlock & 31) == 0); | ||
} | ||
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0) | ||
{ | ||
assert(((size_t)consBlock & 15) == 0); | ||
} | ||
#endif | ||
|
||
// if (emitConsDsc.dsdOffs) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we know why this heuristic is 32-bit x86 only and not also for x64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know. Maybe historical and should be revisited?