From f01e191a21251bb16412a55b5625a1adef2b383a Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Sat, 13 Jul 2024 00:56:18 -0700 Subject: [PATCH] Add ee_alloc_context (NativeAOT) This change is preparatory refactoring for the randomized allocation sampling feature. We need to add more state to the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we create a new ee_alloc_context struct which contains an instance of gc_alloc_context within it. In a future PR we will add a field called combined_limit which fast allocation helpers should use to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that falls somewhere in the middle of an allocation context. Using combined_limit rather than alloc_limit as the slow-path trigger allows us to keep all the sampling event logic in the slow path. This PR introduces the abstraction for combined_limit and changes all the fast allocation helpers to use it, but it does not physically create the field yet; for now combined_limit is just an alias for alloc_limit. Overall this PR should not cause any change in runtime behavior, and the compiled code should be largely identical to before, assuming modest inlining and optimization by the compiler. --- src/coreclr/nativeaot/Runtime/AsmOffsets.h | 4 ++- .../nativeaot/Runtime/AsmOffsetsVerify.cpp | 2 +- src/coreclr/nativeaot/Runtime/DebugHeader.cpp | 5 ++- .../nativeaot/Runtime/amd64/AllocFast.S | 6 ++-- .../nativeaot/Runtime/amd64/AllocFast.asm | 6 ++-- .../nativeaot/Runtime/amd64/AsmMacros.inc | 7 ++-- src/coreclr/nativeaot/Runtime/arm/AllocFast.S | 10 +++--- .../nativeaot/Runtime/arm64/AllocFast.S | 13 +++++--- .../nativeaot/Runtime/arm64/AllocFast.asm | 6 ++-- .../nativeaot/Runtime/arm64/AsmMacros.h | 7 ++-- .../nativeaot/Runtime/i386/AllocFast.asm | 6 ++-- .../nativeaot/Runtime/i386/AsmMacros.inc | 7 ++-- .../nativeaot/Runtime/loongarch64/AllocFast.S | 13 +++++--- src/coreclr/nativeaot/Runtime/portable.cpp | 25 +++++++++------ src/coreclr/nativeaot/Runtime/thread.h | 15 ++++++++- src/coreclr/nativeaot/Runtime/thread.inl | 32 ++++++++++++++++++- .../Runtime/unix/unixasmmacrosamd64.inc | 7 ++-- .../Runtime/unix/unixasmmacrosarm.inc | 7 ++-- 18 files changed, 126 insertions(+), 52 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 32abd406175e76..ce0aae03eb0788 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -46,7 +46,7 @@ ASM_OFFSET( 0, 0, MethodTable, m_uFlags) ASM_OFFSET( 4, 4, MethodTable, m_uBaseSize) ASM_OFFSET( 14, 18, MethodTable, m_VTable) -ASM_OFFSET( 0, 0, Thread, m_rgbAllocContextBuffer) +ASM_OFFSET( 0, 0, Thread, m_eeAllocContext) ASM_OFFSET( 28, 38, Thread, m_ThreadStateFlags) ASM_OFFSET( 2c, 40, Thread, m_pTransitionFrame) ASM_OFFSET( 30, 48, Thread, m_pDeferredTransitionFrame) @@ -61,6 +61,8 @@ ASM_SIZEOF( 14, 20, EHEnum) ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr) ASM_OFFSET( 4, 8, gc_alloc_context, alloc_limit) +ASM_OFFSET( 0, 0, ee_alloc_context, m_rgbAllocContextBuffer) + #ifdef FEATURE_CACHED_INTERFACE_DISPATCH ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) #ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER diff --git 
a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index b5520d739e871b..d27884dbdf1ff3 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -22,7 +22,7 @@ class AsmOffsets { - static_assert(sizeof(Thread::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "Thread::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); + static_assert(sizeof(ee_alloc_context::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "ee_alloc_context::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); // Some assembly helpers for arrays and strings are shared and use the fact that arrays and strings have similar layouts) static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp index 324e0f86f2aea8..e32956dde4ee28 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -163,6 +163,9 @@ extern "C" void PopulateDebugHeaders() MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, finalize_queue); MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, generation_table); + MAKE_SIZE_ENTRY(ee_alloc_context); + MAKE_DEBUG_FIELD_ENTRY(ee_alloc_context, m_rgbAllocContextBuffer); + MAKE_SIZE_ENTRY(gc_alloc_context); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_ptr); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_limit); @@ -194,7 +197,7 @@ extern "C" void PopulateDebugHeaders() MAKE_SIZE_ENTRY(RuntimeThreadLocals); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pNext); - MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_rgbAllocContextBuffer); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_eeAllocContext); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_threadId); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pThreadStressLog); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pExInfoStackHead); diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S index 6cb85bcc507a09..8923a7a4fbb64b 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S @@ -28,7 +28,7 @@ NESTED_ENTRY RhpNewFast, _TEXT, NoHandler mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr] add rdx, rsi - cmp rdx, [rax + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rdx, [rax + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewFast_RarePath) // set the new alloc pointer @@ -143,7 +143,7 @@ NESTED_ENTRY RhNewString, _TEXT, NoHandler // rcx == Thread* // rdx == string size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhNewString_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -226,7 +226,7 @@ NESTED_ENTRY RhpNewArray, _TEXT, NoHandler // rcx == Thread* // rdx == array size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewArray_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm index 37be558c3cef1d..6ba69c0c141274 100644 --- 
a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm @@ -25,7 +25,7 @@ LEAF_ENTRY RhpNewFast, _TEXT mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r8, rax - cmp r8, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r8, [rdx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewFast_RarePath ;; set the new alloc pointer @@ -118,7 +118,7 @@ LEAF_ENTRY RhNewString, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -179,7 +179,7 @@ LEAF_ENTRY RhpNewArray, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 33089b6643d382..e16434792b9b7c 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -336,8 +336,11 @@ TSF_DoNotTriggerGc equ 10h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +;; +;; Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. 
For now this aliases alloc_limit +;; +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit diff --git a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S index 31b54d1bca313a..76091303696546 100644 --- a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S @@ -26,7 +26,7 @@ LEAF_ENTRY RhpNewFast, _TEXT ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r2, r3 - ldr r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r1, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r1 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -132,7 +132,7 @@ LEAF_ENTRY RhNewString, _TEXT adds r6, r12 bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r6, r12 bhi LOCAL_LABEL(RhNewString_RarePath) @@ -213,7 +213,7 @@ LOCAL_LABEL(ArrayAlignSize): adds r6, r12 bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r6, r12 bhi LOCAL_LABEL(RhpNewArray_RarePath) @@ -349,7 +349,7 @@ LEAF_ENTRY RhpNewFastAlign8, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(Alloc8Failed) @@ -412,7 +412,7 @@ LEAF_ENTRY RhpNewFastMisalign, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(BoxAlloc8Failed) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S index 966b052a2b9f9e..2e6c0585d347e1 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S @@ -10,8 +10,11 @@ GC_ALLOC_FINALIZE = 1 // // Rename fields of nested structs // -OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +// +// Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. 
For now this aliases alloc_limit +// +OFFSETOF__Thread__m_eeAllocContext__combined_limit = OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit @@ -44,7 +47,7 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -139,7 +142,7 @@ LOCAL_LABEL(NewOutOfMemory): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhNewString_Rare) @@ -207,7 +210,7 @@ LOCAL_LABEL(RhNewString_Rare): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhpNewArray_Rare) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm index e6849b87312669..d8e506335d77f2 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm @@ -30,7 +30,7 @@ ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi RhpNewFast_RarePath @@ -118,7 +118,7 @@ NewOutOfMemory ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare @@ -179,7 +179,7 @@ StringSizeOverflow ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 94a559df719e02..a72a86bd9648ed 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -87,8 +87,11 @@ STATUS_REDHAWK_THREAD_ABORT equ 0x43 ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +;; +;; Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. For now this aliases alloc_limit +;; +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit ;; ;; IMPORTS diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm index 8d28e94c944177..d557f5ec750774 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm @@ -29,7 +29,7 @@ FASTCALL_FUNC RhpNewFast, 4 ;; add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja AllocFailed ;; set the new alloc pointer @@ -165,7 +165,7 @@ FASTCALL_FUNC RhNewString, 8 mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc StringAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja StringAllocContextOverflow ; ECX == allocation size @@ -282,7 +282,7 @@ ArrayAlignSize: mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc ArrayAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja ArrayAllocContextOverflow ; ECX == array size diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 896bf8e67dab53..7a613c6e128ee7 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -140,8 +140,11 @@ STATUS_REDHAWK_THREAD_ABORT equ 43h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +;; +;; Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. 
For now this aliases alloc_limit +;; +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit ;; ;; CONSTANTS -- SYMBOLS diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S index dc344183e927ba..f697ccbb1972d0 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S @@ -10,8 +10,11 @@ GC_ALLOC_FINALIZE = 1 // // Rename fields of nested structs // -OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +// +// Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. For now this aliases alloc_limit +// +OFFSETOF__Thread__m_eeAllocContext__combined_limit = OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit @@ -44,7 +47,7 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t4, $a1, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t4, $a2, RhpNewFast_RarePath // Update the alloc pointer to account for the allocation. @@ -137,7 +140,7 @@ NewOutOfMemory: // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, RhNewString_Rare // Reload new object address into r12. @@ -199,7 +202,7 @@ RhNewString_Rare: // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, RhpNewArray_Rare // Reload new object address into t3. 
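The offset aliases defined in the assembly macro files above are plain nested-offset arithmetic. A minimal C++ sketch of the same composition, using hypothetical *_sketch stand-ins for the real Thread, ee_alloc_context, and gc_alloc_context types (the real buffer is SIZEOF_ALLOC_CONTEXT bytes and holds additional GC-owned fields):

// Sketch only: shows how OFFSETOF__Thread__m_eeAllocContext__combined_limit is composed.
// Until the combined_limit field physically exists, the alias resolves to the byte offset
// of alloc_limit inside the buffer embedded in ee_alloc_context.
#include <cstddef>
#include <cstdint>

struct gc_alloc_context_sketch   // stand-in for gc_alloc_context
{
    uint8_t* alloc_ptr;
    uint8_t* alloc_limit;
};

struct ee_alloc_context_sketch   // stand-in for ee_alloc_context
{
    uint8_t m_rgbAllocContextBuffer[sizeof(gc_alloc_context_sketch)];
};

struct Thread_sketch             // stand-in for Thread / RuntimeThreadLocals
{
    ee_alloc_context_sketch m_eeAllocContext;
    // remaining thread-local fields follow in the real struct
};

// Equivalent of the assembler definition:
//   OFFSETOF__Thread__m_eeAllocContext__combined_limit =
//     OFFSETOF__Thread__m_eeAllocContext
//   + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer
//   + OFFSETOF__gc_alloc_context__alloc_limit
constexpr size_t OFFSETOF_combined_limit_sketch =
    offsetof(Thread_sketch, m_eeAllocContext) +
    offsetof(ee_alloc_context_sketch, m_rgbAllocContextBuffer) +
    offsetof(gc_alloc_context_sketch, alloc_limit);
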
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 318a10fd20a526..d42a6ecc67efe8 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -64,8 +64,9 @@ FCIMPL1(Object *, RhpNewFast, MethodTable* pEEType) size_t size = pEEType->GetBaseSize(); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Object* pObject = (Object *)alloc_ptr; @@ -112,8 +113,9 @@ FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements) size = ALIGN_UP(size, sizeof(uintptr_t)); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Array* pObject = (Array*)alloc_ptr; @@ -165,8 +167,9 @@ FCIMPL1(Object*, RhpNewFastAlign8, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -199,8 +202,9 @@ FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -248,8 +252,9 @@ FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresAlignObject) diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 4c0a21e9f9ab7f..5310289692565c 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -83,9 +83,21 @@ struct InlinedThreadStaticRoot TypeManager* m_typeManager; }; +// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context +// Right now there is no additional state, but we are planning to add a field as part of the randomized allocation +// sampling feature. Adding the struct now is some preparatory refactoring to make that change easier. 
+struct ee_alloc_context +{ + uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + + gc_alloc_context* GetGCAllocContext(); + uint8_t* GetCombinedLimit(); +}; + + struct RuntimeThreadLocals { - uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + ee_alloc_context m_eeAllocContext; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum PInvokeTransitionFrame* m_pTransitionFrame; PInvokeTransitionFrame* m_pDeferredTransitionFrame; // see Thread::EnablePreemptiveMode @@ -215,6 +227,7 @@ class Thread : private RuntimeThreadLocals bool IsInitialized(); + ee_alloc_context * GetEEAllocContext(); gc_alloc_context * GetAllocContext(); uint64_t GetPalThreadIdForLogging(); diff --git a/src/coreclr/nativeaot/Runtime/thread.inl b/src/coreclr/nativeaot/Runtime/thread.inl index 2daffd06922134..1ea91aec15e21b 100644 --- a/src/coreclr/nativeaot/Runtime/thread.inl +++ b/src/coreclr/nativeaot/Runtime/thread.inl @@ -2,6 +2,31 @@ // The .NET Foundation licenses this file to you under the MIT license. #ifndef DACCESS_COMPILE + + + +inline gc_alloc_context* ee_alloc_context::GetGCAllocContext() +{ + return (gc_alloc_context*)&m_rgbAllocContextBuffer; +} + +// The NativeAOT runtime does not define gc_alloc_context in a single global header; some files include a private +// definition as needed while others reference gcinterface.h for the official one. This .inl file is included from +// multiple places, some of which define the type and some of which do not. To avoid a redefinition error, this +// file uses its own private definition. +struct _thread_inl_gc_alloc_context +{ + uint8_t* alloc_ptr; + uint8_t* alloc_limit; +}; + +inline uint8_t* ee_alloc_context::GetCombinedLimit() +{ + // For the randomized allocation sampling feature, we plan to add a combined_limit field here, + // but for now this just aliases the alloc_limit field. + return ((_thread_inl_gc_alloc_context*)GetGCAllocContext())->alloc_limit; +} + // Set the m_pDeferredTransitionFrame field for GC allocation helpers that setup transition frame // in assembly code. Do not use anywhere else. 
inline void Thread::SetDeferredTransitionFrame(PInvokeTransitionFrame* pTransitionFrame) @@ -59,9 +84,14 @@ inline void Thread::PopGCFrameRegistration(GCFrameRegistration* pRegistration) m_pGCFrameRegistrations = pRegistration->m_pNext; } +inline ee_alloc_context* Thread::GetEEAllocContext() +{ + return &m_eeAllocContext; +} + inline gc_alloc_context* Thread::GetAllocContext() { - return (gc_alloc_context*)m_rgbAllocContextBuffer; + return GetEEAllocContext()->GetGCAllocContext(); } inline bool Thread::IsStateSet(ThreadStateFlags flags) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index f8ec8f5037b1b2..cd8f93aa5cc8d1 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -240,8 +240,11 @@ C_FUNC(\Name): // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -#define OFFSETOF__Thread__m_alloc_context__alloc_limit OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +// +// Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. For now this aliases alloc_limit +// +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit // GC type flags #define GC_ALLOC_FINALIZE 1 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 68631819f7dee4..05c46c368af36a 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -28,8 +28,11 @@ #define TrapThreadsFlags_TrapThreads 2 // Rename fields of nested structs -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +// +// Combined_limit doesn't exist yet. It is planned to come as part of the randomized allocation sampling feature. For now this aliases alloc_limit +// +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) // GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). #define SIZEOF__MinObject 12
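For illustration, the bump-pointer fast path that portable.cpp and the assembly helpers share can be sketched as below. This is a simplified, self-contained C++ sketch with hypothetical *_sketch names, not code from this change; it only shows why the fast path compares against GetCombinedLimit() rather than alloc_limit.

#include <cstddef>
#include <cstdint>

struct gc_alloc_context_sketch        // stand-in for gc_alloc_context
{
    uint8_t* alloc_ptr;
    uint8_t* alloc_limit;
};

struct ee_alloc_context_sketch        // stand-in for ee_alloc_context
{
    gc_alloc_context_sketch m_gc;
    // Future field planned for randomized allocation sampling (not part of this PR):
    // uint8_t* combined_limit;

    gc_alloc_context_sketch* GetGCAllocContext() { return &m_gc; }

    // Today this aliases alloc_limit. Once combined_limit exists it may return an
    // earlier address so the next sampled allocation falls back to the slow path.
    uint8_t* GetCombinedLimit() { return m_gc.alloc_limit; }
};

// Fast path: bump-allocate within the current allocation context if the object fits
// below the combined limit; otherwise return nullptr to signal the slow path, which
// refills the context and (in the future) emits the allocation sampling event.
void* AllocateFast_sketch(ee_alloc_context_sketch* eeCtx, size_t size)
{
    gc_alloc_context_sketch* acontext = eeCtx->GetGCAllocContext();
    uint8_t* alloc_ptr = acontext->alloc_ptr;
    uint8_t* combined_limit = eeCtx->GetCombinedLimit();
    if ((size_t)(combined_limit - alloc_ptr) >= size)
    {
        acontext->alloc_ptr = alloc_ptr + size;
        return alloc_ptr;
    }
    return nullptr; // caller takes the slow path
}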