Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NativeAOT] Simplifying access to thread static variables #84566

Merged
merged 30 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4f6180c
separate inlined threadstatics from multimodule/dynamic ones
VSadov Apr 10, 2023
212cf92
allocate threadstatic storage on first use
VSadov Apr 14, 2023
6f68fec
no need to pass type manager to the fast helpers
VSadov Apr 19, 2023
965c097
RhpGetThreadStaticBaseForType for arm64
VSadov Apr 19, 2023
000dcc7
fix multifile case
VSadov Apr 19, 2023
330b33c
introduced InlinedThreadStatics
VSadov Apr 21, 2023
8df68fa
no index when inlined
VSadov Apr 21, 2023
99441a2
no index for inlined storage on the runtime side
VSadov Apr 21, 2023
5564d12
encode the storage type
VSadov Apr 21, 2023
c649f9d
report offsets to JIT
VSadov Apr 21, 2023
ce5ba65
reflection
VSadov Apr 21, 2023
f2c993a
add root, layout tweaks, couple fixes
VSadov Apr 22, 2023
9e60d58
all pass
VSadov Apr 22, 2023
530e7b3
more compact storage
VSadov Apr 22, 2023
18395e9
unix x64 asm helper
VSadov Apr 22, 2023
ad25223
arm64
VSadov Apr 22, 2023
2671c9d
tweak arm64
VSadov Apr 23, 2023
13270c6
simplify GetThreadStaticBaseForType
VSadov Apr 23, 2023
383961c
renames
VSadov Apr 23, 2023
b802437
Undo unnecessary changes
VSadov Apr 23, 2023
d88d024
a few tweaks
VSadov Apr 23, 2023
79cfc7e
add some comments and asserts
VSadov Apr 23, 2023
1802dd7
INLINE_GET_TLS_VAR for win-x64
VSadov Apr 28, 2023
76b4884
moved the managed tls root off the thread
VSadov Apr 28, 2023
39cdd4c
unix x64
VSadov Apr 28, 2023
b81666f
unix arm64
VSadov Apr 28, 2023
567f5eb
win-arm64
VSadov Apr 28, 2023
cfc81f0
adjust dwarf test for fewer warnings
VSadov Apr 28, 2023
1ee3b96
remove unused asm offsets
VSadov Apr 28, 2023
419af38
PR feedback
VSadov May 1, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/coreclr/nativeaot/Runtime/AsmOffsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ ASM_OFFSET( 0, 78, Thread, m_uHijackedReturnValueFlags)
ASM_OFFSET( 48, 80, Thread, m_pExInfoStackHead)
ASM_OFFSET( 4c, 88, Thread, m_threadAbortException)

ASM_OFFSET( 50, 90, Thread, m_pThreadLocalModuleStatics)

ASM_SIZEOF( 14, 20, EHEnum)

ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr)
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,25 @@ RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList()
return m_TypeManagerList;
}

// Returns the TypeManager of the only entry in the type manager list.
// Yields NULL when the list is empty or when it holds more than one entry.
TypeManager* RuntimeInstance::GetSingleTypeManager()
{
    auto firstEntry = m_TypeManagerList.GetHead();

    // Bail out unless there is exactly one registered entry.
    if (firstEntry == NULL || firstEntry->m_pNext != NULL)
    {
        return NULL;
    }

    return firstEntry->m_pTypeManager;
}

// Runtime helper: wraps the single registered TypeManager in a TypeManagerHandle.
// Asserts that exactly one type manager is registered (GetSingleTypeManager returns
// NULL otherwise).
COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetSingleTypeManager, ())
{
    RuntimeInstance* pRuntime = GetRuntimeInstance();
    TypeManager* pOnlyManager = pRuntime->GetSingleTypeManager();
    ASSERT(pOnlyManager != NULL);

    return TypeManagerHandle::Create(pOnlyManager);
}

// static
bool RuntimeInstance::Initialize(HANDLE hPalInstance)
{
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/RuntimeInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class RuntimeInstance

bool RegisterTypeManager(TypeManager * pTypeManager);
TypeManagerList& GetTypeManagerList();
TypeManager* GetSingleTypeManager();
OsModuleList* GetOsModuleList();

bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange);
Expand Down
15 changes: 11 additions & 4 deletions src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,11 @@ Name dq offset AddressToExport
_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer)

;;
;; __declspec(thread) version
;; __declspec(thread) variable
;;
INLINE_GETTHREAD macro destReg, trashReg
INLINE_GET_TLS_VAR macro destReg, trashReg, variable
EXTERN _tls_index : DWORD
EXTERN tls_CurrentThread:DWORD
EXTERN variable:DWORD

;;
;; construct 'eax' from 'rax' so that the register size and data size match
Expand All @@ -255,11 +255,18 @@ endif
mov destRegDWORD, [_tls_index]
mov trashReg, gs:[_tls_array]
mov trashReg, [trashReg + destReg * 8]
mov destRegDWORD, SECTIONREL tls_CurrentThread
mov destRegDWORD, SECTIONREL variable
add destReg, trashReg

endm

;;
;; INLINE_GETTHREAD: INLINE_GET_TLS_VAR specialized for the
;; __declspec(thread) variable tls_CurrentThread.
;;   destReg  - receives the result
;;   trashReg - scratch register, forwarded to INLINE_GET_TLS_VAR
;;
INLINE_GETTHREAD macro destReg, trashReg
INLINE_GET_TLS_VAR destReg, trashReg, tls_CurrentThread
endm

INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2
;;
;; Thread::Unhijack()
Expand Down
53 changes: 8 additions & 45 deletions src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S
Original file line number Diff line number Diff line change
Expand Up @@ -45,57 +45,20 @@ LOCAL_LABEL(ProbeLoop):
ret
NESTED_END RhpStackProbe, _TEXT

NESTED_ENTRY RhpGetThreadStaticBaseForType, _TEXT, NoHandler
// On entry:
// rdi - TypeManagerSlot*
// rsi - type index
NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
// On exit:
// rax - the thread static base for the given type

push_nonvol_reg rbx
push_nonvol_reg r12

mov rbx, rdi // Save TypeManagerSlot*
mov r12, rsi // Save type index

// rax = GetThread()
INLINE_GETTHREAD

mov r8d, [rbx + 8] // Get ModuleIndex out of the TypeManagerSlot
// rdi = &tls_InlinedThreadStatics
INLINE_GET_TLS_VAR tls_InlinedThreadStatics
mov rdi, rax

// get per-thread storage
mov rax, [rax + OFFSETOF__Thread__m_pThreadLocalModuleStatics]

// get per-module storage
mov rax, [rdi]
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
cmp r8d, [rax + OFFSETOF__Array__m_Length]
jae LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
mov rax, [rax + r8 * 8 + 0x10]
jz C_FUNC(RhpGetInlinedThreadStaticBaseSlow) // rdi contains the storage ref

// get the actual per-type storage
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
cmp r12d, [rax + OFFSETOF__Array__m_Length]
jae LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)
mov rax, [rax + r12 * 8 + 0x10]

// if have storage, return it
test rax, rax
jz LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath)

.cfi_remember_state
pop_nonvol_reg r12
pop_nonvol_reg rbx
// return it
ret
NESTED_END RhpGetInlinedThreadStaticBase, _TEXT

.cfi_restore_state
.cfi_def_cfa_offset 24 // workaround cfi_restore_state bug
LOCAL_LABEL(RhpGetThreadStaticBaseForType_RarePath):
mov rdi, rbx // restore TypeManagerSlot*
mov rsi, r12 // restore type index

pop_nonvol_reg r12
pop_nonvol_reg rbx
jmp C_FUNC(RhpGetThreadStaticBaseForTypeSlow)
NESTED_END RhpGetThreadStaticBaseForType, _TEXT
37 changes: 8 additions & 29 deletions src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

include AsmMacros.inc

EXTERN RhpGetThreadStaticBaseForTypeSlow : PROC
EXTERN RhpGetInlinedThreadStaticBaseSlow : PROC

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The following helper will access ("probe") a word on each page of the stack
Expand Down Expand Up @@ -39,41 +39,20 @@ ProbeLoop:

LEAF_END RhpStackProbe, _TEXT

LEAF_ENTRY RhpGetThreadStaticBaseForType, _TEXT
; On entry and throughout the procedure:
; rcx - TypeManagerSlot*
; rdx - type index
LEAF_ENTRY RhpGetInlinedThreadStaticBase, _TEXT
; On exit:
; rax - the thread static base for the given type

;; rax = GetThread(), TRASHES r8
INLINE_GETTHREAD rax, r8

mov r8d, [rcx + 8] ; Get ModuleIndex out of the TypeManagerSlot
;; rcx = &tls_InlinedThreadStatics, TRASHES r8
INLINE_GET_TLS_VAR rcx, r8, tls_InlinedThreadStatics

;; get per-thread storage
mov rax, [rax + OFFSETOF__Thread__m_pThreadLocalModuleStatics]

;; get per-module storage
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
cmp r8d, [rax + OFFSETOF__Array__m_Length]
jae RhpGetThreadStaticBaseForTypeSlow
mov rax, [rax + r8 * 8 + 10h]

;; get the actual per-type storage
mov rax, [rcx]
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
cmp edx, [rax + OFFSETOF__Array__m_Length]
jae RhpGetThreadStaticBaseForTypeSlow
mov rax, [rax + rdx * 8 + 10h]

;; if have storage, return it
test rax, rax
jz RhpGetThreadStaticBaseForTypeSlow
jz RhpGetInlinedThreadStaticBaseSlow ;; rcx contains the storage ref

;; return it
ret

LEAF_END RhpGetThreadStaticBaseForType, _TEXT
LEAF_END RhpGetInlinedThreadStaticBase, _TEXT

end
18 changes: 18 additions & 0 deletions src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,21 @@
#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
// On exit:
// x0 - the thread static base for the given type

// x1 = &tls_InlinedThreadStatics
INLINE_GET_TLS_VAR x1, C_FUNC(tls_InlinedThreadStatics)

// get per-thread storage (first pointer-sized slot of tls_InlinedThreadStatics)
ldr x0, [x1]
cbnz x0, HaveValue
// storage is null: branch to the slow helper with x0 = &tls_InlinedThreadStatics
mov x0, x1
b C_FUNC(RhpGetInlinedThreadStaticBaseSlow)

HaveValue:
// return it
ret

NESTED_END RhpGetInlinedThreadStaticBase, _TEXT
19 changes: 19 additions & 0 deletions src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@

#include "AsmMacros.h"

EXTERN RhpGetInlinedThreadStaticBaseSlow

TEXTAREA

;; On exit:
;; x0 - the thread static base for the given type
LEAF_ENTRY RhpGetInlinedThreadStaticBase
;; x1 = &tls_InlinedThreadStatics, TRASHES x2
INLINE_GET_TLS_VAR x1, x2, tls_InlinedThreadStatics

;; get per-thread storage (first pointer-sized slot of tls_InlinedThreadStatics)
ldr x0, [x1]
cbnz x0, HaveValue
;; storage is null: branch to the slow helper with x0 = &tls_InlinedThreadStatics
mov x0, x1
b RhpGetInlinedThreadStaticBaseSlow

HaveValue
;; return it
ret
LEAF_END RhpGetInlinedThreadStaticBase

end
8 changes: 8 additions & 0 deletions src/coreclr/nativeaot/Runtime/gcrhscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ void GCToEEInterface::GcScanRoots(EnumGcRefCallbackFunc * fn, int condemned, in
else
#endif
{
InlinedThreadStaticRoot* pRoot = pThread->GetInlinedThreadStaticList();
while (pRoot != NULL)
{
STRESS_LOG2(LF_GC | LF_GCROOTS, LL_INFO100, "{ Scanning Thread's %p inline thread statics root %p. \n", pThread, pRoot);
GcEnumObject(&pRoot->m_threadStaticsBase, 0 /*flags*/, fn, sc);
pRoot = pRoot->m_next;
}

STRESS_LOG1(LF_GC | LF_GCROOTS, LL_INFO100, "{ Scanning Thread's %p thread statics root. \n", pThread);
GcEnumObject(pThread->GetThreadStaticStorage(), 0 /*flags*/, fn, sc);

Expand Down
25 changes: 22 additions & 3 deletions src/coreclr/nativeaot/Runtime/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ void Thread::Construct()

// Everything else should be initialized to 0 via the static initialization of tls_CurrentThread.

ASSERT(m_pThreadLocalModuleStatics == NULL);
ASSERT(m_pThreadLocalStatics == NULL);
ASSERT(m_pInlinedThreadLocalStatics == NULL);

ASSERT(m_pGCFrameRegistrations == NULL);

Expand Down Expand Up @@ -1266,15 +1267,33 @@ COOP_PINVOKE_HELPER(Object *, RhpGetThreadAbortException, ())

Object** Thread::GetThreadStaticStorage()
{
return &m_pThreadLocalModuleStatics;
return &m_pThreadLocalStatics;
}

COOP_PINVOKE_HELPER(Object**, RhGetThreadStaticStorage, ())
{
Thread * pCurrentThread = ThreadStore::RawGetCurrentThread();
Thread* pCurrentThread = ThreadStore::RawGetCurrentThread();
return pCurrentThread->GetThreadStaticStorage();
}

// Returns the head of this thread's linked list of registered inlined
// thread-static roots; callers follow m_next to visit the remaining entries.
InlinedThreadStaticRoot* Thread::GetInlinedThreadStaticList()
{
return m_pInlinedThreadLocalStatics;
}

// Links newRoot in as the new head of this thread's list of inlined
// thread-static roots. The root must not already be chained to another entry.
void Thread::RegisterInlinedThreadStaticRoot(InlinedThreadStaticRoot* newRoot)
{
    ASSERT(newRoot->m_next == NULL);

    // Prepend: the previous head becomes the successor of the new root.
    InlinedThreadStaticRoot* pPreviousHead = m_pInlinedThreadLocalStatics;
    newRoot->m_next = pPreviousHead;
    m_pInlinedThreadLocalStatics = newRoot;
}

// Runtime helper: registers `root` with the current thread's list of inlined
// thread-static roots. `root` is reinterpreted as an InlinedThreadStaticRoot*.
COOP_PINVOKE_HELPER(void, RhRegisterInlinedThreadStaticRoot, (Object** root))
{
    InlinedThreadStaticRoot* pNewRoot = (InlinedThreadStaticRoot*)root;
    ThreadStore::RawGetCurrentThread()->RegisterInlinedThreadStaticRoot(pNewRoot);
}

// This function is used to quickly query a value that can uniquely identify a thread
COOP_PINVOKE_HELPER(uint8_t*, RhCurrentNativeThreadId, ())
{
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/nativeaot/Runtime/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ struct GCFrameRegistration
int m_MaybeInterior;
};

// One root of inlined thread-static storage; roots registered on a thread
// are chained through m_next.
struct InlinedThreadStaticRoot
{
// Reference to the thread statics storage object. Keep this as the first field:
// the RhpGetInlinedThreadStaticBase asm helpers load it from offset 0 of
// tls_InlinedThreadStatics, and RhRegisterInlinedThreadStaticRoot casts an
// Object** to InlinedThreadStaticRoot*.
Object* m_threadStaticsBase;
// Next root registered on the same thread, or NULL at the end of the list.
InlinedThreadStaticRoot* m_next;
};

struct ThreadBuffer
{
uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT];
Expand All @@ -88,7 +94,8 @@ struct ThreadBuffer
uintptr_t m_uHijackedReturnValueFlags;
PTR_ExInfo m_pExInfoStackHead;
Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort
Object* m_pThreadLocalModuleStatics;
Object* m_pThreadLocalStatics;
InlinedThreadStaticRoot* m_pInlinedThreadLocalStatics;
GCFrameRegistration* m_pGCFrameRegistrations;
PTR_VOID m_pStackLow;
PTR_VOID m_pStackHigh;
Expand Down Expand Up @@ -288,6 +295,9 @@ class Thread : private ThreadBuffer

Object** GetThreadStaticStorage();

InlinedThreadStaticRoot* GetInlinedThreadStaticList();
void RegisterInlinedThreadStaticRoot(InlinedThreadStaticRoot* newRoot);

NATIVE_CONTEXT* GetInterruptedContext();

void PushGCFrameRegistration(GCFrameRegistration* pRegistration);
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/nativeaot/Runtime/threadstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,23 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer));

#ifndef _MSC_VER
__thread ThreadBuffer tls_CurrentThread;

// the root of inlined threadstatics storage
// there is only one now,
// eventually this will be emitted by ILC and we may have more than one such variable
__thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
#endif

// Returns the address of the calling thread's tls_CurrentThread buffer.
EXTERN_C ThreadBuffer* RhpGetThread()
{
return &tls_CurrentThread;
}

// Runtime helper: returns the address of the m_threadStaticsBase slot inside
// the calling thread's tls_InlinedThreadStatics.
COOP_PINVOKE_HELPER(Object**, RhGetInlinedThreadStaticStorage, ())
{
return &tls_InlinedThreadStatics.m_threadStaticsBase;
}

#endif // !DACCESS_COMPILE

#ifdef _WIN32
Expand Down Expand Up @@ -505,4 +515,4 @@ void ThreadStore::SaveCurrentThreadOffsetForDAC()
{
}

#endif // _WIN32
#endif // _WIN32
6 changes: 6 additions & 0 deletions src/coreclr/nativeaot/Runtime/threadstore.inl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@
#ifdef _MSC_VER
// a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized.
EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread;

// the root of inlined threadstatics storage
// there is only one now,
// eventually this will be emitted by ILC and we may have more than one such variable
EXTERN_C __declspec(selectany) __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics;
#else
EXTERN_C __thread ThreadBuffer tls_CurrentThread;
EXTERN_C __thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
#endif

// static
Expand Down
Loading