From 9387a597b8f20129a1037c245ab811ed63b52aa9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 1 Jun 2023 14:57:39 -0700 Subject: [PATCH 01/79] wip --- src/coreclr/inc/corinfo.h | 2 + src/coreclr/inc/jithelpers.h | 4 + src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/helperexpansion.cpp | 111 ++++++++++++++---- src/coreclr/jit/lsraxarch.cpp | 17 +++ src/coreclr/pal/inc/unixasmmacrosamd64.inc | 13 ++ .../Common/JitInterface/CorInfoHelpFunc.cs | 2 + src/coreclr/vm/amd64/AsmHelpers.asm | 1 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 2 - src/coreclr/vm/amd64/asmhelpers.S | 12 ++ src/coreclr/vm/amd64/jithelpers_fast.S | 1 - src/coreclr/vm/appdomain.cpp | 4 - src/coreclr/vm/appdomain.hpp | 5 - src/coreclr/vm/i386/asmhelpers.S | 7 ++ src/coreclr/vm/jithelpers.cpp | 10 +- src/coreclr/vm/jitinterface.cpp | 20 +++- .../static/singlefilehost_unixexports.src | 2 +- 18 files changed, 167 insertions(+), 52 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 04a8f2fb898d4..5d15c22900936 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -550,6 +550,8 @@ enum CorInfoHelpFunc CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, + GetNonGCMaxThreadStaticBlocksAddr, + GetGCMaxThreadStaticBlocksAddr, /* Debugger */ diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 1913a428da942..bdd9740ed4a54 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -208,6 +208,10 @@ JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(GetNonGCMaxThreadStaticBlocksAddr, GetNonGCMaxThreadStaticBlocksAddr, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(GetGCMaxThreadStaticBlocksAddr, GetGCMaxThreadStaticBlocksAddr, CORINFO_HELP_SIG_REG_ONLY) + + // Debugger JITHELPER(CORINFO_HELP_DBG_IS_JUST_MY_CODE, JIT_DbgIsJustMyCode,CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 6a19d89253d40..8f0fcf95f45e5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5044,7 +5044,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Partially inline static initializations DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit); - if (TargetOS::IsWindows) + // if (TargetOS::IsWindows) { // Currently this is only applicable for Windows DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 11407c9cabb1d..0fea001f5cc1c 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10131,9 +10131,7 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */ if (callSig == nullptr) { - assert(methodHandle != nullptr); - - if (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF) + if ((methodHandle != nullptr) && (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)) { emitComp->eeGetMethodSig(methodHandle, &sigInfo); callSig = &sigInfo; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 983cf4d256955..62447d565cc07 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -445,6 +445,38 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() return fgExpandHelper<&Compiler::fgExpandThreadLocalAccessForCall>(true); } +extern "C" void * __tls_get_addr (void *ti); +__thread int x; + +void* getDescriptor() +{ + uint8_t* p; + __asm__ ( + "leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq x@TLSGD(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b" (p) + ); + + // printf("p= %x\n", p[0]); + // printf("p= %x\n", p[1]); + // printf("p= %x\n", p[2]); + // printf("p= %x\n", p[3]); + if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) + { + printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); + exit(1); + } + p += 4; + + return *(uint32_t*)p + (p + 4); + // return p; +} + //------------------------------------------------------------------------------ // fgExpandThreadLocalAccessForCall : Expand the CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED // or CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, that access fields marked with [ThreadLocal]. @@ -487,8 +519,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum); DISPTREE(call); JITDUMP("\n"); - bool isGCThreadStatic = - eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + + bool isGCThreadStatic = false; + isGCThreadStatic = eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); @@ -505,10 +538,16 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); + // assert(false); - assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); - assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || - (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); + // assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); + // if (TargetOS::IsWindows) { + assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || + (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); + // } else { + // assert((eeGetHelperNum(call->gtCallMethHnd) == GetGCMaxThreadStaticBlocksAddr) || + // (eeGetHelperNum(call->gtCallMethHnd) == GetNonGCMaxThreadStaticBlocksAddr)); + // } call->ClearExpTLSFieldAccess(); assert(call->gtArgs.CountArgs() == 1); @@ -545,34 +584,59 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtUpdateStmtSideEffects(stmt); GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* tlsValue = nullptr; - void** pIdAddr = nullptr; - - size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; - GenTree* dllRef = nullptr; - - if (tlsIndexValue != 0) + if (TargetOS::IsWindows) { - dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); - } + size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; + GenTree* dllRef = nullptr; - // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] - GenTree* tlsRef = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + if (tlsIndexValue != 0) + { + dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); + } + + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] + tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + if (dllRef != nullptr) + { + // Add the dllRef to produce thread local storage reference for coreclr + tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); + } - if (dllRef != nullptr) + // Base of coreclr's thread local storage + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + } + else { - // Add the dllRef to produce thread local storage reference for coreclr - tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); + void* (*pTlsGetAddr)(void*) = &__tls_get_addr; + GenTree* tls_get_addr_val = gtNewIconHandleNode((size_t)pTlsGetAddr, GTF_ICON_FTN_ADDR); + GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_ULONG); + ssize_t xaddr = (ssize_t)getDescriptor(); + printf("addr of x= %p\n", xaddr); + + GenTree* tlsArg = gtNewIconNode(5, TYP_I_IMPL); + + tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + + CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + + arg0->AbiInfo = CallArgABIInformation(); + arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); +#ifdef UNIX_X86_ABI + tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; +#endif + tlsValue = tlsRefCall; } - // Base of coreclr's thread local storage - GenTree* tlsValue = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - // Cache the tls value unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; + GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); @@ -636,6 +700,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // block (...): [weight: 1.0] // use(threadStaticBlockBase); + fgDumpBlock(prevBb); + fgDumpBlock(block); + // maxThreadStaticBlocksCondBB BasicBlock* maxThreadStaticBlocksCondBB = fgNewBBFromTreeAfter(BBJ_COND, prevBb, tlsValueDef, debugInfo); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 4beb10368abc5..40cc8f3c3dd71 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1329,6 +1329,23 @@ int LinearScan::BuildCall(GenTreeCall* call) ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS); } srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates); + if (call->gtCallType == CT_INDIRECT) + { + for (CallArg& arg : call->gtArgs.EarlyArgs()) + { + CallArgABIInformation& abiInfo = arg.AbiInfo; + GenTree* argNode = arg.GetEarlyNode(); + + // Each register argument corresponds to one source. + if (argNode->OperIsPutArgReg()) + { + srcCount++; + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + const regNumber argReg = abiInfo.GetRegNum(); + assert(argNode->GetRegNum() == argReg); + } + } + } } buildInternalRegisterUses(); diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index bb1e70a27bef0..ce372c7615482 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -363,3 +363,16 @@ C_FUNC(\Name\()_End): .cfi_same_value rbp .endm + +.macro INLINE_GET_TLS_VAR Var + .att_syntax +#if defined(__APPLE__) + movq _\Var@TLVP(%rip), %rdi + callq *(%rdi) +#else + leaq \Var@TLSLD(%rip), %rdi + callq __tls_get_addr@PLT + addq $\Var@DTPOFF, %rax +#endif + .intel_syntax noprefix +.endm \ No newline at end of file diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 0693f3c1b69f1..8478d6dacff69 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -192,6 +192,8 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, + GetNonGCMaxThreadStaticBlocksAddr, + GetGCMaxThreadStaticBlocksAddr, /* Debugger */ diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index c4501546d836e..6a304b006f311 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -20,6 +20,7 @@ endif GenerateArrayOpStubExceptionCase macro ErrorCaseName, ExceptionName + NESTED_ENTRY ErrorCaseName&_RSIRDI_ScratchArea, _TEXT ; account for scratch area, rsi, rdi already on the stack diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index dd5b891a44134..a6240dddd8249 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -49,13 +49,11 @@ endif extern JIT_InternalThrow:proc - ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT ret LEAF_END JIT_PatchedCodeStart, _TEXT - ; This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow ; or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ ; change at runtime as the GC changes. Initially it should simply be a copy of the diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index bebfd3376c12d..e9c9f613bcc53 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -160,6 +160,18 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler ret NESTED_END ProfileEnterNaked, _TEXT +// LPVOID __stdcall GetNonGCMaxThreadStaticBlocksAddr(void)// +LEAF_ENTRY GetNonGCMaxThreadStaticBlocksAddr, _TEXT + INLINE_GET_TLS_VAR t_NonGCMaxThreadStaticBlocks + ret +LEAF_END GetNonGCMaxThreadStaticBlocksAddr, _TEXT + +// LPVOID __stdcall GetGCMaxThreadStaticBlocksAddr(void)// +LEAF_ENTRY GetGCMaxThreadStaticBlocksAddr, _TEXT + INLINE_GET_TLS_VAR t_GCMaxThreadStaticBlocks + ret +LEAF_END GetGCMaxThreadStaticBlocksAddr, _TEXT + # EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp); # # diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 32890b471b26c..d2ec286d71a94 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -10,7 +10,6 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT ret LEAF_END JIT_PatchedCodeStart, _TEXT - // There is an even more optimized version of these helpers possible which takes // advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 // that check (this is more significant in the JIT_WriteBarrier case). diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index d7210892f2ae5..5529cd6e454a2 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -665,7 +665,6 @@ void BaseDomain::InitVSD() GetLoaderAllocator()->InitVirtualCallStubManager(this); } -#ifdef HOST_WINDOWS void BaseDomain::InitThreadStaticBlockTypeMap() { STANDARD_VM_CONTRACT; @@ -673,7 +672,6 @@ void BaseDomain::InitThreadStaticBlockTypeMap() m_NonGCThreadStaticBlockTypeIDMap.Init(); m_GCThreadStaticBlockTypeIDMap.Init(); } -#endif // HOST_WINDOWS void BaseDomain::ClearBinderContext() { @@ -4679,7 +4677,6 @@ PTR_MethodTable BaseDomain::LookupType(UINT32 id) { return pMT; } -#ifdef HOST_WINDOWS //------------------------------------------------------------------------ UINT32 BaseDomain::GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT) { @@ -4730,7 +4727,6 @@ PTR_MethodTable BaseDomain::LookupGCThreadStaticBlockType(UINT32 id) { CONSISTENCY_CHECK(CheckPointer(pMT)); return pMT; } -#endif // HOST_WINDOWS #ifndef DACCESS_COMPILE //--------------------------------------------------------------------------------------- diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp index ab928ca5975ab..67dcc3c1d48e7 100644 --- a/src/coreclr/vm/appdomain.hpp +++ b/src/coreclr/vm/appdomain.hpp @@ -1221,18 +1221,14 @@ class BaseDomain private: TypeIDMap m_typeIDMap; -#ifdef HOST_WINDOWS // MethodTable to `typeIndex` map. `typeIndex` is embedded in the code during codegen. // During execution corresponding thread static data blocks are stored in `t_NonGCThreadStaticBlocks` // and `t_GCThreadStaticBlocks` array at the `typeIndex`. TypeIDMap m_NonGCThreadStaticBlockTypeIDMap; TypeIDMap m_GCThreadStaticBlockTypeIDMap; -#endif // HOST_WINDOWS - public: -#ifdef HOST_WINDOWS void InitThreadStaticBlockTypeMap(); UINT32 GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT); @@ -1240,7 +1236,6 @@ class BaseDomain PTR_MethodTable LookupNonGCThreadStaticBlockType(UINT32 id); PTR_MethodTable LookupGCThreadStaticBlockType(UINT32 id); -#endif UINT32 GetTypeID(PTR_MethodTable pMT); UINT32 LookupTypeID(PTR_MethodTable pMT); diff --git a/src/coreclr/vm/i386/asmhelpers.S b/src/coreclr/vm/i386/asmhelpers.S index e0b87813592dd..108fc5259deb4 100644 --- a/src/coreclr/vm/i386/asmhelpers.S +++ b/src/coreclr/vm/i386/asmhelpers.S @@ -5,6 +5,13 @@ #include "unixasmmacros.inc" #include "asmconstants.h" + +// LPVOID __stdcall GetNonGCThreadStaticBlocksAddr(void)// + LEAF_ENTRY GetNonGCThreadStaticBlocksAddr, _TEXT + leaq t_NonGCThreadStaticBlocks@TLSLD(rip), rax + ret + LEAF_END GetNonGCThreadStaticBlocksAddr, _TEXT + // // FramedMethodFrame prolog // diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index d4ce2c9aa69ac..e31e90da0ae94 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1807,6 +1807,8 @@ EXTERN_C __thread void** t_GCThreadStaticBlocks; #include HCIMPL2(void*, JIT_GetSharedNonGCThreadStaticBase, DomainLocalModule *pDomainLocalModule, DWORD dwClassDomainID) { + // t_NonGCMaxThreadStaticBlocks = 500; + FCALL_CONTRACT; // Get the ModuleIndex @@ -1840,7 +1842,6 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn { void* staticBlock = nullptr; -#ifdef HOST_WINDOWS FCALL_CONTRACT; HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame @@ -1885,9 +1886,6 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex); } HELPER_METHOD_FRAME_END(); -#else - _ASSERTE(!"JIT_GetSharedNonGCThreadStaticBaseOptimized not supported on non-windows."); -#endif // HOST_WINDOWS return staticBlock; } @@ -1938,7 +1936,6 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex { void* staticBlock = nullptr; -#ifdef HOST_WINDOWS FCALL_CONTRACT; HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame @@ -1987,9 +1984,6 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex staticBlock = (void*) pMT->GetGCThreadStaticsBasePointer(); HELPER_METHOD_FRAME_END(); -#else - _ASSERTE(!"JIT_GetSharedGCThreadStaticBaseOptimized not supported on non-windows."); -#endif // HOST_WINDOWS return staticBlock; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 85d904b566cdd..dd51d8e26e17e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -76,10 +76,18 @@ __declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; #else -EXTERN_C __thread uint32_t t_maxThreadStaticBlocks; -EXTERN_C __thread void** t_threadStaticBlocks; +__thread uint32_t t_NonGCMaxThreadStaticBlocks; +__thread void** t_NonGCThreadStaticBlocks; + +__thread uint32_t t_NonGCThreadStaticBlocksSize; +__thread uint32_t t_GCThreadStaticBlocksSize; + +__thread uint32_t t_GCMaxThreadStaticBlocks; +__thread void** t_GCThreadStaticBlocks; #endif +// EXTERN_C UINT_PTR STDCALL GetNonGCMaxThreadStaticBlocksAddr(); + // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro // @@ -1488,6 +1496,11 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, FieldDesc * pField = (FieldDesc*)pResolvedToken->hField; MethodTable * pFieldMT = pField->GetApproxEnclosingMethodTable(); + +// #ifndef _MSC_VER +// printf("addr: %lu\n", GetNonGCMaxThreadStaticBlocksAddr()); +// #endif + // Helper to use if the field access requires it CORINFO_FIELD_ACCESSOR fieldAccessor = (CORINFO_FIELD_ACCESSOR)-1; DWORD fieldFlags = 0; @@ -1570,8 +1583,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER; pResult->helper = getSharedStaticsHelper(pField, pFieldMT); - -#ifdef HOST_WINDOWS #ifndef TARGET_ARM // For windows, we convert the TLS access to the optimized helper where we will store // the static blocks in TLS directly and access them via inline code. @@ -1588,7 +1599,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; } #endif // !TARGET_ARM -#endif // HOST_WINDOWS } else { diff --git a/src/native/corehost/apphost/static/singlefilehost_unixexports.src b/src/native/corehost/apphost/static/singlefilehost_unixexports.src index 18d5697e84580..db495e5ece2da 100644 --- a/src/native/corehost/apphost/static/singlefilehost_unixexports.src +++ b/src/native/corehost/apphost/static/singlefilehost_unixexports.src @@ -8,4 +8,4 @@ DotNetRuntimeInfo g_dacTable ; Used by profilers -MetaDataGetDispenser +MetaDataGetDispenser \ No newline at end of file From 4b27a2d709e51118c0cb098f7ba9c0ce8478009f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 1 Jun 2023 23:30:30 -0700 Subject: [PATCH 02/79] add __tls_get_addr() code in jitinterface --- src/coreclr/inc/corinfo.h | 6 ++ src/coreclr/jit/helperexpansion.cpp | 100 +++++++++--------- .../tools/superpmi/superpmi-shared/agnostic.h | 5 + .../superpmi-shared/methodcontext.cpp | 40 ++++--- src/coreclr/vm/jithelpers.cpp | 6 +- src/coreclr/vm/jitinterface.cpp | 38 ++++++- 6 files changed, 125 insertions(+), 70 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 5d15c22900936..20cfe2c84fc65 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1730,10 +1730,16 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { +#ifdef _MSC_VER CORINFO_CONST_LOOKUP tlsIndex; + uint32_t offsetOfThreadLocalStoragePointer; uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; +#else + size_t tlsGetAddrFtnPtr; + size_t descrAddrOfNonGCMaxThreadStaticBlock; +#endif uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 62447d565cc07..5aa8d844bada5 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -444,38 +444,36 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() return fgExpandHelper<&Compiler::fgExpandThreadLocalAccessForCall>(true); } - -extern "C" void * __tls_get_addr (void *ti); -__thread int x; - -void* getDescriptor() -{ - uint8_t* p; - __asm__ ( - "leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq x@TLSGD(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : "=b" (p) - ); - - // printf("p= %x\n", p[0]); - // printf("p= %x\n", p[1]); - // printf("p= %x\n", p[2]); - // printf("p= %x\n", p[3]); - if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) - { - printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); - exit(1); - } - p += 4; - - return *(uint32_t*)p + (p + 4); - // return p; -} +// +//extern "C" void* __tls_get_addr(void* ti); +//__thread int x; +// +//void* getDescriptor() +//{ +// uint8_t* p; +// __asm__("leaq 0(%%rip), %%rbx\n" +// "data16\n" +// "leaq x@TLSGD(%%rip), %%rdi\n" +// "data16\n" +// "data16\n" +// "rex64\n" +// "callq __tls_get_addr\n" +// : "=b"(p)); +// +// // printf("p= %x\n", p[0]); +// // printf("p= %x\n", p[1]); +// // printf("p= %x\n", p[2]); +// // printf("p= %x\n", p[3]); +// if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) +// { +// printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); +// exit(1); +// } +// p += 4; +// +// return *(uint32_t*)p + (p + 4); +// // return p; +//} //------------------------------------------------------------------------------ // fgExpandThreadLocalAccessForCall : Expand the CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED @@ -521,7 +519,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("\n"); bool isGCThreadStatic = false; - isGCThreadStatic = eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + isGCThreadStatic = + eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); @@ -542,11 +541,11 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); // if (TargetOS::IsWindows) { - assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || - (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); + assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || + (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); // } else { // assert((eeGetHelperNum(call->gtCallMethHnd) == GetGCMaxThreadStaticBlocksAddr) || - // (eeGetHelperNum(call->gtCallMethHnd) == GetNonGCMaxThreadStaticBlocksAddr)); + // (eeGetHelperNum(call->gtCallMethHnd) == GetNonGCMaxThreadStaticBlocksAddr)); // } call->ClearExpTLSFieldAccess(); @@ -584,7 +583,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtUpdateStmtSideEffects(stmt); GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTree* tlsValue = nullptr; + GenTree* tlsValue = nullptr; if (TargetOS::IsWindows) { @@ -611,34 +610,36 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* } else { - void* (*pTlsGetAddr)(void*) = &__tls_get_addr; - GenTree* tls_get_addr_val = gtNewIconHandleNode((size_t)pTlsGetAddr, GTF_ICON_FTN_ADDR); - GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_ULONG); - ssize_t xaddr = (ssize_t)getDescriptor(); - printf("addr of x= %p\n", xaddr); + //void* (*pTlsGetAddr)(void*) = &__tls_get_addr; + GenTree* tls_get_addr_val = gtNewIconHandleNode((size_t)1, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + + GenTreeCall* tlsRefCall = tlsValue->AsCall(); - GenTree* tlsArg = gtNewIconNode(5, TYP_I_IMPL); + ssize_t xaddr = (ssize_t)5; + //getDescriptor(); + + GenTree* tlsArg = gtNewIconNode(xaddr, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); - + arg0->AbiInfo = CallArgABIInformation(); arg0->AbiInfo.SetRegNum(0, REG_ARG_0); - + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); #ifdef UNIX_X86_ABI tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; #endif - tlsValue = tlsRefCall; } // Cache the tls value unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); - GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); @@ -700,9 +701,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // block (...): [weight: 1.0] // use(threadStaticBlockBase); - fgDumpBlock(prevBb); - fgDumpBlock(block); - // maxThreadStaticBlocksCondBB BasicBlock* maxThreadStaticBlocksCondBB = fgNewBBFromTreeAfter(BBJ_COND, prevBb, tlsValueDef, debugInfo); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 0783fa52962a4..da36fd2d3b77d 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -518,10 +518,15 @@ struct Agnostic_GetProfilingHandle struct Agnostic_GetThreadLocalStaticBlocksInfo { +#ifdef _MSC_VER Agnostic_CORINFO_CONST_LOOKUP tlsIndex; UINT offsetOfThreadLocalStoragePointer; UINT offsetOfMaxThreadStaticBlocks; UINT offsetOfThreadStaticBlocks; +#else + DWORDLONG tlsGetAddrFtnPtr; + DWORDLONG descrAddrOfNonGCMaxThreadStaticBlock; +#endif UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index aa491393c414b..367e78f676f0e 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3641,12 +3641,17 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC Agnostic_GetThreadLocalStaticBlocksInfo value; ZeroMemory(&value, sizeof(value)); - value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); - value.tlsIndex.accessType = pInfo->tlsIndex.accessType; - value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; - value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; - value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; - value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; +#ifdef _MSC_VER + value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); + value.tlsIndex.accessType = pInfo->tlsIndex.accessType; + value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; + value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; + value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; +#else + value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; + value.descrAddrOfNonGCMaxThreadStaticBlock = pInfo->descrAddrOfNonGCMaxThreadStaticBlock; +#endif + value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. DWORD key = isGCType ? 0 : 1; @@ -3656,11 +3661,17 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { +#ifdef _MSC_VER printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u", key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); +#else + printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 + ", descrAddrOfNonGCMaxThreadStaticBlock-%u", + key, value.tlsGetAddrFtnPtr, value.descrAddrOfNonGCMaxThreadStaticBlock); +#endif } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3670,12 +3681,17 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(key, value)); - pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; - pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; - pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; - pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; - pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; - pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; +#ifdef _MSC_VER + pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; + pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; + pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; + pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; +#else + pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; + pInfo->descrAddrOfNonGCMaxThreadStaticBlock = value.descrAddrOfNonGCMaxThreadStaticBlock; +#endif + pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } void MethodContext::recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index e31e90da0ae94..9fa79a224f9d9 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1789,13 +1789,13 @@ __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; #else EXTERN_C __thread uint32_t t_NonGCMaxThreadStaticBlocks; +EXTERN_C __thread void** t_NonGCThreadStaticBlocks; + EXTERN_C __thread uint32_t t_GCMaxThreadStaticBlocks; +EXTERN_C __thread void** t_GCThreadStaticBlocks; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; - -EXTERN_C __thread void** t_NonGCThreadStaticBlocks; -EXTERN_C __thread void** t_GCThreadStaticBlocks; #endif // *** This helper corresponds to both CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE and diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index dd51d8e26e17e..2441ccb715279 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -79,11 +79,12 @@ __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; __thread uint32_t t_NonGCMaxThreadStaticBlocks; __thread void** t_NonGCThreadStaticBlocks; -__thread uint32_t t_NonGCThreadStaticBlocksSize; -__thread uint32_t t_GCThreadStaticBlocksSize; - __thread uint32_t t_GCMaxThreadStaticBlocks; __thread void** t_GCThreadStaticBlocks; + +__thread uint32_t t_NonGCThreadStaticBlocksSize; +__thread uint32_t t_GCThreadStaticBlocksSize; +extern "C" void* __tls_get_addr(void* ti); #endif // EXTERN_C UINT_PTR STDCALL GetNonGCMaxThreadStaticBlocksAddr(); @@ -1815,6 +1816,30 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG return typeIndex; } +#ifdef _MSC_VER +void* getDescriptor() +{ + uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq t_NonGCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b"(p)); + + if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) + { + printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); + exit(1); + } + p += 4; + + return *(uint32_t*)p + (p + 4); +} +#endif + /*********************************************************************/ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) { @@ -1826,6 +1851,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* JIT_TO_EE_TRANSITION_LEAF(); +#ifdef _MSC_VER pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); pInfo->tlsIndex.accessType = IAT_VALUE; @@ -1840,7 +1866,11 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCThreadStaticBlocks); pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } - +#else + pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; + pInfo->descrAddrOfNonGCMaxThreadStaticBlock = getDescriptor(); + +#endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); JIT_TO_EE_TRANSITION_LEAF(); From 909b8e62c76b283b26ab3fb0be3750a820c4be84 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 00:00:41 -0700 Subject: [PATCH 03/79] working model --- src/coreclr/jit/helperexpansion.cpp | 70 +++++------ .../superpmi-shared/methodcontext.cpp | 2 +- src/coreclr/vm/appdomain.cpp | 2 - src/coreclr/vm/jitinterface.cpp | 111 ++++++++++++------ 4 files changed, 106 insertions(+), 79 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 5aa8d844bada5..bcd3d3e787837 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -528,6 +528,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* uint32_t offsetOfMaxThreadStaticBlocksVal = 0; uint32_t offsetOfThreadStaticBlocksVal = 0; +#ifdef _MSC_VER JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; @@ -536,6 +537,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); +#endif + JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); // assert(false); @@ -585,54 +588,45 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* tlsValue = nullptr; - if (TargetOS::IsWindows) - { - size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; - GenTree* dllRef = nullptr; - - if (tlsIndexValue != 0) - { - dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); - } - - // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] - tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); - tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - - if (dllRef != nullptr) - { - // Add the dllRef to produce thread local storage reference for coreclr - tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); - } +#ifdef _MSC_VER + size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; + GenTree* dllRef = nullptr; - // Base of coreclr's thread local storage - tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - } - else + if (tlsIndexValue != 0) { - //void* (*pTlsGetAddr)(void*) = &__tls_get_addr; - GenTree* tls_get_addr_val = gtNewIconHandleNode((size_t)1, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); + } - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] + tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - ssize_t xaddr = (ssize_t)5; - //getDescriptor(); + if (dllRef != nullptr) + { + // Add the dllRef to produce thread local storage reference for coreclr + tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); + } - GenTree* tlsArg = gtNewIconNode(xaddr, TYP_I_IMPL); + // Base of coreclr's thread local storage + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); +#else + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); - tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); - CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfNonGCMaxThreadStaticBlock, TYP_I_IMPL); + tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); - arg0->AbiInfo = CallArgABIInformation(); - arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + arg0->AbiInfo = CallArgABIInformation(); + arg0->AbiInfo.SetRegNum(0, REG_ARG_0); - tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); #ifdef UNIX_X86_ABI - tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; -#endif - } + tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; +#endif // UNIX_X86_ABI +#endif // _MSC_VER // Cache the tls value unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 367e78f676f0e..eaf3bf6a0bbcb 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3669,7 +3669,7 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); #else printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 - ", descrAddrOfNonGCMaxThreadStaticBlock-%u", + ", descrAddrOfNonGCMaxThreadStaticBlock-%lu", key, value.tlsGetAddrFtnPtr, value.descrAddrOfNonGCMaxThreadStaticBlock); #endif } diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index 5529cd6e454a2..aadf72ec87ec8 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1769,10 +1769,8 @@ void AppDomain::Create() // allocate a Virtual Call Stub Manager for the default domain pDomain->InitVSD(); -#ifdef HOST_WINDOWS // allocate a thread static block to index map pDomain->InitThreadStaticBlockTypeMap(); -#endif pDomain->SetStage(AppDomain::STAGE_OPEN); pDomain->CreateDefaultBinder(); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 2441ccb715279..16e7accf9e0ed 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1782,9 +1782,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, } - -#ifdef HOST_WINDOWS - /*********************************************************************/ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) { @@ -1816,7 +1813,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG return typeIndex; } -#ifdef _MSC_VER +#ifndef _MSC_VER void* getDescriptor() { uint8_t* p; @@ -1838,6 +1835,72 @@ void* getDescriptor() return *(uint32_t*)p + (p + 4); } + +void* getDescriptor2() +{ + uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq t_GCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b"(p)); + + if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) + { + printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); + exit(1); + } + p += 4; + + return *(uint32_t*)p + (p + 4); +} + +void* getDescriptor3() +{ + uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq t_NonGCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b"(p)); + + if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) + { + printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); + exit(1); + } + p += 4; + + return *(uint32_t*)p + (p + 4); +} + +void* getDescriptor4() +{ + uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq t_GCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b"(p)); + + if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) + { + printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); + exit(1); + } + p += 4; + + return *(uint32_t*)p + (p + 4); +} #endif /*********************************************************************/ @@ -1868,46 +1931,18 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* } #else pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->descrAddrOfNonGCMaxThreadStaticBlock = getDescriptor(); + pInfo->descrAddrOfNonGCMaxThreadStaticBlock = (size_t)getDescriptor(); + + printf("t_NonGCMaxThreadStaticBlocks: %p\n", getDescriptor()); + printf("t_GCMaxThreadStaticBlocks: %p\n", getDescriptor2()); + printf("t_NonGCThreadStaticBlocks: %p\n", getDescriptor3()); + printf("t_GCThreadStaticBlocks: %p\n", getDescriptor4()); #endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); JIT_TO_EE_TRANSITION_LEAF(); } -#else - -uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - return 0; -} - -void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - JIT_TO_EE_TRANSITION_LEAF(); - - pInfo->tlsIndex.addr = (UINT8*)0; - - pInfo->offsetOfThreadLocalStoragePointer = 0; - pInfo->offsetOfThreadStaticBlocks = 0; - pInfo->offsetOfMaxThreadStaticBlocks = 0; - pInfo->offsetOfGCDataPointer = 0; - - JIT_TO_EE_TRANSITION_LEAF(); -} -#endif // HOST_WINDOWS //--------------------------------------------------------------------------------------- // From 773eb84b77353b509c0c841a7bdeb967ba66013a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 00:24:44 -0700 Subject: [PATCH 04/79] linux rely on __tls_get_addr() value --- src/coreclr/jit/helperexpansion.cpp | 53 ++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index bcd3d3e787837..5ddc0b1f628ce 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -525,10 +525,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); +#ifdef _MSC_VER uint32_t offsetOfMaxThreadStaticBlocksVal = 0; uint32_t offsetOfThreadStaticBlocksVal = 0; - -#ifdef _MSC_VER JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; @@ -537,6 +536,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); +#else + JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); + JITDUMP("descrAddrOfNonGCMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfNonGCMaxThreadStaticBlock); #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); @@ -609,6 +611,28 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + // Cache the tls value + unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); + lvaTable[tlsLclNum].lvType = TYP_I_IMPL; + + GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + + // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" + GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); + GenTree* maxThreadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); + GenTree* maxThreadStaticBlocksValue = + gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" + GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); + GenTree* threadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); + GenTree* threadStaticBlocksValue = + gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + #else GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); @@ -626,33 +650,30 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #ifdef UNIX_X86_ABI tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; #endif // UNIX_X86_ABI -#endif // _MSC_VER // Cache the tls value unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; + //TODO: GC vs. Non-GC handling GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); - GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); - - // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" - GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* maxThreadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); + GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); GenTree* maxThreadStaticBlocksValue = - gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + GenTree* threadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), gtIconNode(16, TYP_I_IMPL)); + GenTree* threadStaticBlocksValue = + gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + +#endif // _MSC_VER + // Create tree for "if (maxThreadStaticBlocks < typeIndex)" GenTree* maxThreadStaticBlocksCond = gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue)); maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond); - // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" - GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* threadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); - GenTree* threadStaticBlocksValue = - gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]" typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue), From 05aaa68d6e4352a598c072ff9687bffde5bc1973 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 01:00:14 -0700 Subject: [PATCH 05/79] Add fields for both max/threadSTaticBlocks, have separate for GC/non-gc --- src/coreclr/inc/corinfo.h | 3 +- src/coreclr/jit/helperexpansion.cpp | 8 ++-- .../tools/superpmi/superpmi-shared/agnostic.h | 3 +- .../superpmi-shared/methodcontext.cpp | 10 +++-- src/coreclr/vm/jitinterface.cpp | 41 ++++++++++++++----- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 20cfe2c84fc65..13fe4715be653 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1738,7 +1738,8 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO uint32_t offsetOfThreadStaticBlocks; #else size_t tlsGetAddrFtnPtr; - size_t descrAddrOfNonGCMaxThreadStaticBlock; + size_t descrAddrOfMaxThreadStaticBlock; + size_t descrAddrOfThreadStaticBlocks; #endif uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 5ddc0b1f628ce..4253a120384db 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -538,7 +538,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); #else JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); - JITDUMP("descrAddrOfNonGCMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfNonGCMaxThreadStaticBlock); + JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); @@ -639,7 +639,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTreeCall* tlsRefCall = tlsValue->AsCall(); - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfNonGCMaxThreadStaticBlock, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -661,8 +661,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - GenTree* threadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), gtIconNode(16, TYP_I_IMPL)); + GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), + gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL)); GenTree* threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index da36fd2d3b77d..d16369ec48925 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -525,7 +525,8 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo UINT offsetOfThreadStaticBlocks; #else DWORDLONG tlsGetAddrFtnPtr; - DWORDLONG descrAddrOfNonGCMaxThreadStaticBlock; + DWORDLONG descrAddrOfMaxThreadStaticBlock; + DWORDLONG descrAddrOfThreadStaticBlocks; #endif UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index eaf3bf6a0bbcb..b200bf721386b 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3649,7 +3649,8 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; #else value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; - value.descrAddrOfNonGCMaxThreadStaticBlock = pInfo->descrAddrOfNonGCMaxThreadStaticBlock; + value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; + value.descrAddrOfThreadStaticBlocks = pInfo->descrAddrOfThreadStaticBlocks; #endif value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; @@ -3669,8 +3670,8 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); #else printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 - ", descrAddrOfNonGCMaxThreadStaticBlock-%lu", - key, value.tlsGetAddrFtnPtr, value.descrAddrOfNonGCMaxThreadStaticBlock); + ", descrAddrOfMaxThreadStaticBlock-%lu, descrAddrOfThreadStaticBlocks-%lu", + key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.descrAddrOfThreadStaticBlocks); #endif } @@ -3689,7 +3690,8 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; #else pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; - pInfo->descrAddrOfNonGCMaxThreadStaticBlock = value.descrAddrOfNonGCMaxThreadStaticBlock; + pInfo->descrAddrOfMaxThreadStaticBlock = value.descrAddrOfMaxThreadStaticBlock; + pInfo->descrAddrOfThreadStaticBlocks = value.descrAddrOfThreadStaticBlocks; #endif pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 16e7accf9e0ed..30816b895c734 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1814,7 +1814,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG } #ifndef _MSC_VER -void* getDescriptor() +void* getNonGCMaxThreadStaticDescriptor() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" @@ -1836,12 +1836,12 @@ void* getDescriptor() return *(uint32_t*)p + (p + 4); } -void* getDescriptor2() +void* getNonGCThreadStaticBlockDescriptor() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_GCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "leaq t_NonGCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1858,12 +1858,12 @@ void* getDescriptor2() return *(uint32_t*)p + (p + 4); } -void* getDescriptor3() +void* getGCMaxThreadStaticDescriptor() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_NonGCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "leaq t_GCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1880,7 +1880,7 @@ void* getDescriptor3() return *(uint32_t*)p + (p + 4); } -void* getDescriptor4() +void* getGCThreadStaticBlockDescriptor() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" @@ -1931,12 +1931,31 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* } #else pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->descrAddrOfNonGCMaxThreadStaticBlock = (size_t)getDescriptor(); + if (isGCType) + { + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getGCMaxThreadStaticDescriptor(); + pInfo->descrAddrOfThreadStaticBlocks = (size_t)getGCThreadStaticBlockDescriptor(); + } + else + { + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getNonGCMaxThreadStaticDescriptor(); + pInfo->descrAddrOfThreadStaticBlocks = (size_t)getNonGCThreadStaticBlockDescriptor(); + } + + printf("t_NonGCMaxThreadStaticBlocks: %p\n", getNonGCMaxThreadStaticDescriptor()); + printf("t_NonGCThreadStaticBlocks: %p\n", getNonGCThreadStaticBlockDescriptor()); + printf("t_GCMaxThreadStaticBlocks: %p\n", getGCMaxThreadStaticDescriptor()); + printf("t_GCThreadStaticBlocks: %p\n", getGCThreadStaticBlockDescriptor()); + + printf("&t_NonGCMaxThreadStaticBlocks = %p\n", &t_NonGCMaxThreadStaticBlocks); + printf("&t_NonGCThreadStaticBlocks = %p\n", &t_NonGCThreadStaticBlocks); + printf("&t_GCMaxThreadStaticBlocks = %p\n", &t_GCMaxThreadStaticBlocks); + printf("&t_GCThreadStaticBlocks = %p\n", &t_GCThreadStaticBlocks); - printf("t_NonGCMaxThreadStaticBlocks: %p\n", getDescriptor()); - printf("t_GCMaxThreadStaticBlocks: %p\n", getDescriptor2()); - printf("t_NonGCThreadStaticBlocks: %p\n", getDescriptor3()); - printf("t_GCThreadStaticBlocks: %p\n", getDescriptor4()); + printf("sizeof(t_NonGCMaxThreadStaticBlocks): %lu\n", sizeof(t_NonGCMaxThreadStaticBlocks)); + printf("sizeof(t_GCMaxThreadStaticBlocks): %lu\n", sizeof(t_GCMaxThreadStaticBlocks)); + printf("sizeof(t_NonGCThreadStaticBlocks): %lu\n", sizeof(t_NonGCThreadStaticBlocks)); + printf("sizeof(t_GCThreadStaticBlocks): %lu\n", sizeof(t_GCThreadStaticBlocks)); #endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); From 03d9c2c695ba3c389c232f2affed92e339008486 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 13:44:30 -0700 Subject: [PATCH 06/79] code cleanup --- src/coreclr/inc/corinfo.h | 3 - src/coreclr/inc/jithelpers.h | 4 -- src/coreclr/jit/helperexpansion.cpp | 11 +-- .../Common/JitInterface/CorInfoHelpFunc.cs | 2 - src/coreclr/vm/amd64/asmhelpers.S | 12 ---- src/coreclr/vm/jitinterface.cpp | 70 ++++--------------- 6 files changed, 17 insertions(+), 85 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 13fe4715be653..739715db99902 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -550,8 +550,6 @@ enum CorInfoHelpFunc CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, - GetNonGCMaxThreadStaticBlocksAddr, - GetGCMaxThreadStaticBlocksAddr, /* Debugger */ @@ -1732,7 +1730,6 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO { #ifdef _MSC_VER CORINFO_CONST_LOOKUP tlsIndex; - uint32_t offsetOfThreadLocalStoragePointer; uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index bdd9740ed4a54..1913a428da942 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -208,10 +208,6 @@ JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(GetNonGCMaxThreadStaticBlocksAddr, GetNonGCMaxThreadStaticBlocksAddr, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(GetGCMaxThreadStaticBlocksAddr, GetGCMaxThreadStaticBlocksAddr, CORINFO_HELP_SIG_REG_ONLY) - - // Debugger JITHELPER(CORINFO_HELP_DBG_IS_JUST_MY_CODE, JIT_DbgIsJustMyCode,CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4253a120384db..4d15a8444b4e8 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -542,16 +542,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); - // assert(false); - // assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); - // if (TargetOS::IsWindows) { assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); - // } else { - // assert((eeGetHelperNum(call->gtCallMethHnd) == GetGCMaxThreadStaticBlocksAddr) || - // (eeGetHelperNum(call->gtCallMethHnd) == GetNonGCMaxThreadStaticBlocksAddr)); - // } call->ClearExpTLSFieldAccess(); assert(call->gtArgs.CountArgs() == 1); @@ -638,7 +631,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); - + // This is a syscall indirect call which takes an argument. + // Populate and set the ABI apporpriately. GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); @@ -655,7 +649,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; - //TODO: GC vs. Non-GC handling GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); GenTree* maxThreadStaticBlocksValue = diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 8478d6dacff69..0693f3c1b69f1 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -192,8 +192,6 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, - GetNonGCMaxThreadStaticBlocksAddr, - GetGCMaxThreadStaticBlocksAddr, /* Debugger */ diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index e9c9f613bcc53..bebfd3376c12d 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -160,18 +160,6 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler ret NESTED_END ProfileEnterNaked, _TEXT -// LPVOID __stdcall GetNonGCMaxThreadStaticBlocksAddr(void)// -LEAF_ENTRY GetNonGCMaxThreadStaticBlocksAddr, _TEXT - INLINE_GET_TLS_VAR t_NonGCMaxThreadStaticBlocks - ret -LEAF_END GetNonGCMaxThreadStaticBlocksAddr, _TEXT - -// LPVOID __stdcall GetGCMaxThreadStaticBlocksAddr(void)// -LEAF_ENTRY GetGCMaxThreadStaticBlocksAddr, _TEXT - INLINE_GET_TLS_VAR t_GCMaxThreadStaticBlocks - ret -LEAF_END GetGCMaxThreadStaticBlocksAddr, _TEXT - # EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp); # # diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 30816b895c734..fd2e5534a470d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -87,8 +87,6 @@ __thread uint32_t t_GCThreadStaticBlocksSize; extern "C" void* __tls_get_addr(void* ti); #endif -// EXTERN_C UINT_PTR STDCALL GetNonGCMaxThreadStaticBlocksAddr(); - // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro // @@ -1497,11 +1495,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, FieldDesc * pField = (FieldDesc*)pResolvedToken->hField; MethodTable * pFieldMT = pField->GetApproxEnclosingMethodTable(); - -// #ifndef _MSC_VER -// printf("addr: %lu\n", GetNonGCMaxThreadStaticBlocksAddr()); -// #endif - // Helper to use if the field access requires it CORINFO_FIELD_ACCESSOR fieldAccessor = (CORINFO_FIELD_ACCESSOR)-1; DWORD fieldFlags = 0; @@ -1814,6 +1807,16 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG } #ifndef _MSC_VER + +void* getThreadStaticDescriptor(uint8_t* p) +{ + _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), + "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); + + p += 4; + return *(uint32_t*)p + (p + 4); +} + void* getNonGCMaxThreadStaticDescriptor() { uint8_t* p; @@ -1826,14 +1829,7 @@ void* getNonGCMaxThreadStaticDescriptor() "callq __tls_get_addr\n" : "=b"(p)); - if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) - { - printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); - exit(1); - } - p += 4; - - return *(uint32_t*)p + (p + 4); + return getThreadStaticDescriptor(p); } void* getNonGCThreadStaticBlockDescriptor() @@ -1848,14 +1844,7 @@ void* getNonGCThreadStaticBlockDescriptor() "callq __tls_get_addr\n" : "=b"(p)); - if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) - { - printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); - exit(1); - } - p += 4; - - return *(uint32_t*)p + (p + 4); + return getThreadStaticDescriptor(p); } void* getGCMaxThreadStaticDescriptor() @@ -1870,14 +1859,7 @@ void* getGCMaxThreadStaticDescriptor() "callq __tls_get_addr\n" : "=b"(p)); - if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) - { - printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); - exit(1); - } - p += 4; - - return *(uint32_t*)p + (p + 4); + return getThreadStaticDescriptor(p); } void* getGCThreadStaticBlockDescriptor() @@ -1892,14 +1874,7 @@ void* getGCThreadStaticBlockDescriptor() "callq __tls_get_addr\n" : "=b"(p)); - if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) - { - printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); - exit(1); - } - p += 4; - - return *(uint32_t*)p + (p + 4); + return getThreadStaticDescriptor(p); } #endif @@ -1941,23 +1916,8 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getNonGCMaxThreadStaticDescriptor(); pInfo->descrAddrOfThreadStaticBlocks = (size_t)getNonGCThreadStaticBlockDescriptor(); } - - printf("t_NonGCMaxThreadStaticBlocks: %p\n", getNonGCMaxThreadStaticDescriptor()); - printf("t_NonGCThreadStaticBlocks: %p\n", getNonGCThreadStaticBlockDescriptor()); - printf("t_GCMaxThreadStaticBlocks: %p\n", getGCMaxThreadStaticDescriptor()); - printf("t_GCThreadStaticBlocks: %p\n", getGCThreadStaticBlockDescriptor()); - - printf("&t_NonGCMaxThreadStaticBlocks = %p\n", &t_NonGCMaxThreadStaticBlocks); - printf("&t_NonGCThreadStaticBlocks = %p\n", &t_NonGCThreadStaticBlocks); - printf("&t_GCMaxThreadStaticBlocks = %p\n", &t_GCMaxThreadStaticBlocks); - printf("&t_GCThreadStaticBlocks = %p\n", &t_GCThreadStaticBlocks); - - printf("sizeof(t_NonGCMaxThreadStaticBlocks): %lu\n", sizeof(t_NonGCMaxThreadStaticBlocks)); - printf("sizeof(t_GCMaxThreadStaticBlocks): %lu\n", sizeof(t_GCMaxThreadStaticBlocks)); - printf("sizeof(t_NonGCThreadStaticBlocks): %lu\n", sizeof(t_NonGCThreadStaticBlocks)); - printf("sizeof(t_GCThreadStaticBlocks): %lu\n", sizeof(t_GCThreadStaticBlocks)); - #endif + pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); JIT_TO_EE_TRANSITION_LEAF(); From a98c2cf217fb73e689a842ee23389387554186b9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 15:24:32 -0700 Subject: [PATCH 07/79] code cleanup --- src/coreclr/jit/compiler.cpp | 7 +--- src/coreclr/jit/helperexpansion.cpp | 42 ++----------------- src/coreclr/pal/inc/unixasmmacrosamd64.inc | 13 ------ src/coreclr/vm/amd64/AsmHelpers.asm | 1 - src/coreclr/vm/amd64/JitHelpers_Fast.asm | 2 + src/coreclr/vm/amd64/jithelpers_fast.S | 1 + src/coreclr/vm/i386/asmhelpers.S | 7 ---- src/coreclr/vm/jithelpers.cpp | 2 - .../static/singlefilehost_unixexports.src | 2 +- 9 files changed, 10 insertions(+), 67 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 8f0fcf95f45e5..9c14e41c3bd7f 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5044,11 +5044,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Partially inline static initializations DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit); - // if (TargetOS::IsWindows) - { - // Currently this is only applicable for Windows - DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); - } + // Expand thread local access + DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4d15a8444b4e8..c463cb27d6fa3 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -444,36 +444,6 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() return fgExpandHelper<&Compiler::fgExpandThreadLocalAccessForCall>(true); } -// -//extern "C" void* __tls_get_addr(void* ti); -//__thread int x; -// -//void* getDescriptor() -//{ -// uint8_t* p; -// __asm__("leaq 0(%%rip), %%rbx\n" -// "data16\n" -// "leaq x@TLSGD(%%rip), %%rdi\n" -// "data16\n" -// "data16\n" -// "rex64\n" -// "callq __tls_get_addr\n" -// : "=b"(p)); -// -// // printf("p= %x\n", p[0]); -// // printf("p= %x\n", p[1]); -// // printf("p= %x\n", p[2]); -// // printf("p= %x\n", p[3]); -// if (p[0] != 0x66 || p[1] != 0x48 || p[2] != 0x8d || p[3] != 0x3d) -// { -// printf("Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)\n"); -// exit(1); -// } -// p += 4; -// -// return *(uint32_t*)p + (p + 4); -// // return p; -//} //------------------------------------------------------------------------------ // fgExpandThreadLocalAccessForCall : Expand the CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED @@ -518,8 +488,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* DISPTREE(call); JITDUMP("\n"); - bool isGCThreadStatic = false; - isGCThreadStatic = + bool isGCThreadStatic = eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; @@ -539,6 +508,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #else JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); + JITDUMP("descrAddrOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.descrAddrOfThreadStaticBlocks); #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); @@ -582,6 +552,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* tlsValue = nullptr; + unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); + lvaTable[tlsLclNum].lvType = TYP_I_IMPL; #ifdef _MSC_VER size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; @@ -606,9 +578,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Cache the tls value - unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); - lvaTable[tlsLclNum].lvType = TYP_I_IMPL; - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); @@ -646,9 +615,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif // UNIX_X86_ABI // Cache the tls value - unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); - lvaTable[tlsLclNum].lvType = TYP_I_IMPL; - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); GenTree* maxThreadStaticBlocksValue = diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index ce372c7615482..bb1e70a27bef0 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -363,16 +363,3 @@ C_FUNC(\Name\()_End): .cfi_same_value rbp .endm - -.macro INLINE_GET_TLS_VAR Var - .att_syntax -#if defined(__APPLE__) - movq _\Var@TLVP(%rip), %rdi - callq *(%rdi) -#else - leaq \Var@TLSLD(%rip), %rdi - callq __tls_get_addr@PLT - addq $\Var@DTPOFF, %rax -#endif - .intel_syntax noprefix -.endm \ No newline at end of file diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 6a304b006f311..c4501546d836e 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -20,7 +20,6 @@ endif GenerateArrayOpStubExceptionCase macro ErrorCaseName, ExceptionName - NESTED_ENTRY ErrorCaseName&_RSIRDI_ScratchArea, _TEXT ; account for scratch area, rsi, rdi already on the stack diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index a6240dddd8249..dd5b891a44134 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -49,11 +49,13 @@ endif extern JIT_InternalThrow:proc + ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT ret LEAF_END JIT_PatchedCodeStart, _TEXT + ; This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow ; or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ ; change at runtime as the GC changes. Initially it should simply be a copy of the diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index d2ec286d71a94..32890b471b26c 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -10,6 +10,7 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT ret LEAF_END JIT_PatchedCodeStart, _TEXT + // There is an even more optimized version of these helpers possible which takes // advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 // that check (this is more significant in the JIT_WriteBarrier case). diff --git a/src/coreclr/vm/i386/asmhelpers.S b/src/coreclr/vm/i386/asmhelpers.S index 108fc5259deb4..e0b87813592dd 100644 --- a/src/coreclr/vm/i386/asmhelpers.S +++ b/src/coreclr/vm/i386/asmhelpers.S @@ -5,13 +5,6 @@ #include "unixasmmacros.inc" #include "asmconstants.h" - -// LPVOID __stdcall GetNonGCThreadStaticBlocksAddr(void)// - LEAF_ENTRY GetNonGCThreadStaticBlocksAddr, _TEXT - leaq t_NonGCThreadStaticBlocks@TLSLD(rip), rax - ret - LEAF_END GetNonGCThreadStaticBlocksAddr, _TEXT - // // FramedMethodFrame prolog // diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 9fa79a224f9d9..7eb34d9a9b4ec 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1807,8 +1807,6 @@ EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; #include HCIMPL2(void*, JIT_GetSharedNonGCThreadStaticBase, DomainLocalModule *pDomainLocalModule, DWORD dwClassDomainID) { - // t_NonGCMaxThreadStaticBlocks = 500; - FCALL_CONTRACT; // Get the ModuleIndex diff --git a/src/native/corehost/apphost/static/singlefilehost_unixexports.src b/src/native/corehost/apphost/static/singlefilehost_unixexports.src index db495e5ece2da..18d5697e84580 100644 --- a/src/native/corehost/apphost/static/singlefilehost_unixexports.src +++ b/src/native/corehost/apphost/static/singlefilehost_unixexports.src @@ -8,4 +8,4 @@ DotNetRuntimeInfo g_dacTable ; Used by profilers -MetaDataGetDispenser \ No newline at end of file +MetaDataGetDispenser From 04b3cbc08961f71e249ec84fdffd45c6d8e546df Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 15:28:30 -0700 Subject: [PATCH 08/79] add comments --- src/coreclr/vm/jitinterface.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index fd2e5534a470d..13f0e17e8909e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1813,7 +1813,14 @@ void* getThreadStaticDescriptor(uint8_t* p) _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. p += 4; + + // The descriptor address is located at *p at this point. Ready that and add + // it to the instruction pointer to locate the address of `ti` that will be used + // to pass to __tls_get_addr during execution. return *(uint32_t*)p + (p + 4); } From 46d8fc3ff1bc3d0c2defa40eab05dad9366554b5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 15:28:56 -0700 Subject: [PATCH 09/79] jit format --- src/coreclr/jit/helperexpansion.cpp | 10 ++++------ src/coreclr/jit/lsraxarch.cpp | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index c463cb27d6fa3..7879ecd2ac1d0 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -596,9 +596,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); #else - GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); // This is a syscall indirect call which takes an argument. // Populate and set the ABI apporpriately. @@ -615,7 +615,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif // UNIX_X86_ABI // Cache the tls value - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); GenTree* maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); @@ -627,13 +627,11 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif // _MSC_VER - // Create tree for "if (maxThreadStaticBlocks < typeIndex)" GenTree* maxThreadStaticBlocksCond = gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue)); maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond); - // Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]" typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue), gtNewIconNode(TARGET_POINTER_SIZE, TYP_INT)); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 40cc8f3c3dd71..f63afc08c38d0 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1345,7 +1345,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(argNode->GetRegNum() == argReg); } } - } + } } buildInternalRegisterUses(); From f44d745df11a972d1de25c9e61e6226b571b0af0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 2 Jun 2023 17:09:50 -0700 Subject: [PATCH 10/79] update guid --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index eee46d6068395..904b102aeb06b 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* dfc41bc9-f134-4c50-897e-fc9304a82059 */ - 0xdfc41bc9, - 0xf134, - 0x4c50, - {0x89, 0x7e, 0xfc, 0x93, 0x04, 0xa8, 0x20, 0x59} +constexpr GUID JITEEVersionIdentifier = { /* 02e334af-4e6e-4a68-9feb-308d3d2661bc */ + 0x2e334af, + 0x4e6e, + 0x4a68, + {0x9f, 0xeb, 0x30, 0x8d, 0x3d, 0x26, 0x61, 0xbc} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// From fcbebaac6d2fbc339c62a9cbdb94d34650ba0bbc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 5 Jun 2023 19:22:20 -0700 Subject: [PATCH 11/79] review feedback --- src/coreclr/inc/corinfo.h | 2 +- src/coreclr/jit/helperexpansion.cpp | 10 +++++----- src/coreclr/vm/jitinterface.cpp | 10 ++++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 739715db99902..bd3698fc26be3 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1736,7 +1736,7 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO #else size_t tlsGetAddrFtnPtr; size_t descrAddrOfMaxThreadStaticBlock; - size_t descrAddrOfThreadStaticBlocks; + size_t offsetOfThreadStaticBlocks; #endif uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index c1b73084fb0d9..dd08fcf9247fb 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -495,13 +495,13 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); + uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + #ifdef _MSC_VER uint32_t offsetOfMaxThreadStaticBlocksVal = 0; - uint32_t offsetOfThreadStaticBlocksVal = 0; + JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; - offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; - JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); @@ -509,7 +509,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #else JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); - JITDUMP("descrAddrOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.descrAddrOfThreadStaticBlocks); + JITDUMP("offsetOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks); #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); @@ -622,7 +622,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), - gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL)); + gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL)); GenTree* threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 7716a0a5a49e6..99c3c8fbef173 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1915,13 +1915,15 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; if (isGCType) { - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getGCMaxThreadStaticDescriptor(); - pInfo->descrAddrOfThreadStaticBlocks = (size_t)getGCThreadStaticBlockDescriptor(); + size_t maxThreadStaticDescriptor = getGCMaxThreadStaticDescriptor(); + pInfo->descrAddrOfMaxThreadStaticBlock = maxThreadStaticDescriptor; + pInfo->offsetOfThreadStaticBlocks = (size_t)(&t_GCThreadStaticBlocks - __tls_get_addr(maxThreadStaticDescriptor) ); } else { - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getNonGCMaxThreadStaticDescriptor(); - pInfo->descrAddrOfThreadStaticBlocks = (size_t)getNonGCThreadStaticBlockDescriptor(); + size_t maxThreadStaticDescriptor = getNonGCMaxThreadStaticDescriptor(); + pInfo->descrAddrOfMaxThreadStaticBlock = maxThreadStaticDescriptor; + pInfo->offsetOfThreadStaticBlocks = (size_t)(&t_NonGCThreadStaticBlocks - __tls_get_addr(maxThreadStaticDescriptor); } #endif From 7364faa08d2675f4cf2c6083cf666fe3f867a51c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 6 Jun 2023 11:35:44 -0700 Subject: [PATCH 12/79] fix the offset --- .../tools/superpmi/superpmi-shared/agnostic.h | 2 +- .../superpmi-shared/methodcontext.cpp | 8 +++---- src/coreclr/vm/jitinterface.cpp | 21 ++++++++++++------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index d16369ec48925..10f6ef5483b8c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -526,7 +526,7 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo #else DWORDLONG tlsGetAddrFtnPtr; DWORDLONG descrAddrOfMaxThreadStaticBlock; - DWORDLONG descrAddrOfThreadStaticBlocks; + DWORDLONG offsetOfThreadStaticBlocks; #endif UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index a0294de9ffc62..d4dc5f509784f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3648,7 +3648,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC #else value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; - value.descrAddrOfThreadStaticBlocks = pInfo->descrAddrOfThreadStaticBlocks; + value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; #endif value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; @@ -3668,8 +3668,8 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); #else printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 - ", descrAddrOfMaxThreadStaticBlock-%lu, descrAddrOfThreadStaticBlocks-%lu", - key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.descrAddrOfThreadStaticBlocks); + ", descrAddrOfMaxThreadStaticBlock-%lu, offsetOfThreadStaticBlocks-%lu", + key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.offsetOfThreadStaticBlocks); #endif } @@ -3689,7 +3689,7 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC #else pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; pInfo->descrAddrOfMaxThreadStaticBlock = value.descrAddrOfMaxThreadStaticBlock; - pInfo->descrAddrOfThreadStaticBlocks = value.descrAddrOfThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; #endif pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 99c3c8fbef173..b9f6d99fbe21a 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1912,19 +1912,26 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } #else - pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; + void* maxThreadStaticDescriptor = 0; + size_t addrOfMaxThreadStaticBlock = 0; + size_t addressOfThreadStaticBlock = 0; + if (isGCType) { - size_t maxThreadStaticDescriptor = getGCMaxThreadStaticDescriptor(); - pInfo->descrAddrOfMaxThreadStaticBlock = maxThreadStaticDescriptor; - pInfo->offsetOfThreadStaticBlocks = (size_t)(&t_GCThreadStaticBlocks - __tls_get_addr(maxThreadStaticDescriptor) ); + maxThreadStaticDescriptor = getGCMaxThreadStaticDescriptor(); + addressOfThreadStaticBlock = (size_t)&t_GCThreadStaticBlocks; } else { - size_t maxThreadStaticDescriptor = getNonGCMaxThreadStaticDescriptor(); - pInfo->descrAddrOfMaxThreadStaticBlock = maxThreadStaticDescriptor; - pInfo->offsetOfThreadStaticBlocks = (size_t)(&t_NonGCThreadStaticBlocks - __tls_get_addr(maxThreadStaticDescriptor); + maxThreadStaticDescriptor = getNonGCMaxThreadStaticDescriptor(); + addressOfThreadStaticBlock = (size_t)&t_NonGCThreadStaticBlocks; } + + addrOfMaxThreadStaticBlock = (size_t)__tls_get_addr(maxThreadStaticDescriptor); + + pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; + pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - addrOfMaxThreadStaticBlock; #endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); From f257987c4a094464a1ba029cdb9f778c5cc49824 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 6 Jun 2023 23:42:30 -0700 Subject: [PATCH 13/79] arm64: wip --- src/coreclr/inc/corinfo.h | 8 +--- src/coreclr/jit/helperexpansion.cpp | 3 +- .../tools/superpmi/superpmi-shared/agnostic.h | 4 -- src/coreclr/vm/jitinterface.cpp | 42 ++++--------------- 4 files changed, 10 insertions(+), 47 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index bd3698fc26be3..4df7f278a9e4f 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1728,16 +1728,12 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { -#ifdef _MSC_VER CORINFO_CONST_LOOKUP tlsIndex; uint32_t offsetOfThreadLocalStoragePointer; uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; -#else - size_t tlsGetAddrFtnPtr; - size_t descrAddrOfMaxThreadStaticBlock; - size_t offsetOfThreadStaticBlocks; -#endif + size_t tlsGetAddrFtnPtr; // linux-specific + size_t descrAddrOfMaxThreadStaticBlock; // linux-specific uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index dd08fcf9247fb..4187f1fbe784f 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -496,6 +496,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); #ifdef _MSC_VER uint32_t offsetOfMaxThreadStaticBlocksVal = 0; @@ -505,11 +506,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); - JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); #else JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); - JITDUMP("offsetOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks); #endif JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 10f6ef5483b8c..c0ac6b1b330dc 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -518,16 +518,12 @@ struct Agnostic_GetProfilingHandle struct Agnostic_GetThreadLocalStaticBlocksInfo { -#ifdef _MSC_VER Agnostic_CORINFO_CONST_LOOKUP tlsIndex; UINT offsetOfThreadLocalStoragePointer; UINT offsetOfMaxThreadStaticBlocks; UINT offsetOfThreadStaticBlocks; -#else DWORDLONG tlsGetAddrFtnPtr; DWORDLONG descrAddrOfMaxThreadStaticBlock; - DWORDLONG offsetOfThreadStaticBlocks; -#endif UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index b9f6d99fbe21a..642dad4573c9e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1808,6 +1808,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG #ifndef _MSC_VER +#if defined(TARGET_AMD64) void* getThreadStaticDescriptor(uint8_t* p) { _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), @@ -1829,22 +1830,7 @@ void* getNonGCMaxThreadStaticDescriptor() uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_NonGCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : "=b"(p)); - - return getThreadStaticDescriptor(p); -} - -void* getNonGCThreadStaticBlockDescriptor() -{ - uint8_t* p; - __asm__("leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq t_NonGCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1859,7 +1845,7 @@ void* getGCMaxThreadStaticDescriptor() uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_GCMaxThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" + "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1868,21 +1854,10 @@ void* getGCMaxThreadStaticDescriptor() return getThreadStaticDescriptor(p); } +#elif defined(TARGET_ARM64) -void* getGCThreadStaticBlockDescriptor() -{ - uint8_t* p; - __asm__("leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq t_GCThreadStaticBlocks@TLSGD(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : "=b"(p)); - - return getThreadStaticDescriptor(p); -} +#elif define(TARGET_X86) +#endif #endif /*********************************************************************/ @@ -1913,7 +1888,6 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* } #else void* maxThreadStaticDescriptor = 0; - size_t addrOfMaxThreadStaticBlock = 0; size_t addressOfThreadStaticBlock = 0; if (isGCType) @@ -1927,11 +1901,9 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* addressOfThreadStaticBlock = (size_t)&t_NonGCThreadStaticBlocks; } - addrOfMaxThreadStaticBlock = (size_t)__tls_get_addr(maxThreadStaticDescriptor); - pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; - pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - addrOfMaxThreadStaticBlock; + pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - (size_t)__tls_get_addr(maxThreadStaticDescriptor); #endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); From f85614b52e979741f17c5928ef0b713fc2e3994a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 7 Jun 2023 16:20:48 -0700 Subject: [PATCH 14/79] linux arm64 model --- src/coreclr/vm/jitinterface.cpp | 94 ++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 642dad4573c9e..224e35158e845 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1825,7 +1825,7 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getNonGCMaxThreadStaticDescriptor() +void* getNonGCMaxThreadStaticOffset() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" @@ -1840,7 +1840,7 @@ void* getNonGCMaxThreadStaticDescriptor() return getThreadStaticDescriptor(p); } -void* getGCMaxThreadStaticDescriptor() +void* getGCMaxThreadStaticOffset() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" @@ -1855,7 +1855,77 @@ void* getGCMaxThreadStaticDescriptor() return getThreadStaticDescriptor(p); } #elif defined(TARGET_ARM64) - +uint64_t getNonGCMaxThreadStaticOffset() +{ + uint64_t offset; + __asm__ ( + "adrp x0, :tlsdesc:t_NonGCMaxThreadStaticBlocks\n" + "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks]\n" + "add x0, x0, :tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks\n" + ".tlsdesccall t_NonGCMaxThreadStaticBlocks\n" + "blr x1\n" + "mov %[result], x0\n" + : [result] "=r" (offset) + : + : "x0", "x1" + ); + + return offset; +} + +uint64_t getNonGCThreadStaticOffset() +{ + uint64_t offset; + __asm__ ( + "adrp x0, :tlsdesc:t_NonGCThreadStaticBlocks\n" + "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCThreadStaticBlocks]\n" + "add x0, x0, :tlsdesc_lo12:t_NonGCThreadStaticBlocks\n" + ".tlsdesccall t_NonGCThreadStaticBlocks\n" + "blr x1\n" + "mov %[result], x0\n" + : [result] "=r" (offset) + : + : "x0", "x1" + ); + + return offset; +} + +uint64_t getGCMaxThreadStaticOffset() +{ + uint64_t offset; + __asm__ ( + "adrp x0, :tlsdesc:t_GCMaxThreadStaticBlocks\n" + "ldr x1, [x0, #:tlsdesc_lo12:t_GCMaxThreadStaticBlocks]\n" + "add x0, x0, :tlsdesc_lo12:t_GCMaxThreadStaticBlocks\n" + ".tlsdesccall t_GCMaxThreadStaticBlocks\n" + "blr x1\n" + "mov %[result], x0\n" + : [result] "=r" (offset) + : + : "x0", "x1" + ); + + return offset; +} + +uint64_t getGCThreadStaticOffset() +{ + uint64_t offset; + __asm__ ( + "adrp x0, :tlsdesc:t_GCThreadStaticBlocks\n" + "ldr x1, [x0, #:tlsdesc_lo12:t_GCThreadStaticBlocks]\n" + "add x0, x0, :tlsdesc_lo12:t_GCThreadStaticBlocks\n" + ".tlsdesccall t_GCThreadStaticBlocks\n" + "blr x1\n" + "mov %[result], x0\n" + : [result] "=r" (offset) + : + : "x0", "x1" + ); + + return offset; +} #elif define(TARGET_X86) #endif #endif @@ -1886,24 +1956,36 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCThreadStaticBlocks); pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } -#else +#elif defined(TARGET_AMD64) void* maxThreadStaticDescriptor = 0; size_t addressOfThreadStaticBlock = 0; if (isGCType) { - maxThreadStaticDescriptor = getGCMaxThreadStaticDescriptor(); + maxThreadStaticDescriptor = getGCMaxThreadStaticOffset(); addressOfThreadStaticBlock = (size_t)&t_GCThreadStaticBlocks; } else { - maxThreadStaticDescriptor = getNonGCMaxThreadStaticDescriptor(); + maxThreadStaticDescriptor = getNonGCMaxThreadStaticOffset(); addressOfThreadStaticBlock = (size_t)&t_NonGCThreadStaticBlocks; } pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - (size_t)__tls_get_addr(maxThreadStaticDescriptor); + +#elif defined(TARGET_ARM64) + if (isGCType) + { + pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset(); + pInfo->offsetOfMaxThreadStaticBlocks = getGCMaxThreadStaticOffset(); + } + else + { + pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset(); + pInfo->offsetOfMaxThreadStaticBlocks = getNonGCMaxThreadStaticOffset(); + } #endif pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); From 5bdf88118c5cd0c8d60363b2da4dda3f7e95ffac Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 8 Jun 2023 13:43:19 -0700 Subject: [PATCH 15/79] arm64: offsetOfThreadStaticBlock adjustment --- src/coreclr/jit/helperexpansion.cpp | 3 ++- src/coreclr/vm/jitinterface.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4187f1fbe784f..10cfea522ce11 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -594,8 +594,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); GenTree* threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); +#elif defined(TARGET_ARM64) -#else +#elif defined(TARGET_AMD64) GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 224e35158e845..c29c8a2b7db84 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1974,17 +1974,21 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - (size_t)__tls_get_addr(maxThreadStaticDescriptor); - #elif defined(TARGET_ARM64) if (isGCType) { - pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset(); pInfo->offsetOfMaxThreadStaticBlocks = getGCMaxThreadStaticOffset(); + pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset() - pInfo->offsetOfMaxThreadStaticBlocks; } else { - pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset(); pInfo->offsetOfMaxThreadStaticBlocks = getNonGCMaxThreadStaticOffset(); + pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset() - pInfo->offsetOfMaxThreadStaticBlocks; + + // x64 and arm64: + //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the + // overall offsets of those 2 variables. Storing distance is risky as rearranging the variables can lead to + // having distance negative. } #endif From b6d2ef074f8417dbaba317c578187a7a735aabb8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 8 Jun 2023 16:29:27 -0700 Subject: [PATCH 16/79] Add mrs and tpid0 register --- src/coreclr/jit/emitarm64.cpp | 13 +++++++++++++ src/coreclr/jit/emitarm64.h | 3 +++ src/coreclr/jit/emitfmtsarm64.h | 2 +- src/coreclr/jit/instrsarm64.h | 3 +++ src/coreclr/jit/registerarm64.h | 3 ++- 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d113bd1a296ec..953d855ae68b5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9060,6 +9060,19 @@ void emitter::emitIns_Call(EmitCallType callType, return ureg << 10; } +/***************************************************************************** + * + * Returns an encoding for the tpidr_el0 register. + */ + +/*static*/ emitter::code_t emitter::insEncodeReg_Tpid0() +{ + // op0 op1 CRn CRm op2 + // 11 011 1101 0000 010 + emitter::code_t sr = 0xd382; + return sr << 5; +} + /***************************************************************************** * * Returns an encoding for the specified condition code. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 59806d4b4ea25..c9e0972c106de 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -311,6 +311,9 @@ static code_t insEncodeReg_Vm(regNumber reg); // Returns an encoding for the specified register used in the 'Va' position static code_t insEncodeReg_Va(regNumber reg); +// Returns an encoding for the tpidr_el0 register. +static code_t insEncodeReg_Tpid0(); + // Returns an encoding for the imm which represents the condition code. static code_t insEncodeCond(insCond cond); diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index 81f41085a2ebe..31bbde6afc47a 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -227,7 +227,7 @@ IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................ IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16 IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier -IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva) +IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva, mrs) IF_DEF(INVALID, IS_NONE, NONE) // diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index 5745ac0d70180..398a43620afae 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1595,6 +1595,9 @@ INST1(isb, "isb", 0, IF_SI_0B, 0xD50330DF) INST1(dczva, "dczva", 0, IF_SR_1A, 0xD50B7420) // dc zva,Rt SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt +INST1(mrs, "mrs", 0, IF_SR_1A, 0xD5300000) + // mrs Rt,SR SR_1A 110101010011ssss sssssssssssttttt D530 0000 Rt, SR + INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00) // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h index 7ce66ada1beb0..4da71842da361 100644 --- a/src/coreclr/jit/registerarm64.h +++ b/src/coreclr/jit/registerarm64.h @@ -98,8 +98,9 @@ REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31") #define NBASE 64 REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?") +REGDEF(TPID0, 1+NBASE, 0x0000, "tpid0", "tpid0") // This must be last! -REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 2+NBASE, 0x0000, "STK", "STK") /*****************************************************************************/ #undef RMASK From 75ec05a6702e4513ee57d706a12076deda92e34d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 8 Jun 2023 19:46:22 -0700 Subject: [PATCH 17/79] arm64: use the new mrs/tpidr0 --- src/coreclr/jit/emitarm64.cpp | 9 +++++++++ src/coreclr/jit/helperexpansion.cpp | 28 +++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 953d855ae68b5..fc1287874748d 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14140,11 +14140,20 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR emitIns_R_S(ins, attr, dataReg, lclNum, offset); } } +#ifdef _MSC_VER else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) { // On Arm64, TEB is in r18, so load from the r18 as base. emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue()); } +#else + else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) + { + assert(addr->AsIntCon()->IconValue() == 0); + // On non-windows, need to load the address from system register. + emitIns_R_R(INS_mrs, attr, dataReg, REG_TPID0); + } +#endif else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) { // Then load/store dataReg from/to [memBase + offset] diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 10cfea522ce11..3d7ca0d89fa85 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -496,13 +496,11 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + uint32_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); #ifdef _MSC_VER - uint32_t offsetOfMaxThreadStaticBlocksVal = 0; - JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); - offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); @@ -594,7 +592,31 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); GenTree* threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + #elif defined(TARGET_ARM64) + //TODO: Update the comments + // Mark this ICON as a TLS_HDL, codegen will do: + // mrs xt, tpidr_elf0 + // mov xd, [xt+cns] + tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); + + // Cache the tls value + GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + + // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" + GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); + GenTree* maxThreadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); + GenTree* maxThreadStaticBlocksValue = + gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" + GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); + GenTree* threadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); + GenTree* threadStaticBlocksValue = + gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); #elif defined(TARGET_AMD64) GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); From fd200b50769b2dfc3a1865c68f174e0a48ea7f4e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 04:54:50 +0000 Subject: [PATCH 18/79] fix arm64 build and offset calculation: --- src/coreclr/jit/helperexpansion.cpp | 21 +++++++++++-------- .../superpmi-shared/methodcontext.cpp | 2 +- src/coreclr/vm/jitinterface.cpp | 4 ++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 3d7ca0d89fa85..0615722308eca 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -552,6 +552,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* tlsValue = nullptr; unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; + GenTree* maxThreadStaticBlocksValue = nullptr; + GenTree* threadStaticBlocksValue = nullptr; + GenTree* tlsValueDef = nullptr; #ifdef _MSC_VER size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; @@ -576,21 +579,21 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Cache the tls value - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); GenTree* maxThreadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); - GenTree* maxThreadStaticBlocksValue = + maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); - GenTree* threadStaticBlocksValue = + threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); #elif defined(TARGET_ARM64) @@ -601,21 +604,21 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); // Cache the tls value - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); GenTree* maxThreadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); - GenTree* maxThreadStaticBlocksValue = + maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); - GenTree* threadStaticBlocksValue = + threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); #elif defined(TARGET_AMD64) @@ -638,14 +641,14 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif // UNIX_X86_ABI // Cache the tls value - GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); - GenTree* maxThreadStaticBlocksValue = + maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL)); - GenTree* threadStaticBlocksValue = + threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); #endif // _MSC_VER diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index d4dc5f509784f..17cd8fd28592b 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3668,7 +3668,7 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); #else printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 - ", descrAddrOfMaxThreadStaticBlock-%lu, offsetOfThreadStaticBlocks-%lu", + ", descrAddrOfMaxThreadStaticBlock-%lu, offsetOfThreadStaticBlocks-%u", key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.offsetOfThreadStaticBlocks); #endif } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index c29c8a2b7db84..2ad982f31b88b 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1939,7 +1939,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* MODE_PREEMPTIVE; } CONTRACTL_END; - JIT_TO_EE_TRANSITION_LEAF(); + JIT_TO_EE_TRANSITION(); #ifdef _MSC_VER pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); @@ -1994,7 +1994,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - JIT_TO_EE_TRANSITION_LEAF(); + EE_TO_JIT_TRANSITION(); } //--------------------------------------------------------------------------------------- From 45238649569de23356ca62cb507639729423f815 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 06:22:38 +0000 Subject: [PATCH 19/79] arm64: working --- src/coreclr/jit/codegenarm64.cpp | 13 ++++++++++ src/coreclr/jit/emitarm64.cpp | 42 ++++++++++++++++++++------------ src/coreclr/vm/jitinterface.cpp | 4 +-- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8e0faf1f05e38..c7c7ee5d4d5ab 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2836,6 +2836,10 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) // void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { + if (strcmp(compiler->info.compMethodName, "Main") == 0) + { + printf("hello\n"); + } GenTree* data = lclNode->gtOp1; // Stores from a multi-reg source are handled separately. @@ -2947,6 +2951,15 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) inst_Mov_Extend(targetType, /* srcInReg */ true, targetReg, dataReg, /* canSkip */ true, emitActualTypeSize(targetType)); } +#ifndef _MSC_VER + else if (data->IsIconHandle(GTF_ICON_TLS_HDL)) + { + assert(data->AsIntCon()->IconValue() == 0); + emitAttr attr = emitActualTypeSize(targetType); + // On non-windows, need to load the address from system register. + emit->emitIns_R_R(INS_mrs, attr, targetReg, dataReg); + } +#endif else { inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fc1287874748d..79f7cb0bd3081 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -937,10 +937,11 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) datasize = id->idOpSize(); assert(isGeneralRegister(id->idReg1())); assert(datasize == EA_8BYTE); + assert((id->idIns() != INS_mrs) || (id->idReg2() == REG_ZR)); break; default: @@ -4930,6 +4931,11 @@ void emitter::emitIns_R_R( fmt = IF_DV_2L; } break; + case INS_mrs: + // assert(isVectorRegister(reg2)); + fmt = IF_SR_1A; + + break; default: unreached(); @@ -9067,9 +9073,10 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_Tpid0() { - // op0 op1 CRn CRm op2 - // 11 011 1101 0000 010 - emitter::code_t sr = 0xd382; + // o0 op1 CRn CRm op2 + // 1 011 1101 0000 010 + // emitter::code_t sr = 0xd382; + emitter::code_t sr = 0x5e82; return sr << 5; } @@ -11806,9 +11813,13 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) assert(insOptsNone(id->idInsOpt())); code = emitInsCode(ins, fmt); + if (ins == INS_mrs) + { + code |= insEncodeReg_Tpid0(); + } code |= insEncodeReg_Rt(id->idReg1()); // ttttt dst += emitOutput_Instr(dst, code); break; @@ -13938,8 +13949,16 @@ void emitter::emitDispInsHelp( emitDispBarrier((insBarrier)emitGetInsSC(id)); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) - emitDispReg(id->idReg1(), size, false); + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) + if (ins == INS_mrs) + { + emitDispReg(id->idReg1(), size, true); + printf("tpidr_el0"); + } + else + { + emitDispReg(id->idReg1(), size, false); + } break; default: @@ -14140,20 +14159,11 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR emitIns_R_S(ins, attr, dataReg, lclNum, offset); } } -#ifdef _MSC_VER else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) { // On Arm64, TEB is in r18, so load from the r18 as base. emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue()); } -#else - else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) - { - assert(addr->AsIntCon()->IconValue() == 0); - // On non-windows, need to load the address from system register. - emitIns_R_R(INS_mrs, attr, dataReg, REG_TPID0); - } -#endif else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) { // Then load/store dataReg from/to [memBase + offset] diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 2ad982f31b88b..40163da964e6e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1978,12 +1978,12 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* if (isGCType) { pInfo->offsetOfMaxThreadStaticBlocks = getGCMaxThreadStaticOffset(); - pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset() - pInfo->offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset();// - pInfo->offsetOfMaxThreadStaticBlocks; } else { pInfo->offsetOfMaxThreadStaticBlocks = getNonGCMaxThreadStaticOffset(); - pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset() - pInfo->offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset();// - pInfo->offsetOfMaxThreadStaticBlocks; // x64 and arm64: //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the From 4bef20af10a550ac7cee75a9073ec3eb093835a8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 13:40:33 -0700 Subject: [PATCH 20/79] arm64: move to struct model --- src/coreclr/vm/jithelpers.cpp | 13 ++- src/coreclr/vm/jitinterface.cpp | 200 ++++++++++++++++++++------------ 2 files changed, 133 insertions(+), 80 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 7eb34d9a9b4ec..99426934b0168 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1788,11 +1788,16 @@ __declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; #else -EXTERN_C __thread uint32_t t_NonGCMaxThreadStaticBlocks; -EXTERN_C __thread void** t_NonGCThreadStaticBlocks; +struct ThreadStatics +{ + uint32_t NonGCMaxThreadStaticBlocks; + void** NonGCThreadStaticBlocks; + + uint32_t GCMaxThreadStaticBlocks; + void** GCThreadStaticBlocks; +} +__thread ThreadStatic t_threadStatics; -EXTERN_C __thread uint32_t t_GCMaxThreadStaticBlocks; -EXTERN_C __thread void** t_GCThreadStaticBlocks; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 40163da964e6e..a96d899048c39 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -75,17 +75,31 @@ __declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; -#else -__thread uint32_t t_NonGCMaxThreadStaticBlocks; -__thread void** t_NonGCThreadStaticBlocks; -__thread uint32_t t_GCMaxThreadStaticBlocks; -__thread void** t_GCThreadStaticBlocks; +#else +//#ifdef (HOST_AMD64) -__thread uint32_t t_NonGCThreadStaticBlocksSize; -__thread uint32_t t_GCThreadStaticBlocksSize; extern "C" void* __tls_get_addr(void* ti); -#endif +struct ThreadStatics +{ + uint32_t NonGCMaxThreadStaticBlocks; + void** NonGCThreadStaticBlocks; + + uint32_t GCMaxThreadStaticBlocks; + void** GCThreadStaticBlocks; +} +__thread ThreadStatic t_ThreadStatics; + +//#elif defined(HOST_ARM64) +// +//__thread uint32_t t_NonGCMaxThreadStaticBlocks; +//__thread void** t_NonGCThreadStaticBlocks; +//__thread uint32_t t_GCMaxThreadStaticBlocks; +//__thread void** t_GCThreadStaticBlocks; +// +//#endif // HOST_ARM64 +#endif // _MSC_VER + // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro // @@ -1808,7 +1822,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG #ifndef _MSC_VER -#if defined(TARGET_AMD64) +#ifdef HOST_AMD64 void* getThreadStaticDescriptor(uint8_t* p) { _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), @@ -1825,12 +1839,12 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getNonGCMaxThreadStaticOffset() +uint64_t getThreadStaticsBaseOffset() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" + "leaq t_ThreadStatics@TLSGD(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1840,12 +1854,12 @@ void* getNonGCMaxThreadStaticOffset() return getThreadStaticDescriptor(p); } -void* getGCMaxThreadStaticOffset() +void* getNonGCMaxThreadStaticOffset() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" - "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" + "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" "data16\n" "data16\n" "rex64\n" @@ -1854,69 +1868,32 @@ void* getGCMaxThreadStaticOffset() return getThreadStaticDescriptor(p); } -#elif defined(TARGET_ARM64) -uint64_t getNonGCMaxThreadStaticOffset() -{ - uint64_t offset; - __asm__ ( - "adrp x0, :tlsdesc:t_NonGCMaxThreadStaticBlocks\n" - "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks]\n" - "add x0, x0, :tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks\n" - ".tlsdesccall t_NonGCMaxThreadStaticBlocks\n" - "blr x1\n" - "mov %[result], x0\n" - : [result] "=r" (offset) - : - : "x0", "x1" - ); - - return offset; -} -uint64_t getNonGCThreadStaticOffset() -{ - uint64_t offset; - __asm__ ( - "adrp x0, :tlsdesc:t_NonGCThreadStaticBlocks\n" - "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCThreadStaticBlocks]\n" - "add x0, x0, :tlsdesc_lo12:t_NonGCThreadStaticBlocks\n" - ".tlsdesccall t_NonGCThreadStaticBlocks\n" - "blr x1\n" - "mov %[result], x0\n" - : [result] "=r" (offset) - : - : "x0", "x1" - ); - - return offset; -} - -uint64_t getGCMaxThreadStaticOffset() +void* getGCMaxThreadStaticOffset() { - uint64_t offset; - __asm__ ( - "adrp x0, :tlsdesc:t_GCMaxThreadStaticBlocks\n" - "ldr x1, [x0, #:tlsdesc_lo12:t_GCMaxThreadStaticBlocks]\n" - "add x0, x0, :tlsdesc_lo12:t_GCMaxThreadStaticBlocks\n" - ".tlsdesccall t_GCMaxThreadStaticBlocks\n" - "blr x1\n" - "mov %[result], x0\n" - : [result] "=r" (offset) - : - : "x0", "x1" - ); + uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" + "data16\n" + "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" + "data16\n" + "data16\n" + "rex64\n" + "callq __tls_get_addr\n" + : "=b"(p)); - return offset; + return getThreadStaticDescriptor(p); } +#endif // HOST_AMD64 -uint64_t getGCThreadStaticOffset() +#ifdef HOST_ARM64 +uint64_t getThreadStaticsBaseOffset() { uint64_t offset; __asm__ ( - "adrp x0, :tlsdesc:t_GCThreadStaticBlocks\n" - "ldr x1, [x0, #:tlsdesc_lo12:t_GCThreadStaticBlocks]\n" - "add x0, x0, :tlsdesc_lo12:t_GCThreadStaticBlocks\n" - ".tlsdesccall t_GCThreadStaticBlocks\n" + "adrp x0, :tlsdesc:t_ThreadStatics\n" + "ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]\n" + "add x0, x0, :tlsdesc_lo12:t_ThreadStatics\n" + ".tlsdesccall t_ThreadStatics\n" "blr x1\n" "mov %[result], x0\n" : [result] "=r" (offset) @@ -1926,9 +1903,80 @@ uint64_t getGCThreadStaticOffset() return offset; } -#elif define(TARGET_X86) -#endif -#endif +// +//uint64_t getNonGCMaxThreadStaticOffset() +//{ +// uint64_t offset; +// __asm__ ( +// "adrp x0, :tlsdesc:t_NonGCMaxThreadStaticBlocks\n" +// "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks]\n" +// "add x0, x0, :tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks\n" +// ".tlsdesccall t_NonGCMaxThreadStaticBlocks\n" +// "blr x1\n" +// "mov %[result], x0\n" +// : [result] "=r" (offset) +// : +// : "x0", "x1" +// ); +// +// return offset; +//} +// +//uint64_t getNonGCThreadStaticOffset() +//{ +// uint64_t offset; +// __asm__ ( +// "adrp x0, :tlsdesc:t_NonGCThreadStaticBlocks\n" +// "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCThreadStaticBlocks]\n" +// "add x0, x0, :tlsdesc_lo12:t_NonGCThreadStaticBlocks\n" +// ".tlsdesccall t_NonGCThreadStaticBlocks\n" +// "blr x1\n" +// "mov %[result], x0\n" +// : [result] "=r" (offset) +// : +// : "x0", "x1" +// ); +// +// return offset; +//} +// +//uint64_t getGCMaxThreadStaticOffset() +//{ +// uint64_t offset; +// __asm__ ( +// "adrp x0, :tlsdesc:t_GCMaxThreadStaticBlocks\n" +// "ldr x1, [x0, #:tlsdesc_lo12:t_GCMaxThreadStaticBlocks]\n" +// "add x0, x0, :tlsdesc_lo12:t_GCMaxThreadStaticBlocks\n" +// ".tlsdesccall t_GCMaxThreadStaticBlocks\n" +// "blr x1\n" +// "mov %[result], x0\n" +// : [result] "=r" (offset) +// : +// : "x0", "x1" +// ); +// +// return offset; +//} +// +//uint64_t getGCThreadStaticOffset() +//{ +// uint64_t offset; +// __asm__ ( +// "adrp x0, :tlsdesc:t_GCThreadStaticBlocks\n" +// "ldr x1, [x0, #:tlsdesc_lo12:t_GCThreadStaticBlocks]\n" +// "add x0, x0, :tlsdesc_lo12:t_GCThreadStaticBlocks\n" +// ".tlsdesccall t_GCThreadStaticBlocks\n" +// "blr x1\n" +// "mov %[result], x0\n" +// : [result] "=r" (offset) +// : +// : "x0", "x1" +// ); +// +// return offset; +//} +#endif // HOST_ARM64 +#endif // !_MSC_VER /*********************************************************************/ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -1959,7 +2007,6 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #elif defined(TARGET_AMD64) void* maxThreadStaticDescriptor = 0; size_t addressOfThreadStaticBlock = 0; - if (isGCType) { maxThreadStaticDescriptor = getGCMaxThreadStaticOffset(); @@ -1975,15 +2022,16 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - (size_t)__tls_get_addr(maxThreadStaticDescriptor); #elif defined(TARGET_ARM64) + uint64_t threadStaticBaseOffset = getThreadStaticsBaseOffset(); if (isGCType) { - pInfo->offsetOfMaxThreadStaticBlocks = getGCMaxThreadStaticOffset(); - pInfo->offsetOfThreadStaticBlocks = getGCThreadStaticOffset();// - pInfo->offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, GCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, GCThreadStaticBlocks); } else { - pInfo->offsetOfMaxThreadStaticBlocks = getNonGCMaxThreadStaticOffset(); - pInfo->offsetOfThreadStaticBlocks = getNonGCThreadStaticOffset();// - pInfo->offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, NonGCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, NonGCThreadStaticBlocks); // x64 and arm64: //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the From 1f437f6d6dd8250dfbfee0d560f662798b6e7eb9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 21:02:44 +0000 Subject: [PATCH 21/79] arm64: fixed the struct model --- src/coreclr/vm/jithelpers.cpp | 32 ++++++++++++++++---------------- src/coreclr/vm/jitinterface.cpp | 17 +++++++++-------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 99426934b0168..cef8ec5a10945 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1788,17 +1788,17 @@ __declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; #else -struct ThreadStatics +struct ThreadStaticBlockInfo { uint32_t NonGCMaxThreadStaticBlocks; void** NonGCThreadStaticBlocks; uint32_t GCMaxThreadStaticBlocks; void** GCThreadStaticBlocks; -} -__thread ThreadStatic t_threadStatics; - +}; +// struct ThreadStaticBlockInfo; +EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; #endif @@ -1871,22 +1871,22 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn if (t_NonGCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_NonGCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_ThreadStatics.NonGCThreadStaticBlocks; } t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_NonGCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.NonGCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_NonGCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.NonGCMaxThreadStaticBlocks = max(t_ThreadStatics.NonGCMaxThreadStaticBlocks, staticBlockIndex); } HELPER_METHOD_FRAME_END(); @@ -1965,22 +1965,22 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex if (t_GCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_GCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_ThreadStatics.GCThreadStaticBlocks; } t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_GCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.GCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_GCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_GCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_GCMaxThreadStaticBlocks = max(t_GCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.GCMaxThreadStaticBlocks = max(t_ThreadStatics.GCMaxThreadStaticBlocks, staticBlockIndex); } // Get the data pointer of static block diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a96d899048c39..cc005cd1a2fbc 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -80,16 +80,17 @@ __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; //#ifdef (HOST_AMD64) extern "C" void* __tls_get_addr(void* ti); -struct ThreadStatics +struct ThreadStaticBlockInfo { uint32_t NonGCMaxThreadStaticBlocks; void** NonGCThreadStaticBlocks; uint32_t GCMaxThreadStaticBlocks; void** GCThreadStaticBlocks; -} -__thread ThreadStatic t_ThreadStatics; - +}; +__thread ThreadStaticBlockInfo t_ThreadStatics; +__thread uint32_t t_NonGCThreadStaticBlocksSize; +__thread uint32_t t_GCThreadStaticBlocksSize; //#elif defined(HOST_ARM64) // //__thread uint32_t t_NonGCMaxThreadStaticBlocks; @@ -2025,13 +2026,13 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* uint64_t threadStaticBaseOffset = getThreadStaticsBaseOffset(); if (isGCType) { - pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, GCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, GCThreadStaticBlocks); + pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks); } else { - pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, NonGCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStatic, NonGCThreadStaticBlocks); + pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); // x64 and arm64: //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the From b5394d7854d317adec3cb3f1dbcc69d4167b282f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 14:47:53 -0700 Subject: [PATCH 22/79] x64: move to struct model --- src/coreclr/jit/helperexpansion.cpp | 11 +- src/coreclr/vm/jitinterface.cpp | 168 +++++++--------------------- 2 files changed, 49 insertions(+), 130 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 0615722308eca..7792b9c865431 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -642,11 +642,14 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Cache the tls value tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); - GenTree* maxThreadStaticBlocksRef = gtNewLclVarNode(tlsLclNum); - maxThreadStaticBlocksValue = - gtNewIndir(TYP_INT, gtCloneExpr(maxThreadStaticBlocksRef), GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + + GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); + GenTree* maxThreadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); + maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(maxThreadStaticBlocksRef), + GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL)); threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index cc005cd1a2fbc..4879782c2b905 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -77,8 +77,6 @@ __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; #else -//#ifdef (HOST_AMD64) - extern "C" void* __tls_get_addr(void* ti); struct ThreadStaticBlockInfo { @@ -91,14 +89,6 @@ struct ThreadStaticBlockInfo __thread ThreadStaticBlockInfo t_ThreadStatics; __thread uint32_t t_NonGCThreadStaticBlocksSize; __thread uint32_t t_GCThreadStaticBlocksSize; -//#elif defined(HOST_ARM64) -// -//__thread uint32_t t_NonGCMaxThreadStaticBlocks; -//__thread void** t_NonGCThreadStaticBlocks; -//__thread uint32_t t_GCMaxThreadStaticBlocks; -//__thread void** t_GCThreadStaticBlocks; -// -//#endif // HOST_ARM64 #endif // _MSC_VER @@ -1854,36 +1844,36 @@ uint64_t getThreadStaticsBaseOffset() return getThreadStaticDescriptor(p); } - -void* getNonGCMaxThreadStaticOffset() -{ - uint8_t* p; - __asm__("leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : "=b"(p)); - - return getThreadStaticDescriptor(p); -} - -void* getGCMaxThreadStaticOffset() -{ - uint8_t* p; - __asm__("leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : "=b"(p)); - - return getThreadStaticDescriptor(p); -} +// +//void* getNonGCMaxThreadStaticOffset() +//{ +// uint8_t* p; +// __asm__("leaq 0(%%rip), %%rbx\n" +// "data16\n" +// "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" +// "data16\n" +// "data16\n" +// "rex64\n" +// "callq __tls_get_addr\n" +// : "=b"(p)); +// +// return getThreadStaticDescriptor(p); +//} +// +//void* getGCMaxThreadStaticOffset() +//{ +// uint8_t* p; +// __asm__("leaq 0(%%rip), %%rbx\n" +// "data16\n" +// "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" +// "data16\n" +// "data16\n" +// "rex64\n" +// "callq __tls_get_addr\n" +// : "=b"(p)); +// +// return getThreadStaticDescriptor(p); +//} #endif // HOST_AMD64 #ifdef HOST_ARM64 @@ -1904,78 +1894,6 @@ uint64_t getThreadStaticsBaseOffset() return offset; } -// -//uint64_t getNonGCMaxThreadStaticOffset() -//{ -// uint64_t offset; -// __asm__ ( -// "adrp x0, :tlsdesc:t_NonGCMaxThreadStaticBlocks\n" -// "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks]\n" -// "add x0, x0, :tlsdesc_lo12:t_NonGCMaxThreadStaticBlocks\n" -// ".tlsdesccall t_NonGCMaxThreadStaticBlocks\n" -// "blr x1\n" -// "mov %[result], x0\n" -// : [result] "=r" (offset) -// : -// : "x0", "x1" -// ); -// -// return offset; -//} -// -//uint64_t getNonGCThreadStaticOffset() -//{ -// uint64_t offset; -// __asm__ ( -// "adrp x0, :tlsdesc:t_NonGCThreadStaticBlocks\n" -// "ldr x1, [x0, #:tlsdesc_lo12:t_NonGCThreadStaticBlocks]\n" -// "add x0, x0, :tlsdesc_lo12:t_NonGCThreadStaticBlocks\n" -// ".tlsdesccall t_NonGCThreadStaticBlocks\n" -// "blr x1\n" -// "mov %[result], x0\n" -// : [result] "=r" (offset) -// : -// : "x0", "x1" -// ); -// -// return offset; -//} -// -//uint64_t getGCMaxThreadStaticOffset() -//{ -// uint64_t offset; -// __asm__ ( -// "adrp x0, :tlsdesc:t_GCMaxThreadStaticBlocks\n" -// "ldr x1, [x0, #:tlsdesc_lo12:t_GCMaxThreadStaticBlocks]\n" -// "add x0, x0, :tlsdesc_lo12:t_GCMaxThreadStaticBlocks\n" -// ".tlsdesccall t_GCMaxThreadStaticBlocks\n" -// "blr x1\n" -// "mov %[result], x0\n" -// : [result] "=r" (offset) -// : -// : "x0", "x1" -// ); -// -// return offset; -//} -// -//uint64_t getGCThreadStaticOffset() -//{ -// uint64_t offset; -// __asm__ ( -// "adrp x0, :tlsdesc:t_GCThreadStaticBlocks\n" -// "ldr x1, [x0, #:tlsdesc_lo12:t_GCThreadStaticBlocks]\n" -// "add x0, x0, :tlsdesc_lo12:t_GCThreadStaticBlocks\n" -// ".tlsdesccall t_GCThreadStaticBlocks\n" -// "blr x1\n" -// "mov %[result], x0\n" -// : [result] "=r" (offset) -// : -// : "x0", "x1" -// ); -// -// return offset; -//} #endif // HOST_ARM64 #endif // !_MSC_VER @@ -2006,22 +1924,25 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } #elif defined(TARGET_AMD64) - void* maxThreadStaticDescriptor = 0; - size_t addressOfThreadStaticBlock = 0; if (isGCType) { - maxThreadStaticDescriptor = getGCMaxThreadStaticOffset(); - addressOfThreadStaticBlock = (size_t)&t_GCThreadStaticBlocks; + pInfo->offsetOfMaxThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks); } else { - maxThreadStaticDescriptor = getNonGCMaxThreadStaticOffset(); - addressOfThreadStaticBlock = (size_t)&t_NonGCThreadStaticBlocks; - } + pInfo->offsetOfMaxThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); + pInfo->offsetOfThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); + } pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)maxThreadStaticDescriptor; - pInfo->offsetOfThreadStaticBlocks = addressOfThreadStaticBlock - (size_t)__tls_get_addr(maxThreadStaticDescriptor); + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); + + // x64 and arm64: + //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the + // overall offsets of those 2 variables. Storing distance is risky as rearranging the variables can lead to + // having distance negative. + #elif defined(TARGET_ARM64) uint64_t threadStaticBaseOffset = getThreadStaticsBaseOffset(); if (isGCType) @@ -2033,11 +1954,6 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* { pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); - - // x64 and arm64: - //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the - // overall offsets of those 2 variables. Storing distance is risky as rearranging the variables can lead to - // having distance negative. } #endif From dce8d9121740a648110ad8a7a67d9e8f314d0d00 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 18:47:54 -0700 Subject: [PATCH 23/79] code refactoring --- src/coreclr/inc/corinfo.h | 8 ++-- src/coreclr/jit/helperexpansion.cpp | 70 ++++++++++------------------- src/coreclr/vm/jithelpers.cpp | 1 - src/coreclr/vm/jitinterface.cpp | 63 +++++--------------------- 4 files changed, 38 insertions(+), 104 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 4df7f278a9e4f..316c6c2a48211 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1728,12 +1728,12 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { - CORINFO_CONST_LOOKUP tlsIndex; - uint32_t offsetOfThreadLocalStoragePointer; + CORINFO_CONST_LOOKUP tlsIndex; // windows specific + uint32_t offsetOfThreadLocalStoragePointer; // windows specific uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; - size_t tlsGetAddrFtnPtr; // linux-specific - size_t descrAddrOfMaxThreadStaticBlock; // linux-specific + size_t tlsGetAddrFtnPtr; // linux/x64 specific + size_t descrAddrOfMaxThreadStaticBlock; // linux/x64 specific uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7792b9c865431..16f97d0b0fd71 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -478,12 +478,24 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* return false; } + if (TargetOS::IsUnix()) + { +#if defined(TARGET_ARM) || !defined(TARGET_64BIT)) + // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such + // as MRC and MCR are used to access them. We do not support them and so should never optimize the + // field access using TLS. + assert(!"Unsupported scenario of optimizing TLS access on Arm32/x86"); +#endif + } + else + { #ifdef TARGET_ARM - // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such - // as MRC and MCR are used to access them. We do not support them and so should never optimize the - // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Arm32"); + // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such + // as MRC and MCR are used to access them. We do not support them and so should never optimize the + // field access using TLS. + assert(!"Unsupported scenario of optimizing TLS access on Arm32"); #endif + } JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum); DISPTREE(call); @@ -577,51 +589,15 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - - // Cache the tls value - tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); - GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); - - // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" - GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* maxThreadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); - maxThreadStaticBlocksValue = - gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - - // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" - GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* threadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); - threadStaticBlocksValue = - gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - + #elif defined(TARGET_ARM64) - //TODO: Update the comments // Mark this ICON as a TLS_HDL, codegen will do: // mrs xt, tpidr_elf0 // mov xd, [xt+cns] tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); - // Cache the tls value - tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); - GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); - - // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" - GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* maxThreadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); - maxThreadStaticBlocksValue = - gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - - // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" - GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); - GenTree* threadStaticBlocksRef = - gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); - threadStaticBlocksValue = - gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - #elif defined(TARGET_AMD64) + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); @@ -640,10 +616,13 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; #endif // UNIX_X86_ABI +#endif // _MSC_VER + // Cache the tls value - tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); GenTree* maxThreadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); @@ -651,10 +630,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL)); - threadStaticBlocksValue = - gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - -#endif // _MSC_VER + threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); // Create tree for "if (maxThreadStaticBlocks < typeIndex)" GenTree* maxThreadStaticBlocksCond = diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index cef8ec5a10945..cfcf8ca76c714 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1797,7 +1797,6 @@ struct ThreadStaticBlockInfo void** GCThreadStaticBlocks; }; -// struct ThreadStaticBlockInfo; EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 4879782c2b905..5595ff443f83b 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1830,7 +1830,7 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -uint64_t getThreadStaticsBaseOffset() +void* getThreadStaticsBaseOffset() { uint8_t* p; __asm__("leaq 0(%%rip), %%rbx\n" @@ -1844,36 +1844,6 @@ uint64_t getThreadStaticsBaseOffset() return getThreadStaticDescriptor(p); } -// -//void* getNonGCMaxThreadStaticOffset() -//{ -// uint8_t* p; -// __asm__("leaq 0(%%rip), %%rbx\n" -// "data16\n" -// "leaq t_NonGCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" -// "data16\n" -// "data16\n" -// "rex64\n" -// "callq __tls_get_addr\n" -// : "=b"(p)); -// -// return getThreadStaticDescriptor(p); -//} -// -//void* getGCMaxThreadStaticOffset() -//{ -// uint8_t* p; -// __asm__("leaq 0(%%rip), %%rbx\n" -// "data16\n" -// "leaq t_GCMaxThreadStaticBlocks@TLSLDM(%%rip), %%rdi\n" -// "data16\n" -// "data16\n" -// "rex64\n" -// "callq __tls_get_addr\n" -// : "=b"(p)); -// -// return getThreadStaticDescriptor(p); -//} #endif // HOST_AMD64 #ifdef HOST_ARM64 @@ -1923,28 +1893,17 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCThreadStaticBlocks); pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } -#elif defined(TARGET_AMD64) - if (isGCType) - { - pInfo->offsetOfMaxThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks); - } - else - { - pInfo->offsetOfMaxThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); - } - +#else + uint64_t threadStaticBaseOffset = 0; +#if defined(TARGET_AMD64) + // get the address of tls_get_addr system method and base address + // of struct pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); - - // x64 and arm64: - //TODO: Think if we should store the distance between (threadStaticBlock - maxThreadStaticBlock) or just the - // overall offsets of those 2 variables. Storing distance is risky as rearranging the variables can lead to - // having distance negative. #elif defined(TARGET_ARM64) - uint64_t threadStaticBaseOffset = getThreadStaticsBaseOffset(); + threadStaticBaseOffset = getThreadStaticsBaseOffset(); +#endif if (isGCType) { pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); @@ -1954,11 +1913,11 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* { pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); - } -#endif - + } pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); +#endif // _MSC_VER + EE_TO_JIT_TRANSITION(); } From e96530a7cb2457a8b9444377089df21579030c16 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 18:58:05 -0700 Subject: [PATCH 24/79] #define for field access --- src/coreclr/jit/helperexpansion.cpp | 4 ++-- src/coreclr/vm/jithelpers.cpp | 29 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 16f97d0b0fd71..a59affdd3f8f2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -478,9 +478,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* return false; } - if (TargetOS::IsUnix()) + if (TargetOS::IsUnix) { -#if defined(TARGET_ARM) || !defined(TARGET_64BIT)) +#if defined(TARGET_ARM) || !defined(TARGET_64BIT) // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such // as MRC and MCR are used to access them. We do not support them and so should never optimize the // field access using TLS. diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index cfcf8ca76c714..4f0e3245541e2 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1797,6 +1797,11 @@ struct ThreadStaticBlockInfo void** GCThreadStaticBlocks; }; +#define t_NonGCMaxThreadStaticBlocks t_ThreadStatics.NonGCMaxThreadStaticBlocks +#define t_NonGCThreadStaticBlocks t_ThreadStatics.NonGCThreadStaticBlocks +#define t_GCMaxThreadStaticBlocks t_ThreadStatics.GCMaxThreadStaticBlocks +#define t_GCThreadStaticBlocks t_ThreadStatics.GCThreadStaticBlocks + EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; @@ -1870,22 +1875,22 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn if (t_NonGCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_ThreadStatics.NonGCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_NonGCThreadStaticBlocks; } t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_ThreadStatics.NonGCThreadStaticBlocks = newThreadStaticBlocks; + t_NonGCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_NonGCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_ThreadStatics.NonGCMaxThreadStaticBlocks = max(t_ThreadStatics.NonGCMaxThreadStaticBlocks, staticBlockIndex); + t_NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex); } HELPER_METHOD_FRAME_END(); @@ -1964,22 +1969,22 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex if (t_GCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_ThreadStatics.GCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_GCThreadStaticBlocks; } t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_ThreadStatics.GCThreadStaticBlocks = newThreadStaticBlocks; + t_GCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_GCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_ThreadStatics.GCMaxThreadStaticBlocks = max(t_ThreadStatics.GCMaxThreadStaticBlocks, staticBlockIndex); + t_GCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_GCMaxThreadStaticBlocks = max(t_GCMaxThreadStaticBlocks, staticBlockIndex); } // Get the data pointer of static block From c4db025c2108fcf965dce9aeaf78b0fedab8d584 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 19:26:52 -0700 Subject: [PATCH 25/79] change mrs -> mrs_tpid0 --- src/coreclr/jit/codegenarm64.cpp | 4 ++-- src/coreclr/jit/emitarm64.cpp | 16 +++++----------- src/coreclr/jit/helperexpansion.cpp | 14 +++++++------- src/coreclr/jit/instrsarm64.h | 4 ++-- src/coreclr/jit/registerarm64.h | 1 - 5 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c7c7ee5d4d5ab..c571bb3ec3eb7 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2955,9 +2955,9 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) else if (data->IsIconHandle(GTF_ICON_TLS_HDL)) { assert(data->AsIntCon()->IconValue() == 0); - emitAttr attr = emitActualTypeSize(targetType); + emitAttr attr = emitActualTypeSize(targetType); // On non-windows, need to load the address from system register. - emit->emitIns_R_R(INS_mrs, attr, targetReg, dataReg); + emit->emitIns_R(INS_mrs_tpid0, attr, targetReg); } #endif else diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 79f7cb0bd3081..30130b3d5104b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -941,7 +941,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) datasize = id->idOpSize(); assert(isGeneralRegister(id->idReg1())); assert(datasize == EA_8BYTE); - assert((id->idIns() != INS_mrs) || (id->idReg2() == REG_ZR)); + assert((id->idIns() != INS_mrs_tpid0) || (id->idReg2() == REG_ZR)); break; default: @@ -3741,7 +3741,10 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idReg1(reg); fmt = IF_SR_1A; break; + case INS_mrs_tpid0: + fmt = IF_SR_1A; + break; default: unreached(); } @@ -4931,11 +4934,6 @@ void emitter::emitIns_R_R( fmt = IF_DV_2L; } break; - case INS_mrs: - // assert(isVectorRegister(reg2)); - fmt = IF_SR_1A; - - break; default: unreached(); @@ -11816,10 +11814,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) assert(insOptsNone(id->idInsOpt())); code = emitInsCode(ins, fmt); - if (ins == INS_mrs) - { - code |= insEncodeReg_Tpid0(); - } code |= insEncodeReg_Rt(id->idReg1()); // ttttt dst += emitOutput_Instr(dst, code); break; @@ -13950,7 +13944,7 @@ void emitter::emitDispInsHelp( break; case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) - if (ins == INS_mrs) + if (ins == INS_mrs_tpid0) { emitDispReg(id->idReg1(), size, true); printf("tpidr_el0"); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index a59affdd3f8f2..505a804a2f8e9 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -507,10 +507,10 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); - uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; uint32_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); - + #ifdef _MSC_VER JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); @@ -564,9 +564,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* tlsValue = nullptr; unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); lvaTable[tlsLclNum].lvType = TYP_I_IMPL; - GenTree* maxThreadStaticBlocksValue = nullptr; - GenTree* threadStaticBlocksValue = nullptr; - GenTree* tlsValueDef = nullptr; + GenTree* maxThreadStaticBlocksValue = nullptr; + GenTree* threadStaticBlocksValue = nullptr; + GenTree* tlsValueDef = nullptr; #ifdef _MSC_VER size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; @@ -589,7 +589,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - + #elif defined(TARGET_ARM64) // Mark this ICON as a TLS_HDL, codegen will do: // mrs xt, tpidr_elf0 @@ -619,7 +619,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #endif // _MSC_VER // Cache the tls value - tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); + tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index 398a43620afae..7dfad4802829b 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1595,8 +1595,8 @@ INST1(isb, "isb", 0, IF_SI_0B, 0xD50330DF) INST1(dczva, "dczva", 0, IF_SR_1A, 0xD50B7420) // dc zva,Rt SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt -INST1(mrs, "mrs", 0, IF_SR_1A, 0xD5300000) - // mrs Rt,SR SR_1A 110101010011ssss sssssssssssttttt D530 0000 Rt, SR +INST1(mrs_tpid0, "mrs_tpid0", 0, IF_SR_1A, 0x06A9DE82) + // mrs Rt,tpidr_el0 SR_1A 1101010100111011 11010000010ttttt 06A9 DE82 Rt, tpidr_el0 INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00) // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h index 4da71842da361..83a4306728fab 100644 --- a/src/coreclr/jit/registerarm64.h +++ b/src/coreclr/jit/registerarm64.h @@ -98,7 +98,6 @@ REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31") #define NBASE 64 REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?") -REGDEF(TPID0, 1+NBASE, 0x0000, "tpid0", "tpid0") // This must be last! REGDEF(STK, 2+NBASE, 0x0000, "STK", "STK") From ddc931f6025cba12f09c9b0bea46411302fc41ef Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 19:33:14 -0700 Subject: [PATCH 26/79] fix a bug --- src/coreclr/jit/emitarm64.cpp | 4 ++-- src/coreclr/jit/instrsarm64.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 30130b3d5104b..8d5e17e49517a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -941,7 +941,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) datasize = id->idOpSize(); assert(isGeneralRegister(id->idReg1())); assert(datasize == EA_8BYTE); - assert((id->idIns() != INS_mrs_tpid0) || (id->idReg2() == REG_ZR)); break; default: @@ -3742,8 +3741,9 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) fmt = IF_SR_1A; break; case INS_mrs_tpid0: + id = emitNewInstrSmall(attr); + id->idReg1(reg); fmt = IF_SR_1A; - break; default: unreached(); diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index 7dfad4802829b..ee7483d5257c8 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1595,8 +1595,8 @@ INST1(isb, "isb", 0, IF_SI_0B, 0xD50330DF) INST1(dczva, "dczva", 0, IF_SR_1A, 0xD50B7420) // dc zva,Rt SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt -INST1(mrs_tpid0, "mrs_tpid0", 0, IF_SR_1A, 0x06A9DE82) - // mrs Rt,tpidr_el0 SR_1A 1101010100111011 11010000010ttttt 06A9 DE82 Rt, tpidr_el0 +INST1(mrs_tpid0, "mrs", 0, IF_SR_1A, 0xD53BD040) + // mrs Rt,tpidr_el0 SR_1A 1101010100111011 11010000010ttttt D53B D040 Rt, tpidr_el0 INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00) // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] From 1e14591e84aa60dec0f6db83e6c015a75eca569f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 19:53:19 -0700 Subject: [PATCH 27/79] remove unwanted method --- src/coreclr/jit/emitarm64.cpp | 14 -------------- src/coreclr/jit/emitarm64.h | 3 --- 2 files changed, 17 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8d5e17e49517a..44fd315d13dc2 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9064,20 +9064,6 @@ void emitter::emitIns_Call(EmitCallType callType, return ureg << 10; } -/***************************************************************************** - * - * Returns an encoding for the tpidr_el0 register. - */ - -/*static*/ emitter::code_t emitter::insEncodeReg_Tpid0() -{ - // o0 op1 CRn CRm op2 - // 1 011 1101 0000 010 - // emitter::code_t sr = 0xd382; - emitter::code_t sr = 0x5e82; - return sr << 5; -} - /***************************************************************************** * * Returns an encoding for the specified condition code. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index c9e0972c106de..59806d4b4ea25 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -311,9 +311,6 @@ static code_t insEncodeReg_Vm(regNumber reg); // Returns an encoding for the specified register used in the 'Va' position static code_t insEncodeReg_Va(regNumber reg); -// Returns an encoding for the tpidr_el0 register. -static code_t insEncodeReg_Tpid0(); - // Returns an encoding for the imm which represents the condition code. static code_t insEncodeCond(insCond cond); From 1632086eb2bf7f345550021cbaaee9c0760a2f46 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 19:54:39 -0700 Subject: [PATCH 28/79] another fix --- src/coreclr/jit/codegenarm64.cpp | 4 ---- src/coreclr/jit/registerarm64.h | 2 +- .../tools/superpmi/superpmi-shared/methodcontext.cpp | 2 +- src/coreclr/vm/jitinterface.cpp | 6 ++---- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c571bb3ec3eb7..586da3a9f60e9 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2836,10 +2836,6 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) // void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { - if (strcmp(compiler->info.compMethodName, "Main") == 0) - { - printf("hello\n"); - } GenTree* data = lclNode->gtOp1; // Stores from a multi-reg source are handled separately. diff --git a/src/coreclr/jit/registerarm64.h b/src/coreclr/jit/registerarm64.h index 83a4306728fab..7ce66ada1beb0 100644 --- a/src/coreclr/jit/registerarm64.h +++ b/src/coreclr/jit/registerarm64.h @@ -99,7 +99,7 @@ REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31") REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?") // This must be last! -REGDEF(STK, 2+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK") /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 17cd8fd28592b..76d6978781d61 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3689,7 +3689,7 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC #else pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; pInfo->descrAddrOfMaxThreadStaticBlock = value.descrAddrOfMaxThreadStaticBlock; - pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; #endif pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 5595ff443f83b..7ecda25d27875 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -75,7 +75,6 @@ __declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; __declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; - #else extern "C" void* __tls_get_addr(void* ti); struct ThreadStaticBlockInfo @@ -91,7 +90,6 @@ __thread uint32_t t_NonGCThreadStaticBlocksSize; __thread uint32_t t_GCThreadStaticBlocksSize; #endif // _MSC_VER - // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro // @@ -1582,7 +1580,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER; pResult->helper = getSharedStaticsHelper(pField, pFieldMT); -#ifndef TARGET_ARM +#if defined(TARGET_ARM) || (!defined(HOST_WINDOWS) && defined(TARGET_32BIT)) // For windows, we convert the TLS access to the optimized helper where we will store // the static blocks in TLS directly and access them via inline code. if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || @@ -1597,7 +1595,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; } -#endif // !TARGET_ARM +#endif // !TARGET_ARM || (!HOST_WINDOWS && TARGET_32BIT) } else { From 529c7f76175ab1df4d42c9597f4c8583629ce53d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 20:04:16 -0700 Subject: [PATCH 29/79] Add entries in CorInfoType.cs --- src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index d2a792e25a0c1..5580df13df449 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1152,6 +1152,8 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO { public CORINFO_CONST_LOOKUP tlsIndex; public uint offsetOfThreadLocalStoragePointer; + public ulong tlsGetAddrFtnPtr; + public ulong descrAddrOfMaxThreadStaticBlock; public CORINFO_CONST_LOOKUP offsetOfMaxThreadStaticBlocks; public CORINFO_CONST_LOOKUP offsetOfThreadStaticBlocks; public CORINFO_CONST_LOOKUP offsetOfGCDataPointer; From 90e091db38608a4c22a1a6f9ec7a1eb9601dc3c9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Jun 2023 20:16:08 -0700 Subject: [PATCH 30/79] Update the #ifdef --- src/coreclr/vm/jitinterface.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 7ecda25d27875..e20f152e6a526 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1580,8 +1580,13 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER; pResult->helper = getSharedStaticsHelper(pField, pFieldMT); -#if defined(TARGET_ARM) || (!defined(HOST_WINDOWS) && defined(TARGET_32BIT)) - // For windows, we convert the TLS access to the optimized helper where we will store +#if defined(TARGET_ARM) + // Optimization is disabled for linux/windows arm +#elif !defined(_MSC_VER) && defined(TARGET_32BIT) + // Optimization is disabled for linux/x86 +#else + // For windows x64/x86/arm64, linux x64/arm64: + // We convert the TLS access to the optimized helper where we will store // the static blocks in TLS directly and access them via inline code. if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) @@ -1595,7 +1600,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; } -#endif // !TARGET_ARM || (!HOST_WINDOWS && TARGET_32BIT) +#endif // TARGET_ARM } else { From 694c9cc1d7b3d4ec1389c0eb339e0f17b057e82f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 12 Jun 2023 10:26:36 -0700 Subject: [PATCH 31/79] fix the windows scenario: --- src/coreclr/vm/jitinterface.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index e20f152e6a526..c32ca05cefaf8 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1582,7 +1582,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, pResult->helper = getSharedStaticsHelper(pField, pFieldMT); #if defined(TARGET_ARM) // Optimization is disabled for linux/windows arm -#elif !defined(_MSC_VER) && defined(TARGET_32BIT) +#elif !defined(TARGET_WINDOWS) && defined(TARGET_X86) // Optimization is disabled for linux/x86 #else // For windows x64/x86/arm64, linux x64/arm64: @@ -1917,9 +1917,8 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); } - pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - #endif // _MSC_VER + pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); EE_TO_JIT_TRANSITION(); } From e6044a850b8db9e65f9e6bbb1c67e33f52b08870 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 12 Jun 2023 10:26:49 -0700 Subject: [PATCH 32/79] review feedback --- src/coreclr/jit/codegenarm64.cpp | 4 +- src/coreclr/jit/helperexpansion.cpp | 81 +++++++++---------- .../tools/Common/JitInterface/CorInfoTypes.cs | 4 +- .../superpmi-shared/methodcontext.cpp | 8 +- 4 files changed, 47 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 586da3a9f60e9..be82b6abf60b4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2947,15 +2947,13 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) inst_Mov_Extend(targetType, /* srcInReg */ true, targetReg, dataReg, /* canSkip */ true, emitActualTypeSize(targetType)); } -#ifndef _MSC_VER - else if (data->IsIconHandle(GTF_ICON_TLS_HDL)) + else if (TargetOS::IsUnix && data->IsIconHandle(GTF_ICON_TLS_HDL)) { assert(data->AsIntCon()->IconValue() == 0); emitAttr attr = emitActualTypeSize(targetType); // On non-windows, need to load the address from system register. emit->emitIns_R(INS_mrs_tpid0, attr, targetReg); } -#endif else { inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 505a804a2f8e9..1137db82d7a84 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -509,18 +509,14 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; uint32_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; - JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); -#ifdef _MSC_VER + JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); -#else JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); -#endif - JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -568,55 +564,58 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTree* threadStaticBlocksValue = nullptr; GenTree* tlsValueDef = nullptr; -#ifdef _MSC_VER - size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; - GenTree* dllRef = nullptr; - - if (tlsIndexValue != 0) + if (TargetOS::IsWindows) { - dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); - } + size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; + GenTree* dllRef = nullptr; - // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] - tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); - tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + if (tlsIndexValue != 0) + { + dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); + } - if (dllRef != nullptr) - { - // Add the dllRef to produce thread local storage reference for coreclr - tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); - } + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] + tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - // Base of coreclr's thread local storage - tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + if (dllRef != nullptr) + { + // Add the dllRef to produce thread local storage reference for coreclr + tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); + } -#elif defined(TARGET_ARM64) - // Mark this ICON as a TLS_HDL, codegen will do: - // mrs xt, tpidr_elf0 - // mov xd, [xt+cns] - tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); + // Base of coreclr's thread local storage + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + } + else + { +#if defined(TARGET_ARM64) + // Mark this ICON as a TLS_HDL, codegen will do: + // mrs xt, tpidr_elf0 + // mov xd, [xt+cns] + tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); #elif defined(TARGET_AMD64) - GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); - // This is a syscall indirect call which takes an argument. - // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); - tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + // This is a syscall indirect call which takes an argument. + // Populate and set the ABI apporpriately. + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); + tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); - CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); - arg0->AbiInfo = CallArgABIInformation(); - arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + arg0->AbiInfo = CallArgABIInformation(); + arg0->AbiInfo.SetRegNum(0, REG_ARG_0); - tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); #ifdef UNIX_X86_ABI - tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; + tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; #endif // UNIX_X86_ABI - -#endif // _MSC_VER +#endif // TARGET_ARM64 + } // Cache the tls value tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 5580df13df449..eb9c6d88d5ed4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1152,8 +1152,8 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO { public CORINFO_CONST_LOOKUP tlsIndex; public uint offsetOfThreadLocalStoragePointer; - public ulong tlsGetAddrFtnPtr; - public ulong descrAddrOfMaxThreadStaticBlock; + public nuint tlsGetAddrFtnPtr; + public nuint descrAddrOfMaxThreadStaticBlock; public CORINFO_CONST_LOOKUP offsetOfMaxThreadStaticBlocks; public CORINFO_CONST_LOOKUP offsetOfThreadStaticBlocks; public CORINFO_CONST_LOOKUP offsetOfGCDataPointer; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 76d6978781d61..85e55275c06cb 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3639,7 +3639,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC Agnostic_GetThreadLocalStaticBlocksInfo value; ZeroMemory(&value, sizeof(value)); -#ifdef _MSC_VER +#ifdef TARGET_WINDOWS value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); value.tlsIndex.accessType = pInfo->tlsIndex.accessType; value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; @@ -3649,7 +3649,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; -#endif +#endif // TARGET_WINDOWS value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. @@ -3660,7 +3660,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { -#ifdef _MSC_VER +#ifdef TARGET_WINDOWS printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u", @@ -3670,7 +3670,7 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 ", descrAddrOfMaxThreadStaticBlock-%lu, offsetOfThreadStaticBlocks-%u", key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.offsetOfThreadStaticBlocks); -#endif +#endif // TARGET_WINDOWS } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) From ae768294501ef0b411d90278b1684a47e1426bd2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 12 Jun 2023 10:53:21 -0700 Subject: [PATCH 33/79] fix the data-type --- src/coreclr/tools/superpmi/superpmi-shared/agnostic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index c0ac6b1b330dc..572ee7113ce57 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -522,8 +522,8 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo UINT offsetOfThreadLocalStoragePointer; UINT offsetOfMaxThreadStaticBlocks; UINT offsetOfThreadStaticBlocks; - DWORDLONG tlsGetAddrFtnPtr; - DWORDLONG descrAddrOfMaxThreadStaticBlock; + DWORD tlsGetAddrFtnPtr; + DWORD descrAddrOfMaxThreadStaticBlock; UINT offsetOfGCDataPointer; }; From 76db418cb36bb4d9eb8e6247fd0327b818a530ec Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 14 Jun 2023 09:43:14 -0700 Subject: [PATCH 34/79] add osx-arm64 support --- src/coreclr/jit/helperexpansion.cpp | 20 ++++++++++++----- src/coreclr/vm/jitinterface.cpp | 34 +++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 1137db82d7a84..47303448e3808 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -590,11 +590,21 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* else { #if defined(TARGET_ARM64) - // Mark this ICON as a TLS_HDL, codegen will do: - // mrs xt, tpidr_elf0 - // mov xd, [xt+cns] - tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); - + if (TargetOS::IsMacOS) + { + GenTree* tls_get_addr_val = + gtNewIconHandleNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); + } + else + { + // Mark this ICON as a TLS_HDL, codegen will do: + // mrs xt, tpidr_elf0 + // mov xd, [xt+cns] + tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); + } #elif defined(TARGET_AMD64) GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index c32ca05cefaf8..ad7f9abe39884 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1833,6 +1833,9 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } +#ifdef TARGET_OSX + +#else void* getThreadStaticsBaseOffset() { uint8_t* p; @@ -1847,9 +1850,26 @@ void* getThreadStaticsBaseOffset() return getThreadStaticDescriptor(p); } +#endif // TARGET_OSX #endif // HOST_AMD64 #ifdef HOST_ARM64 + +#ifdef TARGET_OSX +uint64_t getThreadStaticsBaseOffset() +{ + uint64_t tlvGetAddr; + __asm__ ( + "adrp x0, t_ThreadStatics@TLVPPAGE\n" + "ldr x0, [x0, t_ThreadStatics@TLVPPAGEOFF]\n" + "ldr %[result], [x0]\n" + : [result] "=r" (p) + : + : "x0", "x1" + ); + return tlvGetAddr; +} +#else uint64_t getThreadStaticsBaseOffset() { uint64_t offset; @@ -1867,6 +1887,7 @@ uint64_t getThreadStaticsBaseOffset() return offset; } +#endif // TARGET_OSX #endif // HOST_ARM64 #endif // !_MSC_VER @@ -1899,13 +1920,22 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #else uint64_t threadStaticBaseOffset = 0; #if defined(TARGET_AMD64) - // get the address of tls_get_addr system method and base address - // of struct + // For Linux/x64, get the address of tls_get_addr system method and the base address + // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); #elif defined(TARGET_ARM64) +#ifdef TARGET_OSX + // For OSX/arm64, need to get the address of relevant tlv_get_addr of thread static + // variable that will be invoked during runtime to get the right address of corresponding + // thread. + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); +#else + // For Linux/arm64, just get the offset of thread static variable, and during execution, + // this offset, taken from trpid_elp0 system register gives back the thread variable address. threadStaticBaseOffset = getThreadStaticsBaseOffset(); +#endif // TARGET_OSX #endif if (isGCType) { From 63ae9e8f9af97c1b14b4dedfcadddc0ae3f35ae4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 15 Jun 2023 18:58:54 -0700 Subject: [PATCH 35/79] fix osx-arm64 issues --- src/coreclr/jit/helperexpansion.cpp | 21 +++++++++++++++++-- src/coreclr/jit/lsraarmarch.cpp | 20 +++++++++++++++++- .../superpmi-shared/methodcontext.cpp | 4 ++-- src/coreclr/vm/jitinterface.cpp | 8 +++---- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 47303448e3808..4c81676bb0c12 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -592,10 +592,27 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #if defined(TARGET_ARM64) if (TargetOS::IsMacOS) { + // mov x0, descrAddrOfMaxThreadStaticBlock + // mov x1, [x0] + // blr x1 + // GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + + tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); + + // This is a syscall indirect call which takes an argument. + // Populate and set the ABI apporpriately. + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); + tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + + CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + arg0->AbiInfo = CallArgABIInformation(); + arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); } else diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 31f816fda1008..677c66d6bad7b 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -331,6 +331,24 @@ int LinearScan::BuildCall(GenTreeCall* call) } } + if (call->gtCallType == CT_INDIRECT) + { + for (CallArg& arg : call->gtArgs.EarlyArgs()) + { + CallArgABIInformation& abiInfo = arg.AbiInfo; + GenTree* argNode = arg.GetEarlyNode(); + + // Each register argument corresponds to one source. + if (argNode->OperIsPutArgReg()) + { + srcCount++; + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + const regNumber argReg = abiInfo.GetRegNum(); + assert(argNode->GetRegNum() == argReg); + } + } + } + #ifdef DEBUG // Now, count stack args // Note that these need to be computed into a register, but then @@ -355,7 +373,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } else { - assert(!argNode->IsValue() || argNode->IsUnusedValue()); + assert(!argNode->IsValue() || argNode->IsUnusedValue() || (call->gtCallType == CT_INDIRECT)); } } } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 85e55275c06cb..6d4a4447ee231 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3667,8 +3667,8 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); #else - printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%016" PRIX64 - ", descrAddrOfMaxThreadStaticBlock-%lu, offsetOfThreadStaticBlocks-%u", + printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%u" + ", descrAddrOfMaxThreadStaticBlock-%u, offsetOfThreadStaticBlocks-%u", key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.offsetOfThreadStaticBlocks); #endif // TARGET_WINDOWS } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index ad7f9abe39884..5a2cbd63e14c5 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1860,10 +1860,10 @@ uint64_t getThreadStaticsBaseOffset() { uint64_t tlvGetAddr; __asm__ ( - "adrp x0, t_ThreadStatics@TLVPPAGE\n" - "ldr x0, [x0, t_ThreadStatics@TLVPPAGEOFF]\n" - "ldr %[result], [x0]\n" - : [result] "=r" (p) + "adrp x0, _t_ThreadStatics@TLVPPAGE\n" + "ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]\n" + "ldr %[result], x0\n" + : [result] "=r" (tlvGetAddr) : : "x0", "x1" ); From e3761090684eb62c0b2477b5d89b4d4b67de1a6c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 12 Jun 2023 12:56:50 -0700 Subject: [PATCH 36/79] fix build error --- src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 6d4a4447ee231..ab47e8ac795b3 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3648,7 +3648,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC #else value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; - value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; + value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; #endif // TARGET_WINDOWS value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; From 0d255d204a867a75be684d4839b7bfa87d75fe94 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 16 Jun 2023 10:44:09 -0700 Subject: [PATCH 37/79] fix build error after merge --- src/coreclr/vm/jitinterface.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index b73a308af58bc..bd001aaf20c0a 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1932,8 +1932,7 @@ uint64_t getThreadStaticsBaseOffset() } #endif // TARGET_OSX #endif // HOST_ARM64 -#endif // !_MSC_VER - +#else /*********************************************************************/ static uint32_t ThreadLocalOffset(void* p) { @@ -1943,6 +1942,9 @@ static uint32_t ThreadLocalOffset(void* p) return (uint32_t)((uint8_t*)p - pOurTls); } +#endif // !_MSC_VER + + void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) { CONTRACTL { From ac982cb5db9bb022effc048c58c15b715c054f6b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 16 Jun 2023 14:50:35 -0700 Subject: [PATCH 38/79] add osx/x64 support --- src/coreclr/jit/helperexpansion.cpp | 76 +++++++++++++++++------------ src/coreclr/vm/jitinterface.cpp | 52 ++++++++++++++++++-- 2 files changed, 91 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4c81676bb0c12..b85285cb66340 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -587,43 +587,47 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); } - else + else if (TargetOS::IsMacOS) { -#if defined(TARGET_ARM64) - if (TargetOS::IsMacOS) - { - // mov x0, descrAddrOfMaxThreadStaticBlock - // mov x1, [x0] - // blr x1 - // - GenTree* tls_get_addr_val = - gtNewIconHandleNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, GTF_ICON_FTN_ADDR); - - tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + // Code sequence to access thread local variable on osx/x64: + // + // mov rdi, descrAddrOfMaxThreadStaticBlock + // call [rdi] + // + // Code sequence to access thread local variable on osx/arm64: + // + // mov x0, descrAddrOfMaxThreadStaticBlock + // mov x1, [x0] + // blr x1 + // + GenTree* tls_get_addr_val = + gtNewIconHandleNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - // This is a syscall indirect call which takes an argument. - // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); - tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); - CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); - arg0->AbiInfo = CallArgABIInformation(); - arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + // This is a syscall indirect call which takes an argument. + // Populate and set the ABI apporpriately. + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); + tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); - tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); - } - else - { - // Mark this ICON as a TLS_HDL, codegen will do: - // mrs xt, tpidr_elf0 - // mov xd, [xt+cns] - tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); - } -#elif defined(TARGET_AMD64) + CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); + arg0->AbiInfo = CallArgABIInformation(); + arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); + } + else if (TargetOS::IsUnix) + { +#if defined(TARGET_AMD64) + // Code sequence to access thread local variable on linux/x64: + // + // mov rdi, 0x7FE5C418CD28 ; descrAddrOfMaxThreadStaticBlock + // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr + // call rax + // GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); @@ -641,7 +645,15 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #ifdef UNIX_X86_ABI tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; #endif // UNIX_X86_ABI -#endif // TARGET_ARM64 +#elif defined(TARGET_ARM64) + // Code sequence to access thread local variable on linux/arm64: + // + // mrs xt, tpidr_elf0 + // mov xd, [xt+cns] + tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); +#else + assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); +#endif } // Cache the tls value diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index bd001aaf20c0a..3e8fdad82fd01 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1860,25 +1860,57 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG #ifndef _MSC_VER #ifdef HOST_AMD64 + +#ifdef TARGET_OSX void* getThreadStaticDescriptor(uint8_t* p) { - _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), + _ASSERTE_MSG((p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d), "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. // These opcodes are patched by the dynamic linker. // Move beyond the opcodes that we have already checked above. - p += 4; + p += 3; - // The descriptor address is located at *p at this point. Ready that and add + // The descriptor address is located at *p at this point. Read that and add // it to the instruction pointer to locate the address of `ti` that will be used // to pass to __tls_get_addr during execution. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. return *(uint32_t*)p + (p + 4); } -#ifdef TARGET_OSX +void* getThreadStaticsBaseOffset() +{ + uint8_t* p; + __asm__ ( + "leaq 0(%%rip), %%rdx\n" + "movq _t_ThreadStatics@TLVP(%%rip), %%rdi\n" + : "=d"(p) + ); + + return getThreadStaticDescriptor(p); +} #else +void* getThreadStaticDescriptor(uint8_t* p) +{ + _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), + "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); + + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. + p += 4; + + // The descriptor address is located at *p at this point. Read that and add + // it to the instruction pointer to locate the address of `ti` that will be used + // to pass to __tls_get_addr during execution. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. + return *(uint32_t*)p + (p + 4); +} + void* getThreadStaticsBaseOffset() { uint8_t* p; @@ -1973,10 +2005,18 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #else uint64_t threadStaticBaseOffset = 0; #if defined(TARGET_AMD64) + +#ifdef TARGET_OSX + // For OSX/x64, need to get the address of relevant tlv_get_addr of thread static + // variable that will be invoked during runtime to get the right address of corresponding + // thread. + pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); +#else // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); +#endif // TARGET_OSX #elif defined(TARGET_ARM64) #ifdef TARGET_OSX @@ -1989,7 +2029,9 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // this offset, taken from trpid_elp0 system register gives back the thread variable address. threadStaticBaseOffset = getThreadStaticsBaseOffset(); #endif // TARGET_OSX -#endif +#else + _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86") +#endif // TARGET_AMD64 if (isGCType) { pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); From 130bc1449cbf7b5fb5d9006ec05e3a8f4c75c7e1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 16 Jun 2023 15:54:58 -0700 Subject: [PATCH 39/79] fix errors --- src/coreclr/vm/jitinterface.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 3e8fdad82fd01..f0f7c12dfd963 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1937,7 +1937,7 @@ uint64_t getThreadStaticsBaseOffset() __asm__ ( "adrp x0, _t_ThreadStatics@TLVPPAGE\n" "ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]\n" - "ldr %[result], x0\n" + "mov %[result], x0\n" : [result] "=r" (tlvGetAddr) : : "x0", "x1" @@ -2030,7 +2030,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* threadStaticBaseOffset = getThreadStaticsBaseOffset(); #endif // TARGET_OSX #else - _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86") + _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); #endif // TARGET_AMD64 if (isGCType) { From d3cdf77dec4006522a25b3adc93a1f7e2bfa331f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 19 Jun 2023 16:16:19 -0700 Subject: [PATCH 40/79] fix the macos/x64 --- src/coreclr/jit/helperexpansion.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b85285cb66340..b471ecab318dd 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -587,7 +587,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); } - else if (TargetOS::IsMacOS) + // The check (TargetOS::IsMacOS) doesn't work natively on osx/x64 + else if (eeGetEEInfo()->osType == CORINFO_MACOS) { // Code sequence to access thread local variable on osx/x64: // From ec4093252c016427c4960e33e1369204378a7738 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 Jun 2023 09:12:49 -0700 Subject: [PATCH 41/79] disable for alpine linux --- src/coreclr/vm/jitinterface.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 677e64b99c7e5..fecb119b27cc4 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1415,6 +1415,8 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // Optimization is disabled for linux/windows arm #elif !defined(TARGET_WINDOWS) && defined(TARGET_X86) // Optimization is disabled for linux/x86 +#elif defined(TARGET_ALPINE_LINUX) + // Optimization is disabled for linux/alpine #else // For windows x64/x86/arm64, linux x64/arm64: // We convert the TLS access to the optimized helper where we will store From 368e8c540ca8bba296229ce4c0a0cfe0992bac4e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 Jun 2023 10:09:22 -0700 Subject: [PATCH 42/79] Disable for R2R --- src/coreclr/jit/helperexpansion.cpp | 11 +++++++++-- src/coreclr/vm/jitinterface.cpp | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b471ecab318dd..b2ff8067d6be6 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -478,13 +478,20 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* return false; } +#if defined(FEATURE_READYTORUN) + assert(!"Unsupported scenario of optimizing TLS access for ReadyToRun"); +#endif +#if defined(TARGET_ALPINE_LINUX) + assert(!"Unsupported scenario of optimizing TLS access on Linux Alpine"); +#endif + if (TargetOS::IsUnix) { #if defined(TARGET_ARM) || !defined(TARGET_64BIT) // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such // as MRC and MCR are used to access them. We do not support them and so should never optimize the // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Arm32/x86"); + assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); #endif } else @@ -493,7 +500,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such // as MRC and MCR are used to access them. We do not support them and so should never optimize the // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Arm32"); + assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32"); #endif } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index fecb119b27cc4..c940778e44f7c 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1417,6 +1417,8 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // Optimization is disabled for linux/x86 #elif defined(TARGET_ALPINE_LINUX) // Optimization is disabled for linux/alpine +#elif defined(FEATURE_READYTORUN) + // Optimization is disabled for R2R #else // For windows x64/x86/arm64, linux x64/arm64: // We convert the TLS access to the optimized helper where we will store From c35938c15a81c486f3ad0281394d5fe74ea0aa40 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 Jun 2023 23:46:11 -0700 Subject: [PATCH 43/79] review feedback --- src/coreclr/jit/helperexpansion.cpp | 7 +++---- .../tools/superpmi/superpmi-shared/agnostic.h | 4 ++-- .../superpmi-shared/methodcontext.cpp | 20 ++++--------------- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b2ff8067d6be6..4945993597a57 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -478,9 +478,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* return false; } -#if defined(FEATURE_READYTORUN) - assert(!"Unsupported scenario of optimizing TLS access for ReadyToRun"); -#endif + assert(!opts.IsReadyToRun()); + #if defined(TARGET_ALPINE_LINUX) assert(!"Unsupported scenario of optimizing TLS access on Linux Alpine"); #endif @@ -641,7 +640,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* GenTreeCall* tlsRefCall = tlsValue->AsCall(); // This is a syscall indirect call which takes an argument. - // Populate and set the ABI apporpriately. + // Populate and set the ABI appropriately. GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 1d7cf07d3f2db..ab5a087b52292 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -514,8 +514,8 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo UINT offsetOfThreadLocalStoragePointer; UINT offsetOfMaxThreadStaticBlocks; UINT offsetOfThreadStaticBlocks; - DWORD tlsGetAddrFtnPtr; - DWORD descrAddrOfMaxThreadStaticBlock; + DWORDLONG tlsGetAddrFtnPtr; + DWORDLONG descrAddrOfMaxThreadStaticBlock; UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index c3f1ff1dceac8..4085060635435 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3573,17 +3573,13 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC Agnostic_GetThreadLocalStaticBlocksInfo value; ZeroMemory(&value, sizeof(value)); -#ifdef TARGET_WINDOWS value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); value.tlsIndex.accessType = pInfo->tlsIndex.accessType; value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; -#else value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; - value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; -#endif // TARGET_WINDOWS value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. @@ -3594,17 +3590,13 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { -#ifdef TARGET_WINDOWS printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" - ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u", + ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u" + ", value tlsGetAddrFtnPtr-%llu, descrAddrOfMaxThreadStaticBlock-%llu", key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, - value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); -#else - printf("GetThreadLocalStaticBlocksInfo key %u, value tlsGetAddrFtnPtr-%u" - ", descrAddrOfMaxThreadStaticBlock-%u, offsetOfThreadStaticBlocks-%u", - key, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock, value.offsetOfThreadStaticBlocks); -#endif // TARGET_WINDOWS + value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, + value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock); } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3614,17 +3606,13 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(key, value)); -#ifdef _MSC_VER pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; -#else pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; pInfo->descrAddrOfMaxThreadStaticBlock = value.descrAddrOfMaxThreadStaticBlock; - pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; -#endif pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } From 8a222d26ed1965ed1bf7d2aa3ea7ab7481eca756 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 21 Jun 2023 23:52:04 -0700 Subject: [PATCH 44/79] fix r2r check --- src/coreclr/vm/jitinterface.cpp | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index c940778e44f7c..10ed1749d1aff 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1417,23 +1417,24 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // Optimization is disabled for linux/x86 #elif defined(TARGET_ALPINE_LINUX) // Optimization is disabled for linux/alpine -#elif defined(FEATURE_READYTORUN) - // Optimization is disabled for R2R #else - // For windows x64/x86/arm64, linux x64/arm64: - // We convert the TLS access to the optimized helper where we will store - // the static blocks in TLS directly and access them via inline code. - if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) + if (!pFieldMT->GetModule()->IsReadyToRun()) { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; - } - else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) - { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + // For windows x64/x86/arm64, linux x64/arm64: + // We convert the TLS access to the optimized helper where we will store + // the static blocks in TLS directly and access them via inline code. + if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } + else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } } #endif // TARGET_ARM } From 5235d373bc3511fc0dcb244e685115fc446edc5d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 22 Jun 2023 00:37:41 -0700 Subject: [PATCH 45/79] move windows to struct model --- src/coreclr/inc/corinfo.h | 4 +- src/coreclr/jit/helperexpansion.cpp | 4 +- .../tools/superpmi/superpmi-shared/agnostic.h | 4 +- .../superpmi-shared/methodcontext.cpp | 6 +-- src/coreclr/vm/jithelpers.cpp | 45 ++++++---------- src/coreclr/vm/jitinterface.cpp | 53 +++++++------------ 6 files changed, 45 insertions(+), 71 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 2d299c8bdb2bf..2cce5c88f7447 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1730,8 +1730,8 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO { CORINFO_CONST_LOOKUP tlsIndex; // windows specific uint32_t offsetOfThreadLocalStoragePointer; // windows specific - uint32_t offsetOfMaxThreadStaticBlocks; - uint32_t offsetOfThreadStaticBlocks; + size_t offsetOfMaxThreadStaticBlocks; + size_t offsetOfThreadStaticBlocks; size_t tlsGetAddrFtnPtr; // linux/x64 specific size_t descrAddrOfMaxThreadStaticBlock; // linux/x64 specific uint32_t offsetOfGCDataPointer; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4945993597a57..52c932f1fb07a 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -513,8 +513,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); - uint32_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; - uint32_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; + size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index ab5a087b52292..a9ed4a162eb75 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -512,8 +512,8 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo { Agnostic_CORINFO_CONST_LOOKUP tlsIndex; UINT offsetOfThreadLocalStoragePointer; - UINT offsetOfMaxThreadStaticBlocks; - UINT offsetOfThreadStaticBlocks; + DWORDLONG offsetOfMaxThreadStaticBlocks; + DWORDLONG offsetOfThreadStaticBlocks; DWORDLONG tlsGetAddrFtnPtr; DWORDLONG descrAddrOfMaxThreadStaticBlock; UINT offsetOfGCDataPointer; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 4085060635435..ead0497ad58a3 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3591,9 +3591,9 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 - ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" - ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u" - ", value tlsGetAddrFtnPtr-%llu, descrAddrOfMaxThreadStaticBlock-%llu", + ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%016" PRIX64 + ", offsetOfThreadStaticBlocks-%016" PRIX64 " offsetOfGCDataPointer-%u" + ", value tlsGetAddrFtnPtr-%016" PRIX64 ", descrAddrOfMaxThreadStaticBlock--%016" PRIX64 , key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 4f0e3245541e2..e80156c165b48 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1777,17 +1777,6 @@ HCIMPL1(void*, JIT_GetGCThreadStaticBase_Helper, MethodTable * pMT) } HCIMPLEND - -#ifdef _MSC_VER -__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks; -__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; - -__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; -__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; - -__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; -__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; -#else struct ThreadStaticBlockInfo { uint32_t NonGCMaxThreadStaticBlocks; @@ -1797,11 +1786,11 @@ struct ThreadStaticBlockInfo void** GCThreadStaticBlocks; }; -#define t_NonGCMaxThreadStaticBlocks t_ThreadStatics.NonGCMaxThreadStaticBlocks -#define t_NonGCThreadStaticBlocks t_ThreadStatics.NonGCThreadStaticBlocks -#define t_GCMaxThreadStaticBlocks t_ThreadStatics.GCMaxThreadStaticBlocks -#define t_GCThreadStaticBlocks t_ThreadStatics.GCThreadStaticBlocks - +#ifdef _MSC_VER +__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics; +__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; +__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; +#else EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; @@ -1875,22 +1864,22 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn if (t_NonGCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_NonGCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_ThreadStatics.NonGCThreadStaticBlocks; } t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_NonGCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.NonGCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_NonGCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.NonGCMaxThreadStaticBlocks = max(t_ThreadStatics.NonGCMaxThreadStaticBlocks, staticBlockIndex); } HELPER_METHOD_FRAME_END(); @@ -1969,22 +1958,22 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex if (t_GCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_GCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete t_ThreadStatics.GCThreadStaticBlocks; } t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_GCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.GCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_GCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_GCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_GCMaxThreadStaticBlocks = max(t_GCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.GCMaxThreadStaticBlocks = max(t_ThreadStatics.GCMaxThreadStaticBlocks, staticBlockIndex); } // Get the data pointer of static block diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 10ed1749d1aff..607b56843185d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -69,14 +69,6 @@ EXTERN_C uint32_t _tls_index; #endif -#ifdef _MSC_VER -__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks; -__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; - -__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; -__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; -#else -extern "C" void* __tls_get_addr(void* ti); struct ThreadStaticBlockInfo { uint32_t NonGCMaxThreadStaticBlocks; @@ -85,6 +77,12 @@ struct ThreadStaticBlockInfo uint32_t GCMaxThreadStaticBlocks; void** GCThreadStaticBlocks; }; +#ifdef _MSC_VER +__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics; +__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; +__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; +#else +extern "C" void* __tls_get_addr(void* ti); __thread ThreadStaticBlockInfo t_ThreadStatics; __thread uint32_t t_NonGCThreadStaticBlocksSize; __thread uint32_t t_GCThreadStaticBlocksSize; @@ -1780,51 +1778,39 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* JIT_TO_EE_TRANSITION(); + uint64_t threadStaticBaseOffset = 0; + #ifdef _MSC_VER pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); pInfo->tlsIndex.accessType = IAT_VALUE; pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); - if (isGCType) - { - pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_GCThreadStaticBlocks); - pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_GCMaxThreadStaticBlocks); - } - else - { - pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_NonGCThreadStaticBlocks); - pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); - } -#else - uint64_t threadStaticBaseOffset = 0; -#if defined(TARGET_AMD64) + threadStaticBaseOffset = ThreadLocalOffset(&t_ThreadStatics); -#ifdef TARGET_OSX - // For OSX/x64, need to get the address of relevant tlv_get_addr of thread static +#elif defined(TARGET_OSX) + + // For OSX x64/arm64, need to get the address of relevant tlv_get_addr of thread static // variable that will be invoked during runtime to get the right address of corresponding // thread. pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); -#else + +#elif defined(TARGET_AMD64) + // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); -#endif // TARGET_OSX #elif defined(TARGET_ARM64) -#ifdef TARGET_OSX - // For OSX/arm64, need to get the address of relevant tlv_get_addr of thread static - // variable that will be invoked during runtime to get the right address of corresponding - // thread. - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); -#else + // For Linux/arm64, just get the offset of thread static variable, and during execution, // this offset, taken from trpid_elp0 system register gives back the thread variable address. threadStaticBaseOffset = getThreadStaticsBaseOffset(); -#endif // TARGET_OSX + #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); -#endif // TARGET_AMD64 +#endif // _MSC_VER + if (isGCType) { pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); @@ -1835,7 +1821,6 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); } -#endif // _MSC_VER pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); EE_TO_JIT_TRANSITION(); From a27362352d4dc9de5e2f0982e336f088531d0c2b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 22 Jun 2023 12:29:41 -0700 Subject: [PATCH 46/79] review feedback --- src/coreclr/jit/helperexpansion.cpp | 4 --- .../tools/Common/JitInterface/CorInfoTypes.cs | 6 ++-- .../superpmi-shared/methodcontext.cpp | 8 ++--- src/coreclr/vm/jitinterface.cpp | 31 +++++++++---------- 4 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 52c932f1fb07a..06ffe5858ebab 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -480,10 +480,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* assert(!opts.IsReadyToRun()); -#if defined(TARGET_ALPINE_LINUX) - assert(!"Unsupported scenario of optimizing TLS access on Linux Alpine"); -#endif - if (TargetOS::IsUnix) { #if defined(TARGET_ARM) || !defined(TARGET_64BIT) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 2de52ad2ec6a2..cb694f2f9cd9f 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1153,9 +1153,9 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO public uint offsetOfThreadLocalStoragePointer; public nuint tlsGetAddrFtnPtr; public nuint descrAddrOfMaxThreadStaticBlock; - public CORINFO_CONST_LOOKUP offsetOfMaxThreadStaticBlocks; - public CORINFO_CONST_LOOKUP offsetOfThreadStaticBlocks; - public CORINFO_CONST_LOOKUP offsetOfGCDataPointer; + public nuint offsetOfMaxThreadStaticBlocks; + public nuint offsetOfThreadStaticBlocks; + public uint offsetOfGCDataPointer; }; // System V struct passing diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index ead0497ad58a3..a45b3a49314da 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3608,11 +3608,11 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; - pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfMaxThreadStaticBlocks = (DWORD)value.offsetOfMaxThreadStaticBlocks; pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; - pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; - pInfo->tlsGetAddrFtnPtr = value.tlsGetAddrFtnPtr; - pInfo->descrAddrOfMaxThreadStaticBlock = value.descrAddrOfMaxThreadStaticBlock; + pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; + pInfo->tlsGetAddrFtnPtr = (DWORD)value.tlsGetAddrFtnPtr; + pInfo->descrAddrOfMaxThreadStaticBlock = (DWORD)value.descrAddrOfMaxThreadStaticBlock; pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 607b56843185d..5c569516f1597 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1416,23 +1416,20 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, #elif defined(TARGET_ALPINE_LINUX) // Optimization is disabled for linux/alpine #else - if (!pFieldMT->GetModule()->IsReadyToRun()) + // For windows x64/x86/arm64, linux x64/arm64: + // We convert the TLS access to the optimized helper where we will store + // the static blocks in TLS directly and access them via inline code. + if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) { - // For windows x64/x86/arm64, linux x64/arm64: - // We convert the TLS access to the optimized helper where we will store - // the static blocks in TLS directly and access them via inline code. - if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) - { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; - } - else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) - { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; - } + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } + else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; } #endif // TARGET_ARM } @@ -1778,7 +1775,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* JIT_TO_EE_TRANSITION(); - uint64_t threadStaticBaseOffset = 0; + size_t threadStaticBaseOffset = 0; #ifdef _MSC_VER pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); From 6ef9e8d5237ad60f38a0ae3276de87b2e60624ba Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 23 Jun 2023 00:13:36 -0700 Subject: [PATCH 47/79] fix the register clobbering in release bits --- src/coreclr/vm/jitinterface.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 5c569516f1597..0df1fb1036387 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1668,6 +1668,7 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } +// Generates sequence for accessing offset in TLS for osx/x64 void* getThreadStaticsBaseOffset() { uint8_t* p; @@ -1699,9 +1700,13 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getThreadStaticsBaseOffset() +// Generates sequence for accessing offset in TLS for linux/x64 +// Note: This method is marked as NOINLINE, so we preserve `rbx +// during the call because it is trashed in the inline assembly. +NOINLINE void* getThreadStaticsBaseOffset() { uint8_t* p; + __asm__("leaq 0(%%rip), %%rbx\n" "data16\n" "leaq t_ThreadStatics@TLSGD(%%rip), %%rdi\n" @@ -1709,7 +1714,10 @@ void* getThreadStaticsBaseOffset() "data16\n" "rex64\n" "callq __tls_get_addr\n" - : "=b"(p)); + : [result] "=b" (p) + : + : "rdi" + ); return getThreadStaticDescriptor(p); } @@ -1719,6 +1727,8 @@ void* getThreadStaticsBaseOffset() #ifdef HOST_ARM64 #ifdef TARGET_OSX + +// Generates sequence for accessing offset in TLS for osx/arm64 uint64_t getThreadStaticsBaseOffset() { uint64_t tlvGetAddr; @@ -1733,6 +1743,8 @@ uint64_t getThreadStaticsBaseOffset() return tlvGetAddr; } #else + +// Generates sequence for accessing offset in TLS for linux/arm64 uint64_t getThreadStaticsBaseOffset() { uint64_t offset; From 6d31ec634e2138861c65534d89a55c1e0e4b149c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 09:01:35 -0700 Subject: [PATCH 48/79] Move the linux/x64 logic to .S file --- src/coreclr/vm/amd64/jithelpers_fast.S | 24 ++++++++++++++++++++++++ src/coreclr/vm/jitinterface.cpp | 23 +++++------------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 32890b471b26c..cbb8f0db6456d 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -529,3 +529,27 @@ LEAF_ENTRY JIT_DispatchIndirectCall, _TEST movabs r11, 0xCDCDCDCDCDCDCDCD rex64 jmp rax LEAF_END JIT_DispatchIndirectCall, _TEST + +// Calculates the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +// once linker does its relocation and fixup of thread locals. It retrieves the current `instruction pointer` so +// the caller can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the +// final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for +// the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern +// of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. +// We do not use the result we get from `__tls_get_addr()` call, but instead just return the `instructino pointer` value +// that we read in the beginning. +LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT + push rbx + push rdi + lea rbx, [rip] // return the current instruction pointer + data16 + lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation + data16 + data16 + rex64 + call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset + mov rax, rbx // return the instruction pointer + pop rdi + pop rbx + ret +LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 0df1fb1036387..792c4b6bd7b26 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1700,27 +1700,14 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -// Generates sequence for accessing offset in TLS for linux/x64 -// Note: This method is marked as NOINLINE, so we preserve `rbx -// during the call because it is trashed in the inline assembly. -NOINLINE void* getThreadStaticsBaseOffset() -{ - uint8_t* p; - - __asm__("leaq 0(%%rip), %%rbx\n" - "data16\n" - "leaq t_ThreadStatics@TLSGD(%%rip), %%rdi\n" - "data16\n" - "data16\n" - "rex64\n" - "callq __tls_get_addr\n" - : [result] "=b" (p) - : - : "rdi" - ); +extern "C" void* JIT_GetThreadStaticsBaseOffset(); +void* getThreadStaticsBaseOffset() +{ + uint8_t* p = (uint8_t*)JIT_GetThreadStaticsBaseOffset(); return getThreadStaticDescriptor(p); } + #endif // TARGET_OSX #endif // HOST_AMD64 From 9292e2a2f3d720f6451f08a31761a35a465120cf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 09:03:09 -0700 Subject: [PATCH 49/79] Use TargetOS::IsMacOS --- src/coreclr/jit/helperexpansion.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 06ffe5858ebab..4cb41d66aefd9 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -589,8 +589,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Base of coreclr's thread local storage tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); } - // The check (TargetOS::IsMacOS) doesn't work natively on osx/x64 - else if (eeGetEEInfo()->osType == CORINFO_MACOS) + else if (TargetOS::IsMacOS) { // Code sequence to access thread local variable on osx/x64: // From de5ada279c064ffd5060d05260a902bfd0188a21 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 17:19:57 -0700 Subject: [PATCH 50/79] disable optimization for single file --- src/coreclr/jit/helperexpansion.cpp | 13 +++++++++++++ src/coreclr/vm/jitinterface.cpp | 7 +++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4cb41d66aefd9..06bea8563f489 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -509,6 +509,19 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); +#ifdef TARGET_AMD64 + if (TargetOS::IsUnix || TargetOS::IsMacOS) + { + if (threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock == 0) + { + // We possibly compiled coreclr as single file and not .so file. + // Do not perform this optimization for it. + return false; + } + } +#endif // TARGET_AMD64 + + size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 899a07601bd10..fab92fe1e8cd2 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1684,8 +1684,11 @@ void* getThreadStaticsBaseOffset() #else void* getThreadStaticDescriptor(uint8_t* p) { - _ASSERTE_MSG((p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d), - "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); + if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) + { + // The optimization is disabled if coreclr is not compiled in .so format. + return 0; + } // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. // These opcodes are patched by the dynamic linker. From 3fcec56201a437c1e6ae7b483c5bb9ee488a0b3d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 21:41:56 -0700 Subject: [PATCH 51/79] working for linux/x64 --- src/coreclr/vm/amd64/jithelpers_fast.S | 21 +++++------ src/coreclr/vm/jitinterface.cpp | 52 ++++++++++++-------------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index cbb8f0db6456d..7afa16bde1ad3 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -530,26 +530,23 @@ LEAF_ENTRY JIT_DispatchIndirectCall, _TEST rex64 jmp rax LEAF_END JIT_DispatchIndirectCall, _TEST -// Calculates the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime -// once linker does its relocation and fixup of thread locals. It retrieves the current `instruction pointer` so -// the caller can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the +// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +// once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so +// it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the // final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for // the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern // of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. -// We do not use the result we get from `__tls_get_addr()` call, but instead just return the `instructino pointer` value -// that we read in the beginning. +// We never have to call this method directly, and hence there is a `int 3` at the end. LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT - push rbx - push rdi - lea rbx, [rip] // return the current instruction pointer +#ifdef TARGET_OSX + movq _t_ThreadStatics@TLVP(%%rip), %%rdi +#else +#endif data16 lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation data16 data16 rex64 call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset - mov rax, rbx // return the instruction pointer - pop rdi - pop rbx - ret + int 3 LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index fab92fe1e8cd2..2af48e385278a 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1649,41 +1649,21 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG #ifdef HOST_AMD64 -#ifdef TARGET_OSX + void* getThreadStaticDescriptor(uint8_t* p) { - _ASSERTE_MSG((p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d), - "Unexpected instruction - this can happen when this is not compiled in .so (e.g. for single file)"); +#ifdef TARGET_OSX + if (!(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d)) + { + // The optimization is disabled if coreclr is not compiled in .so format. + return 0; + } // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. // These opcodes are patched by the dynamic linker. // Move beyond the opcodes that we have already checked above. p += 3; - - // The descriptor address is located at *p at this point. Read that and add - // it to the instruction pointer to locate the address of `ti` that will be used - // to pass to __tls_get_addr during execution. - // (p + 4) below skips the descriptor address bytes embedded in the instruction and - // add it to the `instruction pointer` to find out the address. - return *(uint32_t*)p + (p + 4); -} - -// Generates sequence for accessing offset in TLS for osx/x64 -void* getThreadStaticsBaseOffset() -{ - uint8_t* p; - __asm__ ( - "leaq 0(%%rip), %%rdx\n" - "movq _t_ThreadStatics@TLVP(%%rip), %%rdi\n" - : "=d"(p) - ); - - return getThreadStaticDescriptor(p); -} - #else -void* getThreadStaticDescriptor(uint8_t* p) -{ if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) { // The optimization is disabled if coreclr is not compiled in .so format. @@ -1694,6 +1674,7 @@ void* getThreadStaticDescriptor(uint8_t* p) // These opcodes are patched by the dynamic linker. // Move beyond the opcodes that we have already checked above. p += 4; +#endif // The descriptor address is located at *p at this point. Read that and add // it to the instruction pointer to locate the address of `ti` that will be used @@ -1703,15 +1684,28 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } +//// Generates sequence for accessing offset in TLS for osx/x64 +//void* getThreadStaticsBaseOffset() +//{ +// uint8_t* p; +// __asm__ ( +// "leaq 0(%%rip), %%rdx\n" +// "movq _t_ThreadStatics@TLVP(%%rip), %%rdi\n" +// : "=d"(p) +// ); +// +// return getThreadStaticDescriptor(p); +//} + + extern "C" void* JIT_GetThreadStaticsBaseOffset(); void* getThreadStaticsBaseOffset() { - uint8_t* p = (uint8_t*)JIT_GetThreadStaticsBaseOffset(); + uint8_t* p = reinterpret_cast(&JIT_GetThreadStaticsBaseOffset); return getThreadStaticDescriptor(p); } -#endif // TARGET_OSX #endif // HOST_AMD64 #ifdef HOST_ARM64 From c39cf0eb8d20920fbbb9c19bbce26659994cae83 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 22:00:59 -0700 Subject: [PATCH 52/79] fix some errors for osx/x64 --- src/coreclr/vm/amd64/jithelpers_fast.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 7afa16bde1ad3..af77273c690c5 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -539,14 +539,14 @@ LEAF_END JIT_DispatchIndirectCall, _TEST // We never have to call this method directly, and hence there is a `int 3` at the end. LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT #ifdef TARGET_OSX - movq _t_ThreadStatics@TLVP(%%rip), %%rdi + mov rdi, _t_ThreadStatics@TLVP[rip] #else -#endif data16 - lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation + lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation data16 data16 rex64 - call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset + call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset +#endif int 3 LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT From 9906f4e5e3f9af71368718bea0c64e827392f0f0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 26 Jun 2023 22:22:28 -0700 Subject: [PATCH 53/79] fix for osx x64/arm64 --- src/coreclr/vm/CMakeLists.txt | 1 + src/coreclr/vm/arm64/jithelpers_fast.S | 23 +++++++ src/coreclr/vm/jitinterface.cpp | 94 +++++++++++--------------- 3 files changed, 65 insertions(+), 53 deletions(-) create mode 100644 src/coreclr/vm/arm64/jithelpers_fast.S diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index bf6b91acbf4bd..ec81bcaa3e018 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -732,6 +732,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/jithelpers_fast.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) diff --git a/src/coreclr/vm/arm64/jithelpers_fast.S b/src/coreclr/vm/arm64/jithelpers_fast.S new file mode 100644 index 0000000000000..92b60470d0036 --- /dev/null +++ b/src/coreclr/vm/arm64/jithelpers_fast.S @@ -0,0 +1,23 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +// once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so +// it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the +// final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for +// the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern +// of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. +// We never have to call this method directly, and hence there is a `int 3` at the end. +LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT +#ifdef TARGET_OSX + adrp x0, _t_ThreadStatics@TLVPPAGE + ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] +#else + adrp x0, :tlsdesc:t_ThreadStatics + ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics] + add x0, x0, :tlsdesc_lo12:t_ThreadStatics + .tlsdesccall t_ThreadStatics + blr x1 +#endif + ret +LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 2af48e385278a..4f6c40f9654ec 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1647,8 +1647,9 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG #ifndef _MSC_VER -#ifdef HOST_AMD64 +extern "C" void* JIT_GetThreadStaticsBaseOffset(); +#ifdef HOST_AMD64 void* getThreadStaticDescriptor(uint8_t* p) { @@ -1684,69 +1685,56 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -//// Generates sequence for accessing offset in TLS for osx/x64 -//void* getThreadStaticsBaseOffset() -//{ -// uint8_t* p; -// __asm__ ( -// "leaq 0(%%rip), %%rdx\n" -// "movq _t_ThreadStatics@TLVP(%%rip), %%rdi\n" -// : "=d"(p) -// ); -// -// return getThreadStaticDescriptor(p); -//} - - -extern "C" void* JIT_GetThreadStaticsBaseOffset(); - void* getThreadStaticsBaseOffset() { uint8_t* p = reinterpret_cast(&JIT_GetThreadStaticsBaseOffset); return getThreadStaticDescriptor(p); } -#endif // HOST_AMD64 - -#ifdef HOST_ARM64 - -#ifdef TARGET_OSX +#elif HOST_ARM64 -// Generates sequence for accessing offset in TLS for osx/arm64 uint64_t getThreadStaticsBaseOffset() { - uint64_t tlvGetAddr; - __asm__ ( - "adrp x0, _t_ThreadStatics@TLVPPAGE\n" - "ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]\n" - "mov %[result], x0\n" - : [result] "=r" (tlvGetAddr) - : - : "x0", "x1" - ); - return tlvGetAddr; + return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); } -#else -// Generates sequence for accessing offset in TLS for linux/arm64 -uint64_t getThreadStaticsBaseOffset() -{ - uint64_t offset; - __asm__ ( - "adrp x0, :tlsdesc:t_ThreadStatics\n" - "ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]\n" - "add x0, x0, :tlsdesc_lo12:t_ThreadStatics\n" - ".tlsdesccall t_ThreadStatics\n" - "blr x1\n" - "mov %[result], x0\n" - : [result] "=r" (offset) - : - : "x0", "x1" - ); - - return offset; -} -#endif // TARGET_OSX +//#ifdef TARGET_OSX +// +//// Generates sequence for accessing offset in TLS for osx/arm64 +//uint64_t getThreadStaticsBaseOffset() +//{ +// uint64_t tlvGetAddr; +// __asm__ ( +// "adrp x0, _t_ThreadStatics@TLVPPAGE\n" +// "ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]\n" +// "mov %[result], x0\n" +// : [result] "=r" (tlvGetAddr) +// : +// : "x0", "x1" +// ); +// return tlvGetAddr; +//} +//#else +// +//// Generates sequence for accessing offset in TLS for linux/arm64 +//uint64_t getThreadStaticsBaseOffset() +//{ +// uint64_t offset; +// __asm__ ( +// "adrp x0, :tlsdesc:t_ThreadStatics\n" +// "ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]\n" +// "add x0, x0, :tlsdesc_lo12:t_ThreadStatics\n" +// ".tlsdesccall t_ThreadStatics\n" +// "blr x1\n" +// "mov %[result], x0\n" +// : [result] "=r" (offset) +// : +// : "x0", "x1" +// ); +// +// return offset; +//} +//#endif // TARGET_OSX #endif // HOST_ARM64 #else /*********************************************************************/ From cf3b8c016f287460cc8ac2b96e6f2ca08fe805b1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 Jun 2023 00:02:29 -0700 Subject: [PATCH 54/79] fix for arm64 linux/osx --- src/coreclr/jit/helperexpansion.cpp | 1 - src/coreclr/vm/arm64/jithelpers_fast.S | 15 ++++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 06bea8563f489..f2963afb28e1c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -520,7 +520,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* } } #endif // TARGET_AMD64 - size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; diff --git a/src/coreclr/vm/arm64/jithelpers_fast.S b/src/coreclr/vm/arm64/jithelpers_fast.S index 92b60470d0036..1a64ec19d6e26 100644 --- a/src/coreclr/vm/arm64/jithelpers_fast.S +++ b/src/coreclr/vm/arm64/jithelpers_fast.S @@ -1,14 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "asmconstants.h" +#include "unixasmmacros.inc" + // Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime -// once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so -// it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the -// final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for -// the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern -// of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. -// We never have to call this method directly, and hence there is a `int 3` at the end. +// once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. + LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 #ifdef TARGET_OSX adrp x0, _t_ThreadStatics@TLVPPAGE ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] @@ -19,5 +19,6 @@ LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT .tlsdesccall t_ThreadStatics blr x1 #endif - ret + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + EPILOG_RETURN LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT From ed0c6a7351e4e6f6719311b313f79f27f32e2575 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 Jun 2023 10:27:55 -0700 Subject: [PATCH 55/79] try disable for musl/arm64 --- src/coreclr/vm/jitinterface.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 4f6c40f9654ec..0267735a1bb87 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1413,8 +1413,8 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // Optimization is disabled for linux/windows arm #elif !defined(TARGET_WINDOWS) && defined(TARGET_X86) // Optimization is disabled for linux/x86 -#elif defined(TARGET_ALPINE_LINUX) - // Optimization is disabled for linux/alpine +#elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64) + // Optimization is disabled for linux musl arm64 #else // For windows x64/x86/arm64, linux x64/arm64: // We convert the TLS access to the optimized helper where we will store From 7dca8217664bf4a1ca711ee51f2cf45726c89531 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 Jun 2023 16:41:59 -0700 Subject: [PATCH 56/79] rename variable --- src/coreclr/inc/corinfo.h | 2 +- src/coreclr/jit/helperexpansion.cpp | 16 +++---- .../tools/Common/JitInterface/CorInfoTypes.cs | 2 +- .../tools/superpmi/superpmi-shared/agnostic.h | 2 +- .../superpmi-shared/methodcontext.cpp | 10 ++--- src/coreclr/vm/jitinterface.cpp | 42 +------------------ 6 files changed, 18 insertions(+), 56 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 0116389d2a3b5..f165b0cbc74a5 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1731,7 +1731,7 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO size_t offsetOfMaxThreadStaticBlocks; size_t offsetOfThreadStaticBlocks; size_t tlsGetAddrFtnPtr; // linux/x64 specific - size_t descrAddrOfMaxThreadStaticBlock; // linux/x64 specific + size_t threadStaticsBaseOffset; // linux/x64 specific uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index f2963afb28e1c..08f5e19527ff0 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -512,7 +512,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #ifdef TARGET_AMD64 if (TargetOS::IsUnix || TargetOS::IsMacOS) { - if (threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock == 0) + if (threadStaticBlocksInfo.threadStaticsBaseOffset == 0) { // We possibly compiled coreclr as single file and not .so file. // Do not perform this optimization for it. @@ -530,7 +530,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); - JITDUMP("descrAddrOfMaxThreadStaticBlock= %u\n", threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock); + JITDUMP("threadStaticsBaseOffset= %u\n", threadStaticBlocksInfo.threadStaticsBaseOffset); JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -605,17 +605,17 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { // Code sequence to access thread local variable on osx/x64: // - // mov rdi, descrAddrOfMaxThreadStaticBlock + // mov rdi, threadStaticsBaseOffset // call [rdi] // // Code sequence to access thread local variable on osx/arm64: // - // mov x0, descrAddrOfMaxThreadStaticBlock + // mov x0, threadStaticsBaseOffset // mov x1, [x0] // blr x1 // GenTree* tls_get_addr_val = - gtNewIconHandleNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, GTF_ICON_FTN_ADDR); + gtNewIconHandleNode(threadStaticBlocksInfo.threadStaticsBaseOffset, GTF_ICON_FTN_ADDR); tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); @@ -624,7 +624,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.threadStaticsBaseOffset, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -638,7 +638,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #if defined(TARGET_AMD64) // Code sequence to access thread local variable on linux/x64: // - // mov rdi, 0x7FE5C418CD28 ; descrAddrOfMaxThreadStaticBlock + // mov rdi, 0x7FE5C418CD28 ; threadStaticsBaseOffset // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr // call rax // @@ -648,7 +648,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI appropriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.descrAddrOfMaxThreadStaticBlock, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.threadStaticsBaseOffset, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 052d80cd21fb0..cef8f653925ca 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1150,7 +1150,7 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO public CORINFO_CONST_LOOKUP tlsIndex; public uint offsetOfThreadLocalStoragePointer; public nuint tlsGetAddrFtnPtr; - public nuint descrAddrOfMaxThreadStaticBlock; + public nuint threadStaticsBaseOffset; public nuint offsetOfMaxThreadStaticBlocks; public nuint offsetOfThreadStaticBlocks; public uint offsetOfGCDataPointer; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 8b9e5d300ebc6..16aa453213f69 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -534,7 +534,7 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo DWORDLONG offsetOfMaxThreadStaticBlocks; DWORDLONG offsetOfThreadStaticBlocks; DWORDLONG tlsGetAddrFtnPtr; - DWORDLONG descrAddrOfMaxThreadStaticBlock; + DWORDLONG threadStaticsBaseOffset; UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 6c686a4812bae..933987790cbe4 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3579,7 +3579,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; - value.descrAddrOfMaxThreadStaticBlock = pInfo->descrAddrOfMaxThreadStaticBlock; + value.threadStaticsBaseOffset = pInfo->threadStaticsBaseOffset; value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. @@ -3593,10 +3593,10 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%016" PRIX64 ", offsetOfThreadStaticBlocks-%016" PRIX64 " offsetOfGCDataPointer-%u" - ", value tlsGetAddrFtnPtr-%016" PRIX64 ", descrAddrOfMaxThreadStaticBlock--%016" PRIX64 , + ", value tlsGetAddrFtnPtr-%016" PRIX64 ", threadStaticsBaseOffset--%016" PRIX64 , key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, - value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, - value.tlsGetAddrFtnPtr, value.descrAddrOfMaxThreadStaticBlock); + value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, + value.threadStaticsBaseOffset); } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3612,7 +3612,7 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; pInfo->tlsGetAddrFtnPtr = (DWORD)value.tlsGetAddrFtnPtr; - pInfo->descrAddrOfMaxThreadStaticBlock = (DWORD)value.descrAddrOfMaxThreadStaticBlock; + pInfo->threadStaticsBaseOffset = (DWORD)value.threadStaticsBaseOffset; pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 792267c8b5ccd..503abb310920b 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1697,44 +1697,6 @@ uint64_t getThreadStaticsBaseOffset() { return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); } - -//#ifdef TARGET_OSX -// -//// Generates sequence for accessing offset in TLS for osx/arm64 -//uint64_t getThreadStaticsBaseOffset() -//{ -// uint64_t tlvGetAddr; -// __asm__ ( -// "adrp x0, _t_ThreadStatics@TLVPPAGE\n" -// "ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]\n" -// "mov %[result], x0\n" -// : [result] "=r" (tlvGetAddr) -// : -// : "x0", "x1" -// ); -// return tlvGetAddr; -//} -//#else -// -//// Generates sequence for accessing offset in TLS for linux/arm64 -//uint64_t getThreadStaticsBaseOffset() -//{ -// uint64_t offset; -// __asm__ ( -// "adrp x0, :tlsdesc:t_ThreadStatics\n" -// "ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]\n" -// "add x0, x0, :tlsdesc_lo12:t_ThreadStatics\n" -// ".tlsdesccall t_ThreadStatics\n" -// "blr x1\n" -// "mov %[result], x0\n" -// : [result] "=r" (offset) -// : -// : "x0", "x1" -// ); -// -// return offset; -//} -//#endif // TARGET_OSX #endif // HOST_ARM64 #else /*********************************************************************/ @@ -1773,14 +1735,14 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // For OSX x64/arm64, need to get the address of relevant tlv_get_addr of thread static // variable that will be invoked during runtime to get the right address of corresponding // thread. - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); + pInfo->threadStaticsBaseOffset = (size_t)getThreadStaticsBaseOffset(); #elif defined(TARGET_AMD64) // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->descrAddrOfMaxThreadStaticBlock = (size_t)getThreadStaticsBaseOffset(); + pInfo->threadStaticsBaseOffset = (size_t)getThreadStaticsBaseOffset(); #elif defined(TARGET_ARM64) From 99dec18c0b8d353f160505a4365b09aafeaafc92 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 Jun 2023 23:05:42 -0700 Subject: [PATCH 57/79] Rename variable to tlsIndexObject --- src/coreclr/inc/corinfo.h | 4 ++-- src/coreclr/jit/helperexpansion.cpp | 18 +++++++++--------- .../tools/Common/JitInterface/CorInfoTypes.cs | 4 ++-- .../tools/superpmi/superpmi-shared/agnostic.h | 4 ++-- .../superpmi/superpmi-shared/methodcontext.cpp | 9 ++++----- src/coreclr/vm/jitinterface.cpp | 10 +++++----- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index f165b0cbc74a5..7ce566ea72697 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1727,11 +1727,11 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { CORINFO_CONST_LOOKUP tlsIndex; // windows specific + size_t tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function + size_t tlsIndexObject; // linux/x64 specific - address of tls_index object uint32_t offsetOfThreadLocalStoragePointer; // windows specific size_t offsetOfMaxThreadStaticBlocks; size_t offsetOfThreadStaticBlocks; - size_t tlsGetAddrFtnPtr; // linux/x64 specific - size_t threadStaticsBaseOffset; // linux/x64 specific uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 08f5e19527ff0..7f0262c6b67e2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -512,10 +512,11 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #ifdef TARGET_AMD64 if (TargetOS::IsUnix || TargetOS::IsMacOS) { - if (threadStaticBlocksInfo.threadStaticsBaseOffset == 0) + if (threadStaticBlocksInfo.tlsIndexObject == 0) { // We possibly compiled coreclr as single file and not .so file. // Do not perform this optimization for it. + JITDUMP("There appears some problem finding the address of tls_index object. Exiting the optimization.\n"); return false; } } @@ -530,7 +531,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); - JITDUMP("threadStaticsBaseOffset= %u\n", threadStaticBlocksInfo.threadStaticsBaseOffset); + JITDUMP("tlsIndexObject= %u\n", threadStaticBlocksInfo.tlsIndexObject); JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -605,17 +606,16 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { // Code sequence to access thread local variable on osx/x64: // - // mov rdi, threadStaticsBaseOffset + // mov rdi, tlsIndexObject // call [rdi] // // Code sequence to access thread local variable on osx/arm64: // - // mov x0, threadStaticsBaseOffset + // mov x0, tlsIndexObject // mov x1, [x0] // blr x1 // - GenTree* tls_get_addr_val = - gtNewIconHandleNode(threadStaticBlocksInfo.threadStaticsBaseOffset, GTF_ICON_FTN_ADDR); + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsIndexObject, GTF_ICON_FTN_ADDR); tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); @@ -624,7 +624,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.threadStaticsBaseOffset, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -638,7 +638,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #if defined(TARGET_AMD64) // Code sequence to access thread local variable on linux/x64: // - // mov rdi, 0x7FE5C418CD28 ; threadStaticsBaseOffset + // mov rdi, 0x7FE5C418CD28 ; tlsIndexObject // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr // call rax // @@ -648,7 +648,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI appropriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.threadStaticsBaseOffset, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index cef8f653925ca..2eb6e0e3069ac 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1148,9 +1148,9 @@ public unsafe struct CORINFO_FIELD_INFO public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO { public CORINFO_CONST_LOOKUP tlsIndex; - public uint offsetOfThreadLocalStoragePointer; public nuint tlsGetAddrFtnPtr; - public nuint threadStaticsBaseOffset; + public nuint tlsIndexObject; + public uint offsetOfThreadLocalStoragePointer; public nuint offsetOfMaxThreadStaticBlocks; public nuint offsetOfThreadStaticBlocks; public uint offsetOfGCDataPointer; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 16aa453213f69..f448a30ecee04 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -530,11 +530,11 @@ struct Agnostic_GetProfilingHandle struct Agnostic_GetThreadLocalStaticBlocksInfo { Agnostic_CORINFO_CONST_LOOKUP tlsIndex; + DWORDLONG tlsGetAddrFtnPtr; + DWORDLONG tlsIndexObject; UINT offsetOfThreadLocalStoragePointer; DWORDLONG offsetOfMaxThreadStaticBlocks; DWORDLONG offsetOfThreadStaticBlocks; - DWORDLONG tlsGetAddrFtnPtr; - DWORDLONG threadStaticsBaseOffset; UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 933987790cbe4..9167a32be10cd 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3579,7 +3579,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; - value.threadStaticsBaseOffset = pInfo->threadStaticsBaseOffset; + value.tlsIndexObject = pInfo->tlsIndexObject; value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. @@ -3593,10 +3593,9 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%016" PRIX64 ", offsetOfThreadStaticBlocks-%016" PRIX64 " offsetOfGCDataPointer-%u" - ", value tlsGetAddrFtnPtr-%016" PRIX64 ", threadStaticsBaseOffset--%016" PRIX64 , + ", value tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject--%016" PRIX64 , key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, - value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, - value.threadStaticsBaseOffset); + value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.tlsIndexObject); } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3612,7 +3611,7 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; pInfo->tlsGetAddrFtnPtr = (DWORD)value.tlsGetAddrFtnPtr; - pInfo->threadStaticsBaseOffset = (DWORD)value.threadStaticsBaseOffset; + pInfo->tlsIndexObject = (DWORD)value.tlsIndexObject; pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 503abb310920b..e1d3f776f2e2f 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1685,7 +1685,7 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getThreadStaticsBaseOffset() +void* getTlsIndexObjectAddress() { uint8_t* p = reinterpret_cast(&JIT_GetThreadStaticsBaseOffset); return getThreadStaticDescriptor(p); @@ -1693,7 +1693,7 @@ void* getThreadStaticsBaseOffset() #elif HOST_ARM64 -uint64_t getThreadStaticsBaseOffset() +uint64_t getTlsIndexObjectAddress() { return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); } @@ -1735,20 +1735,20 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // For OSX x64/arm64, need to get the address of relevant tlv_get_addr of thread static // variable that will be invoked during runtime to get the right address of corresponding // thread. - pInfo->threadStaticsBaseOffset = (size_t)getThreadStaticsBaseOffset(); + pInfo->tlsIndexObject = (size_t)getTlsIndexObjectAddress(); #elif defined(TARGET_AMD64) // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->threadStaticsBaseOffset = (size_t)getThreadStaticsBaseOffset(); + pInfo->tlsIndexObject = (size_t)getTlsIndexObjectAddress(); #elif defined(TARGET_ARM64) // For Linux/arm64, just get the offset of thread static variable, and during execution, // this offset, taken from trpid_elp0 system register gives back the thread variable address. - threadStaticBaseOffset = getThreadStaticsBaseOffset(); + threadStaticBaseOffset = getTlsIndexObjectAddress(); #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); From bdca9fed7a9fb7b2fb90ab1d767d25ce8082dacc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 27 Jun 2023 23:13:32 -0700 Subject: [PATCH 58/79] Make offset variables as uint32_t --- src/coreclr/inc/corinfo.h | 4 ++-- src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs | 4 ++-- src/coreclr/tools/superpmi/superpmi-shared/agnostic.h | 4 ++-- .../tools/superpmi/superpmi-shared/methodcontext.cpp | 4 ++-- src/coreclr/vm/jitinterface.cpp | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 7ce566ea72697..410519bf5e61a 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1730,8 +1730,8 @@ struct CORINFO_THREAD_STATIC_BLOCKS_INFO size_t tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function size_t tlsIndexObject; // linux/x64 specific - address of tls_index object uint32_t offsetOfThreadLocalStoragePointer; // windows specific - size_t offsetOfMaxThreadStaticBlocks; - size_t offsetOfThreadStaticBlocks; + uint32_t offsetOfMaxThreadStaticBlocks; + uint32_t offsetOfThreadStaticBlocks; uint32_t offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 2eb6e0e3069ac..c2ae47ffb458b 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1151,8 +1151,8 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO public nuint tlsGetAddrFtnPtr; public nuint tlsIndexObject; public uint offsetOfThreadLocalStoragePointer; - public nuint offsetOfMaxThreadStaticBlocks; - public nuint offsetOfThreadStaticBlocks; + public uint offsetOfMaxThreadStaticBlocks; + public uint offsetOfThreadStaticBlocks; public uint offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index f448a30ecee04..9a906b4a10db4 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -533,8 +533,8 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo DWORDLONG tlsGetAddrFtnPtr; DWORDLONG tlsIndexObject; UINT offsetOfThreadLocalStoragePointer; - DWORDLONG offsetOfMaxThreadStaticBlocks; - DWORDLONG offsetOfThreadStaticBlocks; + UINT offsetOfMaxThreadStaticBlocks; + UINT offsetOfThreadStaticBlocks; UINT offsetOfGCDataPointer; }; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 9167a32be10cd..70c744321a686 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3591,8 +3591,8 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 - ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%016" PRIX64 - ", offsetOfThreadStaticBlocks-%016" PRIX64 " offsetOfGCDataPointer-%u" + ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" + ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u" ", value tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject--%016" PRIX64 , key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.tlsIndexObject); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index e1d3f776f2e2f..5c83795df1d1d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1756,13 +1756,13 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* if (isGCType) { - pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks); + pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks)); + pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks)); } else { - pInfo->offsetOfMaxThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks); - pInfo->offsetOfThreadStaticBlocks = threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks); + pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks)); + pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks)); } pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); From f1e5459c389ee07be338787e295a89aa635e39c0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 08:18:35 -0700 Subject: [PATCH 59/79] change the type of indexObj/ftnAddr to void* --- src/coreclr/inc/corinfo.h | 4 ++-- src/coreclr/jit/helperexpansion.cpp | 18 ++++++++++-------- .../superpmi/superpmi-shared/methodcontext.cpp | 8 ++++---- src/coreclr/vm/jitinterface.cpp | 10 +++++----- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 410519bf5e61a..5145bf24e08f0 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1727,8 +1727,8 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { CORINFO_CONST_LOOKUP tlsIndex; // windows specific - size_t tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function - size_t tlsIndexObject; // linux/x64 specific - address of tls_index object + void* tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function + void* tlsIndexObject; // linux/x64 specific - address of tls_index object uint32_t offsetOfThreadLocalStoragePointer; // windows specific uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7f0262c6b67e2..3be477ef830a5 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -522,16 +522,18 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* } #endif // TARGET_AMD64 + size_t tlsIndexObjectVal = (size_t)threadStaticBlocksInfo.tlsIndexObject; + size_t tlsGetAddrFtnPtrVal = (size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr; size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; - JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); - JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); + JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); + JITDUMP("tlsGetAddrFtnPtr= %u\n", tlsGetAddrFtnPtrVal); + JITDUMP("tlsIndexObject= %u\n", tlsIndexObjectVal); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); - JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); - JITDUMP("tlsIndexObject= %u\n", threadStaticBlocksInfo.tlsIndexObject); + JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -615,7 +617,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // mov x1, [x0] // blr x1 // - GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsIndexObject, GTF_ICON_FTN_ADDR); + GenTree* tls_get_addr_val = gtNewIconHandleNode(tlsIndexObjectVal, GTF_ICON_FTN_ADDR); tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); @@ -624,7 +626,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(tlsIndexObjectVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -642,13 +644,13 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr // call rax // - GenTree* tls_get_addr_val = gtNewIconHandleNode(threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + GenTree* tls_get_addr_val = gtNewIconHandleNode(tlsGetAddrFtnPtrVal, GTF_ICON_FTN_ADDR); tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); // This is a syscall indirect call which takes an argument. // Populate and set the ABI appropriately. - GenTree* tlsArg = gtNewIconNode(threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(tlsIndexObjectVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 70c744321a686..f72f2cd2a83d2 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3575,11 +3575,11 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); value.tlsIndex.accessType = pInfo->tlsIndex.accessType; + value.tlsGetAddrFtnPtr = CastHandle(pInfo->tlsGetAddrFtnPtr); + value.tlsIndexObject = CastHandle(pInfo->tlsIndexObject); value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; - value.tlsGetAddrFtnPtr = pInfo->tlsGetAddrFtnPtr; - value.tlsIndexObject = pInfo->tlsIndexObject; value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. @@ -3607,11 +3607,11 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; + pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr; + pInfo->tlsIndexObject = (void*)value.tlsIndexObject; pInfo->offsetOfMaxThreadStaticBlocks = (DWORD)value.offsetOfMaxThreadStaticBlocks; pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; - pInfo->tlsGetAddrFtnPtr = (DWORD)value.tlsGetAddrFtnPtr; - pInfo->tlsIndexObject = (DWORD)value.tlsIndexObject; pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 5c83795df1d1d..b2c272140a282 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1693,9 +1693,9 @@ void* getTlsIndexObjectAddress() #elif HOST_ARM64 -uint64_t getTlsIndexObjectAddress() +void* getTlsIndexObjectAddress() { - return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); + return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); } #endif // HOST_ARM64 #else @@ -1741,14 +1741,14 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. - pInfo->tlsGetAddrFtnPtr = (size_t)&__tls_get_addr; - pInfo->tlsIndexObject = (size_t)getTlsIndexObjectAddress(); + pInfo->tlsGetAddrFtnPtr = &__tls_get_addr; + pInfo->tlsIndexObject = getTlsIndexObjectAddress(); #elif defined(TARGET_ARM64) // For Linux/arm64, just get the offset of thread static variable, and during execution, // this offset, taken from trpid_elp0 system register gives back the thread variable address. - threadStaticBaseOffset = getTlsIndexObjectAddress(); + threadStaticBaseOffset = (size_t)getTlsIndexObjectAddress(); #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); From cb409f57b47f973c03a9cf2c9725192b92a201fb Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 08:22:42 -0700 Subject: [PATCH 60/79] replace ifdef(msc_ver) with ifdef(windows) --- src/coreclr/vm/jitinterface.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index b2c272140a282..4e2f3e41d7b53 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1645,7 +1645,16 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG return typeIndex; } -#ifndef _MSC_VER +#if defined(TARGET_WINDOWS) +/*********************************************************************/ +static uint32_t ThreadLocalOffset(void* p) +{ + PTEB Teb = NtCurrentTeb(); + uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; + uint8_t* pOurTls = pTls[_tls_index]; + return (uint32_t)((uint8_t*)p - pOurTls); +} +#else extern "C" void* JIT_GetThreadStaticsBaseOffset(); @@ -1698,17 +1707,7 @@ void* getTlsIndexObjectAddress() return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); } #endif // HOST_ARM64 -#else -/*********************************************************************/ -static uint32_t ThreadLocalOffset(void* p) -{ - PTEB Teb = NtCurrentTeb(); - uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; - uint8_t* pOurTls = pTls[_tls_index]; - return (uint32_t)((uint8_t*)p - pOurTls); -} - -#endif // !_MSC_VER +#endif // TARGET_WINDOWS void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) From e3b7dc6522d92a0b8c70554fed7f9184834904c3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 08:44:07 -0700 Subject: [PATCH 61/79] Revert to JIT_TO_EE_TRANSITION_LEAF --- src/coreclr/vm/jitinterface.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 4e2f3e41d7b53..c0508e2a5172c 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1718,7 +1718,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* MODE_PREEMPTIVE; } CONTRACTL_END; - JIT_TO_EE_TRANSITION(); + JIT_TO_EE_TRANSITION_LEAF(); size_t threadStaticBaseOffset = 0; @@ -1765,7 +1765,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* } pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - EE_TO_JIT_TRANSITION(); + EE_TO_JIT_TRANSITION_LEAF(); } //--------------------------------------------------------------------------------------- From ab284c77c909ec24bb8efe3436acb23aaac27498 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 08:52:48 -0700 Subject: [PATCH 62/79] Move code to asmHelpers.S and rename method --- src/coreclr/vm/CMakeLists.txt | 1 - src/coreclr/vm/amd64/asmhelpers.S | 27 ++++++++++++++++++++++++++ src/coreclr/vm/amd64/jithelpers_fast.S | 21 -------------------- src/coreclr/vm/arm64/asmhelpers.S | 24 +++++++++++++++++++++++ src/coreclr/vm/arm64/jithelpers_fast.S | 24 ----------------------- src/coreclr/vm/jitinterface.cpp | 6 +++--- 6 files changed, 54 insertions(+), 49 deletions(-) delete mode 100644 src/coreclr/vm/arm64/jithelpers_fast.S diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index ec81bcaa3e018..bf6b91acbf4bd 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -732,7 +732,6 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/crthelpers.S - ${ARCH_SOURCES_DIR}/jithelpers_fast.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index bebfd3376c12d..0fedff2c4b227 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -307,3 +307,30 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler ret NESTED_END ProfileTailcallNaked, _TEXT + + +# EXTERN_C void* GetThreadStaticsBaseOffset(); + +# +# Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +# once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so +# it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the +# final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for +# the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern +# of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. +# We never have to call this method directly, and hence there is a `int 3` at the end. +# + +LEAF_ENTRY GetThreadStaticsBaseOffset, _TEXT +#ifdef TARGET_OSX + mov rdi, _t_ThreadStatics@TLVP[rip] +#else + data16 + lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation + data16 + data16 + rex64 + call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset +#endif + int 3 +LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index af77273c690c5..32890b471b26c 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -529,24 +529,3 @@ LEAF_ENTRY JIT_DispatchIndirectCall, _TEST movabs r11, 0xCDCDCDCDCDCDCDCD rex64 jmp rax LEAF_END JIT_DispatchIndirectCall, _TEST - -// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime -// once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so -// it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the -// final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for -// the current thread. Here, we have to call `__tls_get_addr()`, because if the linker tries to find the code pattern -// of "lea t_ThreadStatics@TLSGD", followed by `call __tls_get_addr()`. Without adding the call, the linker complains. -// We never have to call this method directly, and hence there is a `int 3` at the end. -LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT -#ifdef TARGET_OSX - mov rdi, _t_ThreadStatics@TLVP[rip] -#else - data16 - lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation - data16 - data16 - rex64 - call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset -#endif - int 3 -LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 574d30068f099..81e007f7a4a5d 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -974,3 +974,27 @@ LEAF_END JIT_ValidateIndirectCall, _TEXT LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT br x9 LEAF_END JIT_DispatchIndirectCall, _TEXT + + +// ------------------------------------------------------------------ +// void* GetThreadStaticsBaseOffset() + +// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +// once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. + +LEAF_ENTRY GetThreadStaticsBaseOffset, _TEXT + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 +#ifdef TARGET_OSX + adrp x0, _t_ThreadStatics@TLVPPAGE + ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] +#else + adrp x0, :tlsdesc:t_ThreadStatics + ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics] + add x0, x0, :tlsdesc_lo12:t_ThreadStatics + .tlsdesccall t_ThreadStatics + blr x1 +#endif + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + EPILOG_RETURN +LEAF_END GetThreadStaticsBaseOffset, _TEXT +// ------------------------------------------------------------------ diff --git a/src/coreclr/vm/arm64/jithelpers_fast.S b/src/coreclr/vm/arm64/jithelpers_fast.S deleted file mode 100644 index 1a64ec19d6e26..0000000000000 --- a/src/coreclr/vm/arm64/jithelpers_fast.S +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "asmconstants.h" -#include "unixasmmacros.inc" - -// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime -// once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. - -LEAF_ENTRY JIT_GetThreadStaticsBaseOffset, _TEXT - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 -#ifdef TARGET_OSX - adrp x0, _t_ThreadStatics@TLVPPAGE - ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] -#else - adrp x0, :tlsdesc:t_ThreadStatics - ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics] - add x0, x0, :tlsdesc_lo12:t_ThreadStatics - .tlsdesccall t_ThreadStatics - blr x1 -#endif - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - EPILOG_RETURN -LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index c0508e2a5172c..8c77c8a62e5ec 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1656,7 +1656,7 @@ static uint32_t ThreadLocalOffset(void* p) } #else -extern "C" void* JIT_GetThreadStaticsBaseOffset(); +extern "C" void* GetThreadStaticsBaseOffset(); #ifdef HOST_AMD64 @@ -1696,7 +1696,7 @@ void* getThreadStaticDescriptor(uint8_t* p) void* getTlsIndexObjectAddress() { - uint8_t* p = reinterpret_cast(&JIT_GetThreadStaticsBaseOffset); + uint8_t* p = reinterpret_cast(&GetThreadStaticsBaseOffset); return getThreadStaticDescriptor(p); } @@ -1704,7 +1704,7 @@ void* getTlsIndexObjectAddress() void* getTlsIndexObjectAddress() { - return reinterpret_cast(JIT_GetThreadStaticsBaseOffset()); + return reinterpret_cast(GetThreadStaticsBaseOffset()); } #endif // HOST_ARM64 #endif // TARGET_WINDOWS From bcc0a5585c4b07bbb761e719d8797a047c6bc5c0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 15:27:54 -0700 Subject: [PATCH 63/79] rename the methods per the platform --- src/coreclr/inc/corinfo.h | 5 +- src/coreclr/jit/helperexpansion.cpp | 40 ++++++++------ .../tools/Common/JitInterface/CorInfoTypes.cs | 1 + .../tools/superpmi/superpmi-shared/agnostic.h | 1 + .../superpmi-shared/methodcontext.cpp | 9 +++- src/coreclr/vm/amd64/asmhelpers.S | 30 ++++++++--- src/coreclr/vm/arm64/asmhelpers.S | 30 +++++++---- src/coreclr/vm/jitinterface.cpp | 53 ++++++++++++------- 8 files changed, 113 insertions(+), 56 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 5145bf24e08f0..414bf8bb0560d 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1727,8 +1727,9 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { CORINFO_CONST_LOOKUP tlsIndex; // windows specific - void* tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function - void* tlsIndexObject; // linux/x64 specific - address of tls_index object + void* tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function + void* tlsIndexObject; // linux/x64 specific - address of tls_index object + void* threadVarsSection; // osx x64/arm64 specific - address of __thread_vars section of `t_ThreadStatics` uint32_t offsetOfThreadLocalStoragePointer; // windows specific uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 3be477ef830a5..24add824a34a2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -522,18 +522,14 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* } #endif // TARGET_AMD64 - size_t tlsIndexObjectVal = (size_t)threadStaticBlocksInfo.tlsIndexObject; - size_t tlsGetAddrFtnPtrVal = (size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr; - size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; - size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; - JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); - JITDUMP("tlsGetAddrFtnPtr= %u\n", tlsGetAddrFtnPtrVal); - JITDUMP("tlsIndexObject= %u\n", tlsIndexObjectVal); - JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); - JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); + JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); + JITDUMP("tlsIndexObject= %u\n", (size_t)threadStaticBlocksInfo.tlsIndexObject); + JITDUMP("threadVarsSection= %u\n", (size_t)threadStaticBlocksInfo.threadVarsSection); + JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks); + JITDUMP("offsetOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks); JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || @@ -606,18 +602,24 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* } else if (TargetOS::IsMacOS) { + // For OSX x64/arm64, we need to get the address of relevant __thread_vars section of + // the thread local variable `t_ThreadStatics`. Address of `tlv_get_address` is stored + // in this entry, which we dereference and invoke it, passing the __thread_vars address + // present in `threadVarsSection`. + // // Code sequence to access thread local variable on osx/x64: // - // mov rdi, tlsIndexObject + // mov rdi, threadVarsSection // call [rdi] // // Code sequence to access thread local variable on osx/arm64: // - // mov x0, tlsIndexObject + // mov x0, threadVarsSection // mov x1, [x0] // blr x1 // - GenTree* tls_get_addr_val = gtNewIconHandleNode(tlsIndexObjectVal, GTF_ICON_FTN_ADDR); + size_t threadVarsSectionVal = (size_t)threadStaticBlocksInfo.threadVarsSection; + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadVarsSectionVal, GTF_ICON_FTN_ADDR); tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); @@ -626,7 +628,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a syscall indirect call which takes an argument. // Populate and set the ABI apporpriately. - GenTree* tlsArg = gtNewIconNode(tlsIndexObjectVal, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -644,13 +646,14 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr // call rax // - GenTree* tls_get_addr_val = gtNewIconHandleNode(tlsGetAddrFtnPtrVal, GTF_ICON_FTN_ADDR); - tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); - GenTreeCall* tlsRefCall = tlsValue->AsCall(); + GenTree* tls_get_addr_val = + gtNewIconHandleNode((size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); // This is a syscall indirect call which takes an argument. // Populate and set the ABI appropriately. - GenTree* tlsArg = gtNewIconNode(tlsIndexObjectVal, TYP_I_IMPL); + GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); @@ -676,6 +679,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); + size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; + size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; + // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); GenTree* maxThreadStaticBlocksRef = diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index c2ae47ffb458b..d21b0c4c17db4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1150,6 +1150,7 @@ public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO public CORINFO_CONST_LOOKUP tlsIndex; public nuint tlsGetAddrFtnPtr; public nuint tlsIndexObject; + public nuint threadVarsSection; public uint offsetOfThreadLocalStoragePointer; public uint offsetOfMaxThreadStaticBlocks; public uint offsetOfThreadStaticBlocks; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 9a906b4a10db4..6dfcb907d19e2 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -532,6 +532,7 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo Agnostic_CORINFO_CONST_LOOKUP tlsIndex; DWORDLONG tlsGetAddrFtnPtr; DWORDLONG tlsIndexObject; + DWORDLONG threadVarsSection; UINT offsetOfThreadLocalStoragePointer; UINT offsetOfMaxThreadStaticBlocks; UINT offsetOfThreadStaticBlocks; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index f72f2cd2a83d2..05a69edb1d468 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3577,6 +3577,7 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC value.tlsIndex.accessType = pInfo->tlsIndex.accessType; value.tlsGetAddrFtnPtr = CastHandle(pInfo->tlsGetAddrFtnPtr); value.tlsIndexObject = CastHandle(pInfo->tlsIndexObject); + value.threadVarsSection = CastHandle(pInfo->threadVarsSection); value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; @@ -3593,9 +3594,12 @@ void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_ printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u" - ", value tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject--%016" PRIX64 , + ", value tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject-%016" PRIX64 + ", threadVarsSection-%016" PRIX64 , key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, - value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.tlsIndexObject); + value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, + value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.tlsIndexObject, + value.threadVarsSection); } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3609,6 +3613,7 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr; pInfo->tlsIndexObject = (void*)value.tlsIndexObject; + pInfo->threadVarsSection = (void*)value.threadVarsSection; pInfo->offsetOfMaxThreadStaticBlocks = (DWORD)value.offsetOfMaxThreadStaticBlocks; pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 0fedff2c4b227..c92341a1caf34 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -308,8 +308,22 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler ret NESTED_END ProfileTailcallNaked, _TEXT +#ifdef TARGET_OSX +# EXTERN_C void* GetThreadVarsSectionOffset() +# +# Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread +# local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called +# to retrieve the address of the thread local. +# +LEAF_ENTRY GetThreadVarsSectionOffset, _TEXT + mov rdi, _t_ThreadStatics@TLVP[rip] + ret +LEAF_END GetThreadVarsSectionAddr, _TEXT +// ------------------------------------------------------------------ +#endif // TARGET_OSX -# EXTERN_C void* GetThreadStaticsBaseOffset(); +#ifndef TARGET_OSX +# EXTERN_C void* GetTlsIndexObjectOffset(); # # Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime @@ -321,16 +335,16 @@ NESTED_END ProfileTailcallNaked, _TEXT # We never have to call this method directly, and hence there is a `int 3` at the end. # -LEAF_ENTRY GetThreadStaticsBaseOffset, _TEXT -#ifdef TARGET_OSX - mov rdi, _t_ThreadStatics@TLVP[rip] -#else +LEAF_ENTRY GetTlsIndexObjectOffset, _TEXT +# On The `lea` instruction has a data16 prefix and the call instruction has two data16 (0x66) prefixes and one rex64 prefix. +# This is so the total size of lea+call to be 16, suitable for link-time optimization. + data16 - lea rdi, t_ThreadStatics@TLSGD[rip] // instruction where offset is embedded by the linker during compilation + lea rdi, t_ThreadStatics@TLSGD[rip] # instruction where offset is embedded by the linker during compilation data16 data16 rex64 - call __tls_get_addr // dummy call to have linker see the code pattern to replace the offset -#endif + call __tls_get_addr # dummy call to have linker see the code pattern to replace the offset int 3 LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT +#endif diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 81e007f7a4a5d..d1adfa4b6869f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -975,26 +975,38 @@ LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT br x9 LEAF_END JIT_DispatchIndirectCall, _TEXT +#ifdef TARGET_OSX +// ------------------------------------------------------------------ +// void* GetThreadVarsSectionOffset() + +// Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread +// local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called +// to retrieve the address of the thread local. +LEAF_ENTRY GetThreadVarsSectionOffset, _TEXT + adrp x0, _t_ThreadStatics@TLVPPAGE + ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] + ret +LEAF_END GetThreadVarsSectionOffset, _TEXT // ------------------------------------------------------------------ -// void* GetThreadStaticsBaseOffset() +#endif // TARGET_OSX -// Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime +#ifndef TARGET_OSX +// ------------------------------------------------------------------ +// void* GetThreadStaticsVariableOffset() + +// Helper to calculate the offset of native thread local variable `t_ThreadStatics` in TCB. The offset has to be found at runtime // once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. -LEAF_ENTRY GetThreadStaticsBaseOffset, _TEXT +LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 -#ifdef TARGET_OSX - adrp x0, _t_ThreadStatics@TLVPPAGE - ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] -#else adrp x0, :tlsdesc:t_ThreadStatics ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics] add x0, x0, :tlsdesc_lo12:t_ThreadStatics .tlsdesccall t_ThreadStatics blr x1 -#endif EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 EPILOG_RETURN -LEAF_END GetThreadStaticsBaseOffset, _TEXT +LEAF_END GetThreadStaticsVariableOffset, _TEXT // ------------------------------------------------------------------ +#endif // !TARGET_OSX diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 8c77c8a62e5ec..2c6d59b0a4032 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1654,15 +1654,11 @@ static uint32_t ThreadLocalOffset(void* p) uint8_t* pOurTls = pTls[_tls_index]; return (uint32_t)((uint8_t*)p - pOurTls); } -#else - -extern "C" void* GetThreadStaticsBaseOffset(); - -#ifdef HOST_AMD64 +#elif TARGET_OSX +extern "C" void* GetThreadVarsSectionOffset(); -void* getThreadStaticDescriptor(uint8_t* p) +void* getThreadVarsSectionAddress(uint8_t* p) { -#ifdef TARGET_OSX if (!(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d)) { // The optimization is disabled if coreclr is not compiled in .so format. @@ -1673,7 +1669,34 @@ void* getThreadStaticDescriptor(uint8_t* p) // These opcodes are patched by the dynamic linker. // Move beyond the opcodes that we have already checked above. p += 3; + + // The descriptor address is located at *p at this point. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. + return *(uint32_t*)p + (p + 4); +} + +void* getThreadVarsSectionOffset() +{ +#ifdef TARGET_ARM64 + return reinterpret_cast(GetThreadVarsSectionOffset()); #else + // On x64, the address is related to rip, so, disassemble the function, + // read the offset, and then relative to the IP, find the final address of + // __thread_vars section. + uint8_t* p = reinterpret_cast(&GetThreadVarsSectionOffset); + return getThreadVarsSectionAddress(p); +#endif // TARGET_ARM64 +} + +#else + +extern "C" void* GetTlsIndexObjectOffset(); + +#ifdef HOST_AMD64 + +void* getThreadStaticDescriptor(uint8_t* p) +{ if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) { // The optimization is disabled if coreclr is not compiled in .so format. @@ -1684,7 +1707,6 @@ void* getThreadStaticDescriptor(uint8_t* p) // These opcodes are patched by the dynamic linker. // Move beyond the opcodes that we have already checked above. p += 4; -#endif // The descriptor address is located at *p at this point. Read that and add // it to the instruction pointer to locate the address of `ti` that will be used @@ -1696,16 +1718,14 @@ void* getThreadStaticDescriptor(uint8_t* p) void* getTlsIndexObjectAddress() { - uint8_t* p = reinterpret_cast(&GetThreadStaticsBaseOffset); + uint8_t* p = reinterpret_cast(&GetTlsIndexObjectOffset); return getThreadStaticDescriptor(p); } #elif HOST_ARM64 -void* getTlsIndexObjectAddress() -{ - return reinterpret_cast(GetThreadStaticsBaseOffset()); -} +extern "C" void* GetThreadStaticsVariableOffset(); + #endif // HOST_ARM64 #endif // TARGET_WINDOWS @@ -1731,10 +1751,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #elif defined(TARGET_OSX) - // For OSX x64/arm64, need to get the address of relevant tlv_get_addr of thread static - // variable that will be invoked during runtime to get the right address of corresponding - // thread. - pInfo->tlsIndexObject = (size_t)getTlsIndexObjectAddress(); + pInfo->threadVarsSection = (size_t)GetThreadVarsSectionAddr(); #elif defined(TARGET_AMD64) @@ -1747,7 +1764,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // For Linux/arm64, just get the offset of thread static variable, and during execution, // this offset, taken from trpid_elp0 system register gives back the thread variable address. - threadStaticBaseOffset = (size_t)getTlsIndexObjectAddress(); + threadStaticBaseOffset = reinterpret_cast(GetThreadStaticsVariableOffset()); #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); From f475b6d09d2b6cc325664233720a4117d7ca50b1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 16:04:25 -0700 Subject: [PATCH 64/79] fix osx builds --- src/coreclr/vm/jitinterface.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 2c6d59b0a4032..8fd3e81e014d9 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1751,13 +1751,13 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #elif defined(TARGET_OSX) - pInfo->threadVarsSection = (size_t)GetThreadVarsSectionAddr(); + pInfo->threadVarsSection = getThreadVarsSectionOffset(); #elif defined(TARGET_AMD64) // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. - pInfo->tlsGetAddrFtnPtr = &__tls_get_addr; + pInfo->tlsGetAddrFtnPtr = reinterpret_cast(&__tls_get_addr); pInfo->tlsIndexObject = getTlsIndexObjectAddress(); #elif defined(TARGET_ARM64) From 4e0c21192f6569f5d2fd6b4ad50dd4e2d8f5a613 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 16:58:40 -0700 Subject: [PATCH 65/79] fix build break --- src/coreclr/vm/amd64/asmhelpers.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index c92341a1caf34..2c13ff699f28a 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -339,12 +339,12 @@ LEAF_ENTRY GetTlsIndexObjectOffset, _TEXT # On The `lea` instruction has a data16 prefix and the call instruction has two data16 (0x66) prefixes and one rex64 prefix. # This is so the total size of lea+call to be 16, suitable for link-time optimization. - data16 + .byte 0x66 lea rdi, t_ThreadStatics@TLSGD[rip] # instruction where offset is embedded by the linker during compilation - data16 - data16 - rex64 + .byte 0x66 + .byte 0x66 + .byte 0x48 # rex.W prefix for padding call __tls_get_addr # dummy call to have linker see the code pattern to replace the offset int 3 -LEAF_END JIT_GetThreadStaticsBaseOffset, _TEXT +LEAF_END GetTlsIndexObjectOffset, _TEXT #endif From 47e087dc739178d5b8195649596d8d9154ee8725 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 23:44:15 -0700 Subject: [PATCH 66/79] fix some errors around osx --- src/coreclr/jit/helperexpansion.cpp | 21 ++++++++++++++++++--- src/coreclr/vm/amd64/asmhelpers.S | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 24add824a34a2..468d52a8c1031 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -507,20 +507,35 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; + memset(&threadStaticBlocksInfo, 0, sizeof(CORINFO_THREAD_STATIC_BLOCKS_INFO)); + info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); + if (TargetOS::IsMacOS) + { + if (threadStaticBlocksInfo.threadVarsSection == 0) + { + // We possibly compiled coreclr as single file and not .so file. + // Do not perform this optimization for it. + JITDUMP("There appears some problem finding the address of __thread_vars because threadVarsSection=0. " + "Exiting the optimization.\n"); + return false; + } + } #ifdef TARGET_AMD64 - if (TargetOS::IsUnix || TargetOS::IsMacOS) + else if (TargetOS::IsUnix) { + if (threadStaticBlocksInfo.tlsIndexObject == 0) { // We possibly compiled coreclr as single file and not .so file. // Do not perform this optimization for it. - JITDUMP("There appears some problem finding the address of tls_index object. Exiting the optimization.\n"); + JITDUMP("There appears some problem finding the address of tls_index object because tlsIndexObject=0. " + "Exiting the optimization.\n"); return false; } } -#endif // TARGET_AMD64 +#endif //TARGET_AMD64 JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 2c13ff699f28a..78a08fec3f769 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -318,7 +318,7 @@ NESTED_END ProfileTailcallNaked, _TEXT LEAF_ENTRY GetThreadVarsSectionOffset, _TEXT mov rdi, _t_ThreadStatics@TLVP[rip] ret -LEAF_END GetThreadVarsSectionAddr, _TEXT +LEAF_END GetThreadVarsSectionOffset, _TEXT // ------------------------------------------------------------------ #endif // TARGET_OSX From 04420db99d27d29618ea1d6fdf84adc1585e5b03 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 28 Jun 2023 23:57:10 -0700 Subject: [PATCH 67/79] rename some more methods --- src/coreclr/vm/amd64/asmhelpers.S | 12 ++++++------ src/coreclr/vm/arm64/asmhelpers.S | 6 +++--- src/coreclr/vm/jitinterface.cpp | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 78a08fec3f769..a8cdb06237eb5 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -309,21 +309,21 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler NESTED_END ProfileTailcallNaked, _TEXT #ifdef TARGET_OSX -# EXTERN_C void* GetThreadVarsSectionOffset() +# EXTERN_C void* GetThreadVarsAddress() # # Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread # local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called # to retrieve the address of the thread local. # -LEAF_ENTRY GetThreadVarsSectionOffset, _TEXT +LEAF_ENTRY GetThreadVarsAddress, _TEXT mov rdi, _t_ThreadStatics@TLVP[rip] ret -LEAF_END GetThreadVarsSectionOffset, _TEXT +LEAF_END GetThreadVarsAddress, _TEXT // ------------------------------------------------------------------ #endif // TARGET_OSX #ifndef TARGET_OSX -# EXTERN_C void* GetTlsIndexObjectOffset(); +# EXTERN_C void* GetTlsIndexObjectDescOffset(); # # Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime @@ -335,7 +335,7 @@ LEAF_END GetThreadVarsSectionOffset, _TEXT # We never have to call this method directly, and hence there is a `int 3` at the end. # -LEAF_ENTRY GetTlsIndexObjectOffset, _TEXT +LEAF_ENTRY GetTlsIndexObjectDescOffset, _TEXT # On The `lea` instruction has a data16 prefix and the call instruction has two data16 (0x66) prefixes and one rex64 prefix. # This is so the total size of lea+call to be 16, suitable for link-time optimization. @@ -346,5 +346,5 @@ LEAF_ENTRY GetTlsIndexObjectOffset, _TEXT .byte 0x48 # rex.W prefix for padding call __tls_get_addr # dummy call to have linker see the code pattern to replace the offset int 3 -LEAF_END GetTlsIndexObjectOffset, _TEXT +LEAF_END GetTlsIndexObjectDescOffset, _TEXT #endif diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index d1adfa4b6869f..d6d69014d902a 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -977,17 +977,17 @@ LEAF_END JIT_DispatchIndirectCall, _TEXT #ifdef TARGET_OSX // ------------------------------------------------------------------ -// void* GetThreadVarsSectionOffset() +// void* GetThreadVarsAddress() // Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread // local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called // to retrieve the address of the thread local. -LEAF_ENTRY GetThreadVarsSectionOffset, _TEXT +LEAF_ENTRY GetThreadVarsAddress, _TEXT adrp x0, _t_ThreadStatics@TLVPPAGE ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] ret -LEAF_END GetThreadVarsSectionOffset, _TEXT +LEAF_END GetThreadVarsAddress, _TEXT // ------------------------------------------------------------------ #endif // TARGET_OSX diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 8fd3e81e014d9..b89934d43e1cb 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1655,9 +1655,9 @@ static uint32_t ThreadLocalOffset(void* p) return (uint32_t)((uint8_t*)p - pOurTls); } #elif TARGET_OSX -extern "C" void* GetThreadVarsSectionOffset(); +extern "C" void* GetThreadVarsAddress(); -void* getThreadVarsSectionAddress(uint8_t* p) +void* getThreadVarsSectionAddressFromDesc(uint8_t* p) { if (!(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d)) { @@ -1676,25 +1676,25 @@ void* getThreadVarsSectionAddress(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getThreadVarsSectionOffset() +void* getThreadVarsSectionAddress() { #ifdef TARGET_ARM64 - return reinterpret_cast(GetThreadVarsSectionOffset()); + return reinterpret_cast(GetThreadVarsAddress()); #else // On x64, the address is related to rip, so, disassemble the function, // read the offset, and then relative to the IP, find the final address of // __thread_vars section. - uint8_t* p = reinterpret_cast(&GetThreadVarsSectionOffset); - return getThreadVarsSectionAddress(p); + uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); + return getThreadVarsSectionAddressFromDesc(p); #endif // TARGET_ARM64 } #else -extern "C" void* GetTlsIndexObjectOffset(); - #ifdef HOST_AMD64 +extern "C" void* GetTlsIndexObjectDescOffset(); + void* getThreadStaticDescriptor(uint8_t* p) { if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) @@ -1718,7 +1718,7 @@ void* getThreadStaticDescriptor(uint8_t* p) void* getTlsIndexObjectAddress() { - uint8_t* p = reinterpret_cast(&GetTlsIndexObjectOffset); + uint8_t* p = reinterpret_cast(&GetTlsIndexObjectDescOffset); return getThreadStaticDescriptor(p); } @@ -1751,7 +1751,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #elif defined(TARGET_OSX) - pInfo->threadVarsSection = getThreadVarsSectionOffset(); + pInfo->threadVarsSection = getThreadVarsSectionAddress(); #elif defined(TARGET_AMD64) From 94f4d431aa687af022bff71e41b92eca0dbd88c5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 14:49:27 -0700 Subject: [PATCH 68/79] review feedback --- src/coreclr/jit/helperexpansion.cpp | 19 +++++++++---------- src/coreclr/vm/jitinterface.cpp | 24 +++++++++++++----------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 468d52a8c1031..95a2bb4f2514f 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -511,18 +511,18 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); +#ifdef TARGET_AMD64 if (TargetOS::IsMacOS) { if (threadStaticBlocksInfo.threadVarsSection == 0) { - // We possibly compiled coreclr as single file and not .so file. + // We possibly compiled coreclr as single file and not .dylib file. // Do not perform this optimization for it. - JITDUMP("There appears some problem finding the address of __thread_vars because threadVarsSection=0. " - "Exiting the optimization.\n"); + JITDUMP("__thread_vars address not available. Exiting the optimization.\n"); return false; } } -#ifdef TARGET_AMD64 + else if (TargetOS::IsUnix) { @@ -530,12 +530,11 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { // We possibly compiled coreclr as single file and not .so file. // Do not perform this optimization for it. - JITDUMP("There appears some problem finding the address of tls_index object because tlsIndexObject=0. " - "Exiting the optimization.\n"); + JITDUMP("tls_index object address not available. Exiting the optimization.\n"); return false; } } -#endif //TARGET_AMD64 +#endif // TARGET_AMD64 JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); @@ -641,8 +640,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); - // This is a syscall indirect call which takes an argument. - // Populate and set the ABI apporpriately. + // This is a call which takes an argument. + // Populate and set the ABI appropriately. GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); @@ -666,7 +665,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); GenTreeCall* tlsRefCall = tlsValue->AsCall(); - // This is a syscall indirect call which takes an argument. + // This is an indirect call which takes an argument. // Populate and set the ABI appropriately. GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index b89934d43e1cb..9e043bc5b8edd 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -65,7 +65,7 @@ #include "tailcallhelp.h" -#ifdef HOST_WINDOWS +#ifdef TARGET_WINDOWS EXTERN_C uint32_t _tls_index; #endif @@ -1654,7 +1654,7 @@ static uint32_t ThreadLocalOffset(void* p) uint8_t* pOurTls = pTls[_tls_index]; return (uint32_t)((uint8_t*)p - pOurTls); } -#elif TARGET_OSX +#elif defined(TARGET_OSX) extern "C" void* GetThreadVarsAddress(); void* getThreadVarsSectionAddressFromDesc(uint8_t* p) @@ -1678,20 +1678,22 @@ void* getThreadVarsSectionAddressFromDesc(uint8_t* p) void* getThreadVarsSectionAddress() { -#ifdef TARGET_ARM64 - return reinterpret_cast(GetThreadVarsAddress()); -#else +#ifdef TARGET_AMD64 // On x64, the address is related to rip, so, disassemble the function, // read the offset, and then relative to the IP, find the final address of // __thread_vars section. uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); return getThreadVarsSectionAddressFromDesc(p); -#endif // TARGET_ARM64 +#else + return GetThreadVarsAddress(); +#endif // TARGET_AMD64 } #else -#ifdef HOST_AMD64 +// Linux + +#ifdef TARGET_AMD64 extern "C" void* GetTlsIndexObjectDescOffset(); @@ -1722,11 +1724,11 @@ void* getTlsIndexObjectAddress() return getThreadStaticDescriptor(p); } -#elif HOST_ARM64 +#elif TARGET_ARM64 extern "C" void* GetThreadStaticsVariableOffset(); -#endif // HOST_ARM64 +#endif // TARGET_ARM64 #endif // TARGET_WINDOWS @@ -1742,7 +1744,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* size_t threadStaticBaseOffset = 0; -#ifdef _MSC_VER +#if defined(TARGET_WINDOWS) pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); pInfo->tlsIndex.accessType = IAT_VALUE; @@ -1768,7 +1770,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); -#endif // _MSC_VER +#endif // TARGET_WINDOWS if (isGCType) { From 71d27eb29763ce4ba3464013d7e9f7efbbdac690 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 15:17:55 -0700 Subject: [PATCH 69/79] review feedback --- src/coreclr/jit/helperexpansion.cpp | 8 ++++---- src/coreclr/vm/arm64/asmhelpers.S | 2 +- src/coreclr/vm/jitinterface.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 95a2bb4f2514f..3744df72cf769 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -516,8 +516,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { if (threadStaticBlocksInfo.threadVarsSection == 0) { - // We possibly compiled coreclr as single file and not .dylib file. - // Do not perform this optimization for it. + // We possibly compiled coreclr as single file and not .dylib file in which case __thread_vars + // we found was not accurate. Do not perform this optimization in such case. JITDUMP("__thread_vars address not available. Exiting the optimization.\n"); return false; } @@ -528,8 +528,8 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* if (threadStaticBlocksInfo.tlsIndexObject == 0) { - // We possibly compiled coreclr as single file and not .so file. - // Do not perform this optimization for it. + // We possibly compiled coreclr as single file and not .dylib file in which case tls_index + // we found was not accurate. Do not perform this optimization in such case. JITDUMP("tls_index object address not available. Exiting the optimization.\n"); return false; } diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index d6d69014d902a..cbe14485e8df4 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -993,7 +993,7 @@ LEAF_END GetThreadVarsAddress, _TEXT #ifndef TARGET_OSX // ------------------------------------------------------------------ -// void* GetThreadStaticsVariableOffset() +// size_t GetThreadStaticsVariableOffset() // Helper to calculate the offset of native thread local variable `t_ThreadStatics` in TCB. The offset has to be found at runtime // once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 9e043bc5b8edd..397aa9e921dbc 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1726,7 +1726,7 @@ void* getTlsIndexObjectAddress() #elif TARGET_ARM64 -extern "C" void* GetThreadStaticsVariableOffset(); +extern "C" size_t GetThreadStaticsVariableOffset(); #endif // TARGET_ARM64 #endif // TARGET_WINDOWS @@ -1766,7 +1766,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* // For Linux/arm64, just get the offset of thread static variable, and during execution, // this offset, taken from trpid_elp0 system register gives back the thread variable address. - threadStaticBaseOffset = reinterpret_cast(GetThreadStaticsVariableOffset()); + threadStaticBaseOffset = GetThreadStaticsVariableOffset(); #else _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); From 09ec151d857144703171c491a744bbedb35c0975 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 15:36:25 -0700 Subject: [PATCH 70/79] delete the comment --- src/coreclr/jit/helperexpansion.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 3744df72cf769..dc204a0afd185 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -516,8 +516,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* { if (threadStaticBlocksInfo.threadVarsSection == 0) { - // We possibly compiled coreclr as single file and not .dylib file in which case __thread_vars - // we found was not accurate. Do not perform this optimization in such case. JITDUMP("__thread_vars address not available. Exiting the optimization.\n"); return false; } From e28401ccf526da2b44d1c4a6648b42284138cbc1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 16:46:18 -0700 Subject: [PATCH 71/79] make methods static --- src/coreclr/vm/jitinterface.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 397aa9e921dbc..fa2122c44484c 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1657,11 +1657,12 @@ static uint32_t ThreadLocalOffset(void* p) #elif defined(TARGET_OSX) extern "C" void* GetThreadVarsAddress(); -void* getThreadVarsSectionAddressFromDesc(uint8_t* p) +static void* getThreadVarsSectionAddressFromDesc(uint8_t* p) { if (!(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d)) { - // The optimization is disabled if coreclr is not compiled in .so format. + // The optimization is disabled if coreclr is not compiled in .dylib format. + _ASSERTE(false && "Unexpected code sequence"); return 0; } @@ -1676,7 +1677,7 @@ void* getThreadVarsSectionAddressFromDesc(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getThreadVarsSectionAddress() +static void* getThreadVarsSectionAddress() { #ifdef TARGET_AMD64 // On x64, the address is related to rip, so, disassemble the function, @@ -1697,11 +1698,12 @@ void* getThreadVarsSectionAddress() extern "C" void* GetTlsIndexObjectDescOffset(); -void* getThreadStaticDescriptor(uint8_t* p) +static void* getThreadStaticDescriptor(uint8_t* p) { if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) { // The optimization is disabled if coreclr is not compiled in .so format. + _ASSERTE(false && "Unexpected code sequence"); return 0; } @@ -1718,7 +1720,7 @@ void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -void* getTlsIndexObjectAddress() +static void* getTlsIndexObjectAddress() { uint8_t* p = reinterpret_cast(&GetTlsIndexObjectDescOffset); return getThreadStaticDescriptor(p); From b89a55f34f85230e1da68d39eee2a79be90c132a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 21:29:00 -0700 Subject: [PATCH 72/79] remove macos/x64 check --- src/coreclr/jit/helperexpansion.cpp | 11 +---------- src/coreclr/vm/jitinterface.cpp | 23 +++++++++-------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index dc204a0afd185..557c331820aa2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -512,16 +512,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); #ifdef TARGET_AMD64 - if (TargetOS::IsMacOS) - { - if (threadStaticBlocksInfo.threadVarsSection == 0) - { - JITDUMP("__thread_vars address not available. Exiting the optimization.\n"); - return false; - } - } - - else if (TargetOS::IsUnix) + if (TargetOS::IsUnix) { if (threadStaticBlocksInfo.tlsIndexObject == 0) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index fa2122c44484c..8b3dc2ba58a6d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1657,14 +1657,9 @@ static uint32_t ThreadLocalOffset(void* p) #elif defined(TARGET_OSX) extern "C" void* GetThreadVarsAddress(); -static void* getThreadVarsSectionAddressFromDesc(uint8_t* p) +static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p) { - if (!(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d)) - { - // The optimization is disabled if coreclr is not compiled in .dylib format. - _ASSERTE(false && "Unexpected code sequence"); - return 0; - } + _ASSERT(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d); // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. // These opcodes are patched by the dynamic linker. @@ -1677,14 +1672,14 @@ static void* getThreadVarsSectionAddressFromDesc(uint8_t* p) return *(uint32_t*)p + (p + 4); } -static void* getThreadVarsSectionAddress() +static void* GetThreadVarsSectionAddress() { #ifdef TARGET_AMD64 // On x64, the address is related to rip, so, disassemble the function, // read the offset, and then relative to the IP, find the final address of // __thread_vars section. uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); - return getThreadVarsSectionAddressFromDesc(p); + return GetThreadVarsSectionAddressFromDesc(p); #else return GetThreadVarsAddress(); #endif // TARGET_AMD64 @@ -1698,7 +1693,7 @@ static void* getThreadVarsSectionAddress() extern "C" void* GetTlsIndexObjectDescOffset(); -static void* getThreadStaticDescriptor(uint8_t* p) +static void* GetThreadStaticDescriptor(uint8_t* p) { if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) { @@ -1720,10 +1715,10 @@ static void* getThreadStaticDescriptor(uint8_t* p) return *(uint32_t*)p + (p + 4); } -static void* getTlsIndexObjectAddress() +static void* GetTlsIndexObjectAddress() { uint8_t* p = reinterpret_cast(&GetTlsIndexObjectDescOffset); - return getThreadStaticDescriptor(p); + return GetThreadStaticDescriptor(p); } #elif TARGET_ARM64 @@ -1755,14 +1750,14 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* #elif defined(TARGET_OSX) - pInfo->threadVarsSection = getThreadVarsSectionAddress(); + pInfo->threadVarsSection = GetThreadVarsSectionAddress(); #elif defined(TARGET_AMD64) // For Linux/x64, get the address of tls_get_addr system method and the base address // of struct that we will pass to it. pInfo->tlsGetAddrFtnPtr = reinterpret_cast(&__tls_get_addr); - pInfo->tlsIndexObject = getTlsIndexObjectAddress(); + pInfo->tlsIndexObject = GetTlsIndexObjectAddress(); #elif defined(TARGET_ARM64) From c6ae3e4305a02c08b697304ea6b5ca2a7578faf3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 22:12:37 -0700 Subject: [PATCH 73/79] fix the check for linux/x64 --- src/coreclr/jit/helperexpansion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 557c331820aa2..f550a70c590ce 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -512,13 +512,13 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); #ifdef TARGET_AMD64 - if (TargetOS::IsUnix) + if (!TargetOS::IsMacOS && TargetOS::IsUnix) { - if (threadStaticBlocksInfo.tlsIndexObject == 0) { - // We possibly compiled coreclr as single file and not .dylib file in which case tls_index - // we found was not accurate. Do not perform this optimization in such case. + // For linux/x64, we possibly compiled coreclr as single file and not .so file in + // which case, the `tls_index` that we found was not accurate. + // Do not perform this optimization in such case. JITDUMP("tls_index object address not available. Exiting the optimization.\n"); return false; } From d237f0bf9f15b6556711babe4a4725f2b5f7a874 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 22:38:03 -0700 Subject: [PATCH 74/79] detect early for single-file linux/x64 --- src/coreclr/jit/helperexpansion.cpp | 9 +- src/coreclr/vm/jitinterface.cpp | 380 ++++++++++++++-------------- 2 files changed, 197 insertions(+), 192 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index f550a70c590ce..8270cdf0cdc4f 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -514,14 +514,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* #ifdef TARGET_AMD64 if (!TargetOS::IsMacOS && TargetOS::IsUnix) { - if (threadStaticBlocksInfo.tlsIndexObject == 0) - { - // For linux/x64, we possibly compiled coreclr as single file and not .so file in - // which case, the `tls_index` that we found was not accurate. - // Do not perform this optimization in such case. - JITDUMP("tls_index object address not available. Exiting the optimization.\n"); - return false; - } + assert(threadStaticBlocksInfo.tlsIndexObject != 0); } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 8b3dc2ba58a6d..710e69e698a3a 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1305,6 +1305,178 @@ static CorInfoHelpFunc getInstanceFieldHelper(FieldDesc * pField, CORINFO_ACCESS return (CorInfoHelpFunc)helper; } + + +/*********************************************************************/ +uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + UINT32 typeIndex = 0; + + JIT_TO_EE_TRANSITION(); + + FieldDesc* fieldDesc = (FieldDesc*)field; + _ASSERTE(fieldDesc->IsThreadStatic()); + + if (isGCType) + { + typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); + } + else + { + typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); + } + + assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); + + EE_TO_JIT_TRANSITION(); + return typeIndex; +} + +#if defined(TARGET_WINDOWS) +/*********************************************************************/ +static uint32_t ThreadLocalOffset(void* p) +{ + PTEB Teb = NtCurrentTeb(); + uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; + uint8_t* pOurTls = pTls[_tls_index]; + return (uint32_t)((uint8_t*)p - pOurTls); +} +#elif defined(TARGET_OSX) +extern "C" void* GetThreadVarsAddress(); + +static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p) +{ + _ASSERT(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d); + + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. + p += 3; + + // The descriptor address is located at *p at this point. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. + return *(uint32_t*)p + (p + 4); +} + +static void* GetThreadVarsSectionAddress() +{ +#ifdef TARGET_AMD64 + // On x64, the address is related to rip, so, disassemble the function, + // read the offset, and then relative to the IP, find the final address of + // __thread_vars section. + uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); + return GetThreadVarsSectionAddressFromDesc(p); +#else + return GetThreadVarsAddress(); +#endif // TARGET_AMD64 +} + +#else + +// Linux + +#ifdef TARGET_AMD64 + +extern "C" void* GetTlsIndexObjectDescOffset(); + +static void* GetThreadStaticDescriptor(uint8_t* p) +{ + if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) + { + // The optimization is disabled if coreclr is not compiled in .so format. + _ASSERTE(false && "Unexpected code sequence"); + return 0; + } + + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. + p += 4; + + // The descriptor address is located at *p at this point. Read that and add + // it to the instruction pointer to locate the address of `ti` that will be used + // to pass to __tls_get_addr during execution. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. + return *(uint32_t*)p + (p + 4); +} + +static void* GetTlsIndexObjectAddress() +{ + uint8_t* p = reinterpret_cast(&GetTlsIndexObjectDescOffset); + return GetThreadStaticDescriptor(p); +} + +#elif TARGET_ARM64 + +extern "C" size_t GetThreadStaticsVariableOffset(); + +#endif // TARGET_ARM64 +#endif // TARGET_WINDOWS + + +void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + JIT_TO_EE_TRANSITION_LEAF(); + + size_t threadStaticBaseOffset = 0; + +#if defined(TARGET_WINDOWS) + pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); + pInfo->tlsIndex.accessType = IAT_VALUE; + + pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); + threadStaticBaseOffset = ThreadLocalOffset(&t_ThreadStatics); + +#elif defined(TARGET_OSX) + + pInfo->threadVarsSection = GetThreadVarsSectionAddress(); + +#elif defined(TARGET_AMD64) + + // For Linux/x64, get the address of tls_get_addr system method and the base address + // of struct that we will pass to it. + pInfo->tlsGetAddrFtnPtr = reinterpret_cast(&__tls_get_addr); + pInfo->tlsIndexObject = GetTlsIndexObjectAddress(); + +#elif defined(TARGET_ARM64) + + // For Linux/arm64, just get the offset of thread static variable, and during execution, + // this offset, taken from trpid_elp0 system register gives back the thread variable address. + threadStaticBaseOffset = GetThreadStaticsVariableOffset(); + +#else + _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); +#endif // TARGET_WINDOWS + + if (isGCType) + { + pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks)); + pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks)); + } + else + { + pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks)); + pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks)); + } + pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); + + EE_TO_JIT_TRANSITION_LEAF(); +} + /*********************************************************************/ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, @@ -1416,20 +1588,31 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, #elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64) // Optimization is disabled for linux musl arm64 #else - // For windows x64/x86/arm64, linux x64/arm64: - // We convert the TLS access to the optimized helper where we will store - // the static blocks in TLS directly and access them via inline code. - if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) + bool optimizeThreadStaticAccess = true; +#if defined(TARGET_UNIX) && defined(TARGET_AMD64) + // For linux/x64, check if compiled coreclr as .so file and not single file. + // For single file, the `tls_index` might not be accurate. + // Do not perform this optimization in such case. + optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != 0; +#endif // TARGET_UNIX && TARGET_AMD64 + + if (optimizeThreadStaticAccess) { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; - } - else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) - { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + // For windows x64/x86/arm64, linux x64/arm64: + // We convert the TLS access to the optimized helper where we will store + // the static blocks in TLS directly and access them via inline code. + if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } + else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } } #endif // TARGET_ARM } @@ -1613,177 +1796,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, EE_TO_JIT_TRANSITION(); } - -/*********************************************************************/ -uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) -{ - CONTRACTL { - THROWS; - GC_TRIGGERS; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - UINT32 typeIndex = 0; - - JIT_TO_EE_TRANSITION(); - - FieldDesc* fieldDesc = (FieldDesc*)field; - _ASSERTE(fieldDesc->IsThreadStatic()); - - if (isGCType) - { - typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); - } - else - { - typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); - } - - assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); - - EE_TO_JIT_TRANSITION(); - return typeIndex; -} - -#if defined(TARGET_WINDOWS) -/*********************************************************************/ -static uint32_t ThreadLocalOffset(void* p) -{ - PTEB Teb = NtCurrentTeb(); - uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; - uint8_t* pOurTls = pTls[_tls_index]; - return (uint32_t)((uint8_t*)p - pOurTls); -} -#elif defined(TARGET_OSX) -extern "C" void* GetThreadVarsAddress(); - -static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p) -{ - _ASSERT(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d); - - // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. - // These opcodes are patched by the dynamic linker. - // Move beyond the opcodes that we have already checked above. - p += 3; - - // The descriptor address is located at *p at this point. - // (p + 4) below skips the descriptor address bytes embedded in the instruction and - // add it to the `instruction pointer` to find out the address. - return *(uint32_t*)p + (p + 4); -} - -static void* GetThreadVarsSectionAddress() -{ -#ifdef TARGET_AMD64 - // On x64, the address is related to rip, so, disassemble the function, - // read the offset, and then relative to the IP, find the final address of - // __thread_vars section. - uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); - return GetThreadVarsSectionAddressFromDesc(p); -#else - return GetThreadVarsAddress(); -#endif // TARGET_AMD64 -} - -#else - -// Linux - -#ifdef TARGET_AMD64 - -extern "C" void* GetTlsIndexObjectDescOffset(); - -static void* GetThreadStaticDescriptor(uint8_t* p) -{ - if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) - { - // The optimization is disabled if coreclr is not compiled in .so format. - _ASSERTE(false && "Unexpected code sequence"); - return 0; - } - - // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. - // These opcodes are patched by the dynamic linker. - // Move beyond the opcodes that we have already checked above. - p += 4; - - // The descriptor address is located at *p at this point. Read that and add - // it to the instruction pointer to locate the address of `ti` that will be used - // to pass to __tls_get_addr during execution. - // (p + 4) below skips the descriptor address bytes embedded in the instruction and - // add it to the `instruction pointer` to find out the address. - return *(uint32_t*)p + (p + 4); -} - -static void* GetTlsIndexObjectAddress() -{ - uint8_t* p = reinterpret_cast(&GetTlsIndexObjectDescOffset); - return GetThreadStaticDescriptor(p); -} - -#elif TARGET_ARM64 - -extern "C" size_t GetThreadStaticsVariableOffset(); - -#endif // TARGET_ARM64 -#endif // TARGET_WINDOWS - - -void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - JIT_TO_EE_TRANSITION_LEAF(); - - size_t threadStaticBaseOffset = 0; - -#if defined(TARGET_WINDOWS) - pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); - pInfo->tlsIndex.accessType = IAT_VALUE; - - pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); - threadStaticBaseOffset = ThreadLocalOffset(&t_ThreadStatics); - -#elif defined(TARGET_OSX) - - pInfo->threadVarsSection = GetThreadVarsSectionAddress(); - -#elif defined(TARGET_AMD64) - - // For Linux/x64, get the address of tls_get_addr system method and the base address - // of struct that we will pass to it. - pInfo->tlsGetAddrFtnPtr = reinterpret_cast(&__tls_get_addr); - pInfo->tlsIndexObject = GetTlsIndexObjectAddress(); - -#elif defined(TARGET_ARM64) - - // For Linux/arm64, just get the offset of thread static variable, and during execution, - // this offset, taken from trpid_elp0 system register gives back the thread variable address. - threadStaticBaseOffset = GetThreadStaticsVariableOffset(); - -#else - _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); -#endif // TARGET_WINDOWS - - if (isGCType) - { - pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks)); - pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks)); - } - else - { - pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks)); - pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks)); - } - pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - - EE_TO_JIT_TRANSITION_LEAF(); -} - //--------------------------------------------------------------------------------------- // bool CEEInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) From 71b99ddcf5922dd175eb8868c141a59f94fa24fd Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 29 Jun 2023 23:21:01 -0700 Subject: [PATCH 75/79] move the assert --- src/coreclr/jit/helperexpansion.cpp | 9 ++------- src/coreclr/vm/jitinterface.cpp | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 8270cdf0cdc4f..0dc2293c4bc28 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -511,13 +511,6 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); -#ifdef TARGET_AMD64 - if (!TargetOS::IsMacOS && TargetOS::IsUnix) - { - assert(threadStaticBlocksInfo.tlsIndexObject != 0); - } -#endif // TARGET_AMD64 - JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); @@ -624,6 +617,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a call which takes an argument. // Populate and set the ABI appropriately. + assert(threadVarsSectionVal != 0); GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); @@ -649,6 +643,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is an indirect call which takes an argument. // Populate and set the ABI appropriately. + assert(threadStaticBlocksInfo.tlsIndexObject != 0); GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 710e69e698a3a..668fa4162b4ee 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1589,7 +1589,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // Optimization is disabled for linux musl arm64 #else bool optimizeThreadStaticAccess = true; -#if defined(TARGET_UNIX) && defined(TARGET_AMD64) +#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64) // For linux/x64, check if compiled coreclr as .so file and not single file. // For single file, the `tls_index` might not be accurate. // Do not perform this optimization in such case. From de5b3b751da864ae3904ceab5f5456cdd5387da2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 2 Jul 2023 22:51:49 -0700 Subject: [PATCH 76/79] review feedback --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/emit.cpp | 2 ++ src/coreclr/jit/emitarm64.cpp | 2 ++ src/coreclr/jit/gentree.cpp | 3 +- src/coreclr/jit/helperexpansion.cpp | 27 +++++++-------- src/coreclr/jit/lsraarmarch.cpp | 4 +++ .../tools/superpmi/superpmi-shared/agnostic.h | 8 ++--- .../superpmi-shared/methodcontext.cpp | 33 +++++++++---------- src/coreclr/vm/jithelpers.cpp | 4 +-- src/coreclr/vm/jitinterface.cpp | 4 +-- 10 files changed, 48 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 806c2f559dd5c..fb0ae3530bfba 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7123,7 +7123,7 @@ class Compiler optMethodFlags |= OMF_HAS_GUARDEDDEVIRT; } - bool doesMethodHasTlsFieldAccess() + bool methodHasTlsFieldAccess() { return (optMethodFlags & OMF_HAS_TLS_FIELD) != 0; } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 6eec401676f85..0c1318006e18c 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10205,6 +10205,8 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */ if (callSig == nullptr) { + // For certain calls whose target is non-containable (e.g. tls access targets), `methodHandle` + // will be nullptr, because the target is present in a register. if ((methodHandle != nullptr) && (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)) { emitComp->eeGetMethodSig(methodHandle, &sigInfo); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 583dcc8d47727..4743615fd03d4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -3740,11 +3740,13 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idReg1(reg); fmt = IF_SR_1A; break; + case INS_mrs_tpid0: id = emitNewInstrSmall(attr); id->idReg1(reg); fmt = IF_SR_1A; break; + default: unreached(); } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index deee04b0a3d7f..54c834c1c9571 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -17488,7 +17488,8 @@ bool GenTree::isContained() const // return true if node is contained and an indir bool GenTree::isContainedIndir() const { - return OperIsIndir() && isContained(); + bool answer = isContained(); + return OperIsIndir() && answer; } bool GenTree::isIndirAddrMode() diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 0dc2293c4bc28..5ea83207bfc93 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -421,7 +421,7 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - if (!doesMethodHasTlsFieldAccess()) + if (!methodHasTlsFieldAccess()) { // TP: nothing to expand in the current method JITDUMP("Nothing to expand.\n") @@ -486,7 +486,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such // as MRC and MCR are used to access them. We do not support them and so should never optimize the // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); + noway_assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); #endif } else @@ -495,7 +495,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such // as MRC and MCR are used to access them. We do not support them and so should never optimize the // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32"); + noway_assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32"); #endif } @@ -512,14 +512,15 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); - JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); - JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); - JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); - JITDUMP("tlsIndexObject= %u\n", (size_t)threadStaticBlocksInfo.tlsIndexObject); - JITDUMP("threadVarsSection= %u\n", (size_t)threadStaticBlocksInfo.threadVarsSection); - JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks); - JITDUMP("offsetOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks); - JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); + JITDUMP("tlsIndex= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsIndex.addr))); + JITDUMP("tlsGetAddrFtnPtr= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsGetAddrFtnPtr))); + JITDUMP("tlsIndexObject= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsIndexObject))); + JITDUMP("threadVarsSection= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.threadVarsSection))); + JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", + dspOffset(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer)); + JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks)); + JITDUMP("offsetOfThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfThreadStaticBlocks)); + JITDUMP("offsetOfGCDataPointer= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfGCDataPointer)); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); @@ -617,7 +618,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is a call which takes an argument. // Populate and set the ABI appropriately. - assert(threadVarsSectionVal != 0); + assert(opts.altJit || threadVarsSectionVal != 0); GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); @@ -643,7 +644,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // This is an indirect call which takes an argument. // Populate and set the ABI appropriately. - assert(threadStaticBlocksInfo.tlsIndexObject != 0); + assert(opts.altJit || threadStaticBlocksInfo.tlsIndexObject != 0); GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 677c66d6bad7b..aaf79bddfd5a4 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -333,6 +333,10 @@ int LinearScan::BuildCall(GenTreeCall* call) if (call->gtCallType == CT_INDIRECT) { + // For TLS accesses, we need to find the address of the thread local by doing + // an indirect call to the relevant address corresponding to that variable. + // As an (early) argument, it takes the address of tlv_get_address + // symbol (osx) or the offset of the varible in TCB (linux). for (CallArg& arg : call->gtArgs.EarlyArgs()) { CallArgABIInformation& abiInfo = arg.AbiInfo; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 6dfcb907d19e2..b4cdb272b3209 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -533,10 +533,10 @@ struct Agnostic_GetThreadLocalStaticBlocksInfo DWORDLONG tlsGetAddrFtnPtr; DWORDLONG tlsIndexObject; DWORDLONG threadVarsSection; - UINT offsetOfThreadLocalStoragePointer; - UINT offsetOfMaxThreadStaticBlocks; - UINT offsetOfThreadStaticBlocks; - UINT offsetOfGCDataPointer; + DWORD offsetOfThreadLocalStoragePointer; + DWORD offsetOfMaxThreadStaticBlocks; + DWORD offsetOfThreadStaticBlocks; + DWORD offsetOfGCDataPointer; }; struct Agnostic_GetThreadLocalFieldInfo diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 05a69edb1d468..3b2b1899265b8 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3573,13 +3573,12 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC Agnostic_GetThreadLocalStaticBlocksInfo value; ZeroMemory(&value, sizeof(value)); - value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); - value.tlsIndex.accessType = pInfo->tlsIndex.accessType; - value.tlsGetAddrFtnPtr = CastHandle(pInfo->tlsGetAddrFtnPtr); - value.tlsIndexObject = CastHandle(pInfo->tlsIndexObject); - value.threadVarsSection = CastHandle(pInfo->threadVarsSection); - value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; + value.tlsIndex = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&pInfo->tlsIndex); + value.tlsGetAddrFtnPtr = CastPointer(pInfo->tlsGetAddrFtnPtr); + value.tlsIndexObject = CastPointer(pInfo->tlsIndexObject); + value.threadVarsSection = CastPointer(pInfo->threadVarsSection); value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; + value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; @@ -3591,15 +3590,14 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { - printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 + printf("GetThreadLocalStaticBlocksInfo key %u, tlsIndex-%s, " + ", tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject - %016" PRIX64 + ", threadVarsSection - %016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" - ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u" - ", value tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject-%016" PRIX64 - ", threadVarsSection-%016" PRIX64 , - key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, - value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, - value.offsetOfGCDataPointer, value.tlsGetAddrFtnPtr, value.tlsIndexObject, - value.threadVarsSection); + ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u", + key, SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(value.tlsIndex).c_str(), value.tlsGetAddrFtnPtr, + value.tlsIndexObject, value.threadVarsSection, value.offsetOfThreadLocalStoragePointer, + value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); } void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) @@ -3609,14 +3607,13 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(key, value)); - pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; - pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; + pInfo->tlsIndex = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value.tlsIndex); pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr; pInfo->tlsIndexObject = (void*)value.tlsIndexObject; pInfo->threadVarsSection = (void*)value.threadVarsSection; - pInfo->offsetOfMaxThreadStaticBlocks = (DWORD)value.offsetOfMaxThreadStaticBlocks; pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; - pInfo->offsetOfThreadStaticBlocks = (DWORD)value.offsetOfThreadStaticBlocks; + pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index e80156c165b48..acb8cefb6942b 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1865,7 +1865,7 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn if (t_NonGCThreadStaticBlocksSize > 0) { memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_ThreadStatics.NonGCThreadStaticBlocks; + delete[] t_ThreadStatics.NonGCThreadStaticBlocks; } t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize; @@ -1959,7 +1959,7 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex if (t_GCThreadStaticBlocksSize > 0) { memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_ThreadStatics.GCThreadStaticBlocks; + delete[] t_ThreadStatics.GCThreadStaticBlocks; } t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 668fa4162b4ee..bff45e9deb456 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1392,7 +1392,7 @@ static void* GetThreadStaticDescriptor(uint8_t* p) { // The optimization is disabled if coreclr is not compiled in .so format. _ASSERTE(false && "Unexpected code sequence"); - return 0; + return nullptr; } // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. @@ -1593,7 +1593,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, // For linux/x64, check if compiled coreclr as .so file and not single file. // For single file, the `tls_index` might not be accurate. // Do not perform this optimization in such case. - optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != 0; + optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr; #endif // TARGET_UNIX && TARGET_AMD64 if (optimizeThreadStaticAccess) From c7864df985bf4677b74d01e59516b03708a8b075 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 3 Jul 2023 13:14:13 -0700 Subject: [PATCH 77/79] misc fixup --- src/coreclr/jit/gentree.cpp | 3 +-- src/coreclr/jit/helperexpansion.cpp | 8 ++++---- src/coreclr/jit/lsraarmarch.cpp | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 54c834c1c9571..deee04b0a3d7f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -17488,8 +17488,7 @@ bool GenTree::isContained() const // return true if node is contained and an indir bool GenTree::isContainedIndir() const { - bool answer = isContained(); - return OperIsIndir() && answer; + return OperIsIndir() && isContained(); } bool GenTree::isIndirAddrMode() diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 5ea83207bfc93..6f8b1926eee63 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -512,10 +512,10 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); - JITDUMP("tlsIndex= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsIndex.addr))); - JITDUMP("tlsGetAddrFtnPtr= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsGetAddrFtnPtr))); - JITDUMP("tlsIndexObject= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.tlsIndexObject))); - JITDUMP("threadVarsSection= %p\n", dspOffset(dspPtr(threadStaticBlocksInfo.threadVarsSection))); + JITDUMP("tlsIndex= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndex.addr)); + JITDUMP("tlsGetAddrFtnPtr= %p\n", dspPtr(threadStaticBlocksInfo.tlsGetAddrFtnPtr)); + JITDUMP("tlsIndexObject= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndexObject)); + JITDUMP("threadVarsSection= %p\n", dspPtr(threadStaticBlocksInfo.threadVarsSection)); JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer)); JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks)); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index aaf79bddfd5a4..3227353724dd3 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -336,7 +336,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // For TLS accesses, we need to find the address of the thread local by doing // an indirect call to the relevant address corresponding to that variable. // As an (early) argument, it takes the address of tlv_get_address - // symbol (osx) or the offset of the varible in TCB (linux). + // symbol (osx) or the offset of the variable in TCB (linux). for (CallArg& arg : call->gtArgs.EarlyArgs()) { CallArgABIInformation& abiInfo = arg.AbiInfo; From 2429d754b7ecab451f4e7cb37609b07cd1ade151 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 3 Jul 2023 13:50:07 -0700 Subject: [PATCH 78/79] use fgMorphArgs() --- src/coreclr/jit/helperexpansion.cpp | 12 ++++-------- src/coreclr/jit/lsraarmarch.cpp | 24 +----------------------- src/coreclr/jit/lsraxarch.cpp | 17 ----------------- 3 files changed, 5 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 6f8b1926eee63..b95f75f075a78 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -620,11 +620,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Populate and set the ABI appropriately. assert(opts.altJit || threadVarsSectionVal != 0); GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); - tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg)); - CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); - arg0->AbiInfo = CallArgABIInformation(); - arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + fgMorphArgs(tlsRefCall); tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); } @@ -646,11 +644,9 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* // Populate and set the ABI appropriately. assert(opts.altJit || threadStaticBlocksInfo.tlsIndexObject != 0); GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); - tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); + tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg)); - CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); - arg0->AbiInfo = CallArgABIInformation(); - arg0->AbiInfo.SetRegNum(0, REG_ARG_0); + fgMorphArgs(tlsRefCall); tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); #ifdef UNIX_X86_ABI diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 3227353724dd3..c89a155009736 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -331,28 +331,6 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - if (call->gtCallType == CT_INDIRECT) - { - // For TLS accesses, we need to find the address of the thread local by doing - // an indirect call to the relevant address corresponding to that variable. - // As an (early) argument, it takes the address of tlv_get_address - // symbol (osx) or the offset of the variable in TCB (linux). - for (CallArg& arg : call->gtArgs.EarlyArgs()) - { - CallArgABIInformation& abiInfo = arg.AbiInfo; - GenTree* argNode = arg.GetEarlyNode(); - - // Each register argument corresponds to one source. - if (argNode->OperIsPutArgReg()) - { - srcCount++; - BuildUse(argNode, genRegMask(argNode->GetRegNum())); - const regNumber argReg = abiInfo.GetRegNum(); - assert(argNode->GetRegNum() == argReg); - } - } - } - #ifdef DEBUG // Now, count stack args // Note that these need to be computed into a register, but then @@ -377,7 +355,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } else { - assert(!argNode->IsValue() || argNode->IsUnusedValue() || (call->gtCallType == CT_INDIRECT)); + assert(!argNode->IsValue() || argNode->IsUnusedValue() /* || (call->gtCallType == CT_INDIRECT)*/); } } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 59f0aa7f9f026..0c9855dfe28af 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1324,23 +1324,6 @@ int LinearScan::BuildCall(GenTreeCall* call) ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS); } srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates); - if (call->gtCallType == CT_INDIRECT) - { - for (CallArg& arg : call->gtArgs.EarlyArgs()) - { - CallArgABIInformation& abiInfo = arg.AbiInfo; - GenTree* argNode = arg.GetEarlyNode(); - - // Each register argument corresponds to one source. - if (argNode->OperIsPutArgReg()) - { - srcCount++; - BuildUse(argNode, genRegMask(argNode->GetRegNum())); - const regNumber argReg = abiInfo.GetRegNum(); - assert(argNode->GetRegNum() == argReg); - } - } - } } buildInternalRegisterUses(); From d645f2e1c44c6d02335bb91f6ea9a5a04afcdeb1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 5 Jul 2023 15:06:01 -0700 Subject: [PATCH 79/79] remove commented code --- src/coreclr/jit/lsraarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index c89a155009736..31f816fda1008 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -355,7 +355,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } else { - assert(!argNode->IsValue() || argNode->IsUnusedValue() /* || (call->gtCallType == CT_INDIRECT)*/); + assert(!argNode->IsValue() || argNode->IsUnusedValue()); } } }