From 4e802bf938a81e5abb71430cbbece397bfd73ac0 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Fri, 28 May 2021 10:27:34 +0200
Subject: [PATCH 1/5] Add writeable holders for executable memory

This change adds holders for writeable mappings of executable memory. It is
the largest part of the W^X support. The ExecutableWriterHolder implementation
is a dummy in this change, but it was fully tested with the coreclr / libraries
tests on Windows arm, arm64, x64 and x86 with the real double memory mapping.

A few concepts / conventions are used:
* When the writeable pointer isn't known at the place where it is needed and
  also not at the caller, an ExecutableWriterHolder instance is created.
* When a callee needs a writeable pointer to executable memory and the caller
  knows both the RW and RX addresses, the argument is doubled, with RX and RW
  suffixes. For constructors and member methods where "this" is the RW one, we
  pass just an extra RX argument.
* Locals holding an RW pointer use the RW suffix.
* Locals holding an RX pointer usually have no suffix, to minimize the number
  of changes, but in some cases they have an RX suffix where I felt it was
  better to make things clear.

(A usage sketch illustrating these conventions follows the diffstat below.)
---
 src/coreclr/inc/executableallocator.h | 73 +++++++++
 src/coreclr/inc/holder.h | 8 +-
 src/coreclr/inc/loaderheap.h | 1 +
 src/coreclr/utilcode/loaderheap.cpp | 30 +++-
 src/coreclr/vm/amd64/cgenamd64.cpp | 100 ++++++------
 src/coreclr/vm/amd64/cgencpu.h | 18 +--
 src/coreclr/vm/amd64/virtualcallstubcpu.hpp | 27 ++--
 src/coreclr/vm/arm/cgencpu.h | 37 +++--
 src/coreclr/vm/arm/stubs.cpp | 97 ++++++------
 src/coreclr/vm/arm/virtualcallstubcpu.hpp | 9 +-
 src/coreclr/vm/arm64/cgencpu.h | 37 +++--
 src/coreclr/vm/arm64/stubs.cpp | 100 ++++++------
 src/coreclr/vm/arm64/virtualcallstubcpu.hpp | 9 +-
 src/coreclr/vm/array.cpp | 7 +-
 src/coreclr/vm/callcounting.cpp | 8 +-
 src/coreclr/vm/ceeload.cpp | 11 +-
 src/coreclr/vm/ceemain.cpp | 10 +-
 src/coreclr/vm/clrtocomcall.cpp | 13 +-
 src/coreclr/vm/codeman.cpp | 77 +++++----
 src/coreclr/vm/comcallablewrapper.cpp | 150 +++++++++++-------
 src/coreclr/vm/comcallablewrapper.h | 11 +-
 src/coreclr/vm/comdelegate.cpp | 26 ++-
 src/coreclr/vm/comtoclrcall.cpp | 24 +--
 src/coreclr/vm/crossgencompile.cpp | 2 +-
 src/coreclr/vm/dataimage.cpp | 4 +-
 src/coreclr/vm/dllimportcallback.cpp | 26 +--
 src/coreclr/vm/dllimportcallback.h | 12 +-
 src/coreclr/vm/dynamicmethod.cpp | 67 +++++---
 src/coreclr/vm/dynamicmethod.h | 2 +-
 src/coreclr/vm/gccover.cpp | 64 +++++---
 src/coreclr/vm/i386/cgencpu.h | 31 ++--
 src/coreclr/vm/i386/cgenx86.cpp | 57 ++++---
 src/coreclr/vm/i386/stublinkerx86.cpp | 167 +++++++++++---------
 src/coreclr/vm/i386/stublinkerx86.h | 16 +-
 src/coreclr/vm/i386/virtualcallstubcpu.hpp | 24 +--
 src/coreclr/vm/jitinterface.cpp | 5 +-
 src/coreclr/vm/jitinterface.h | 4 +-
 src/coreclr/vm/method.cpp | 21 +--
 src/coreclr/vm/methoddescbackpatchinfo.cpp | 10 +-
 src/coreclr/vm/precode.cpp | 44 ++++--
 src/coreclr/vm/precode.h | 2 +-
 src/coreclr/vm/prestub.cpp | 14 +-
 src/coreclr/vm/readytoruninfo.h | 2 +-
 src/coreclr/vm/stubcache.cpp | 9 +-
 src/coreclr/vm/stublink.cpp | 87 ++++++----
 src/coreclr/vm/stublink.h | 5 +-
 src/coreclr/vm/threadsuspend.cpp | 12 +-
 src/coreclr/vm/virtualcallstub.cpp | 20 ++-
 48 files changed, 977 insertions(+), 613 deletions(-)
 create mode 100644 src/coreclr/inc/executableallocator.h
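For illustration only, not part of the patch: a minimal sketch of how the
holder and the RX/RW naming conventions above are meant to be used together.
The PatchStubTarget helper, its parameters, and the standard integer types are
hypothetical; only the ExecutableWriterHolder API comes from this change.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include "executableallocator.h" // added by this patch

    // The caller knows only the RX (executable) address, so - per the first
    // convention above - it creates the holder itself.
    void PatchStubTarget(uint8_t* pStubRX, size_t size, uintptr_t target)
    {
        // Map the region writeable for the lifetime of the holder.
        ExecutableWriterHolder<uint8_t> stubWriterHolder(pStubRX, size);
        uint8_t* pStubRW = stubWriterHolder.GetRW();

        // All writes go through the RW alias...
        memcpy(pStubRW, &target, sizeof(target));

        // ...while execution and address arithmetic keep using pStubRX.
        // The destructor unmaps the RW view (a no-op in this dummy version).
    }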
diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h
new file mode 100644
index 0000000000000..bae9e47d1b488
--- /dev/null
+++ b/src/coreclr/inc/executableallocator.h
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+//
+// Allocator and holders for double mapped executable memory
+//
+
+#pragma once
+
+#include "utilcode.h"
+#include "ex.h"
+#include
+
+// Holder class to map read-execute memory as read-write so that it can be modified without using a read-write-execute mapping.
+// At the moment the implementation is a dummy: it returns the same address for both mappings and expects it to be read-write-execute.
+// The class uses move semantics to ensure proper unmapping when the holder value is re-assigned.
+template <typename T>
+class ExecutableWriterHolder
+{
+    T *m_addressRX;
+    T *m_addressRW;
+
+    void Move(ExecutableWriterHolder& other)
+    {
+        m_addressRX = other.m_addressRX;
+        m_addressRW = other.m_addressRW;
+        other.m_addressRX = NULL;
+        other.m_addressRW = NULL;
+    }
+
+    void Unmap()
+    {
+        // TODO: this will be added with the double mapped allocator addition
+    }
+
+public:
+    ExecutableWriterHolder(const ExecutableWriterHolder& other) = delete;
+    ExecutableWriterHolder& operator=(const ExecutableWriterHolder& other) = delete;
+
+    ExecutableWriterHolder(ExecutableWriterHolder&& other)
+    {
+        Move(other);
+    }
+
+    ExecutableWriterHolder& operator=(ExecutableWriterHolder&& other)
+    {
+        Unmap();
+        Move(other);
+        return *this;
+    }
+
+    ExecutableWriterHolder() : m_addressRX(nullptr), m_addressRW(nullptr)
+    {
+    }
+
+    ExecutableWriterHolder(T* addressRX, size_t size)
+    {
+        m_addressRX = addressRX;
+        m_addressRW = addressRX;
+    }
+
+    ~ExecutableWriterHolder()
+    {
+        Unmap();
+    }
+
+    // Get the writeable address
+    inline T *GetRW() const
+    {
+        return m_addressRW;
+    }
+};
diff --git a/src/coreclr/inc/holder.h b/src/coreclr/inc/holder.h
index 20f6aeb964614..448610634a612 100644
--- a/src/coreclr/inc/holder.h
+++ b/src/coreclr/inc/holder.h
@@ -934,11 +934,17 @@ using NonVMComHolder = SpecializedWrapper<_TYPE, DoTheRelease<_TYPE>>;
 //        }                   // foo->DecRef() on out of scope
 //
 //-----------------------------------------------------------------------------
+template <typename T>
+class ExecutableWriterHolder;
+
 template <typename TYPE>
 FORCEINLINE void StubRelease(TYPE* value)
 {
     if (value)
-        value->DecRef();
+    {
+        ExecutableWriterHolder<TYPE> stubWriterHolder(value, sizeof(TYPE));
+        stubWriterHolder.GetRW()->DecRef();
+    }
 }
diff --git a/src/coreclr/inc/loaderheap.h b/src/coreclr/inc/loaderheap.h
index 96fec35be0ce4..2dc156572b442 100644
--- a/src/coreclr/inc/loaderheap.h
+++ b/src/coreclr/inc/loaderheap.h
@@ -16,6 +16,7 @@
 #include "utilcode.h"
 #include "ex.h"
+#include "executableallocator.h"
 
 //==============================================================================
 // Interface used to back out loader heap allocations.
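For illustration only, not part of the patch: why the holder above is movable.
The loaderheap.cpp changes that follow default-construct a holder and
move-assign into it only when the heap is executable. The FillRegion helper
and its parameters are hypothetical.

    #include <cstring>
    #include "executableallocator.h"

    void FillRegion(void* pMem, size_t size, bool isExecutable)
    {
        void* pMemRW = pMem;

        // An empty holder: it maps nothing and its destructor unmaps nothing.
        ExecutableWriterHolder<void> memWriterHolder;
        if (isExecutable)
        {
            // Move-assignment unmaps whatever the target held before and
            // steals the new mapping, so exactly one unmap happens when the
            // holder goes out of scope.
            memWriterHolder = ExecutableWriterHolder<void>(pMem, size);
            pMemRW = memWriterHolder.GetRW();
        }

        memset(pMemRW, 0, size); // one code path for executable and data heaps
    }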
diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 33974b9e29076..adaf07d8f5825 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -1330,8 +1330,14 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize
     if (pData)
     {
 #ifdef _DEBUG
+        BYTE *pAllocatedBytes = (BYTE*)pData;
+        ExecutableWriterHolder<void> dataWriterHolder;
+        if (m_Options & LHF_EXECUTABLE)
+        {
+            dataWriterHolder = ExecutableWriterHolder<void>(pData, dwSize);
+            pAllocatedBytes = (BYTE *)dataWriterHolder.GetRW();
+        }
 
-        BYTE *pAllocatedBytes = (BYTE *)pData;
 #if LOADER_HEAP_DEBUG_BOUNDARY > 0
         // Don't fill the memory we allocated - it is assumed to be zeroed - fill the memory after it
         memset(pAllocatedBytes + dwRequestedSize, 0xEE, LOADER_HEAP_DEBUG_BOUNDARY);
@@ -1344,7 +1350,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize
 
         if (!m_fExplicitControl)
         {
-            LoaderHeapValidationTag *pTag = AllocMem_GetTag(pData, dwRequestedSize);
+            LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes, dwRequestedSize);
             pTag->m_allocationType = kAllocMem;
             pTag->m_dwRequestedSize = dwRequestedSize;
             pTag->m_szFile = szFile;
@@ -1514,7 +1520,14 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
         {
             // Cool. This was the last block allocated. We can just undo the allocation instead
            // of going to the freelist.
-            memset(pMem, 0x00, dwSize); // Fill freed region with 0
+            void *pMemRW = pMem;
+            ExecutableWriterHolder<void> memWriterHolder;
+            if (m_Options & LHF_EXECUTABLE)
+            {
+                memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+                pMemRW = memWriterHolder.GetRW();
+            }
+            memset(pMemRW, 0x00, dwSize); // Fill freed region with 0
             m_pAllocPtr = (BYTE*)pMem;
         }
         else
@@ -1626,7 +1639,14 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz
     ((BYTE*&)pResult) += extra;
 
 #ifdef _DEBUG
-    BYTE *pAllocatedBytes = (BYTE *)pResult;
+    BYTE *pAllocatedBytes = (BYTE *)pResult;
+    ExecutableWriterHolder<void> resultWriterHolder;
+    if (m_Options & LHF_EXECUTABLE)
+    {
+        resultWriterHolder = ExecutableWriterHolder<void>(pResult, dwSize - extra);
+        pAllocatedBytes = (BYTE *)resultWriterHolder.GetRW();
+    }
+
 #if LOADER_HEAP_DEBUG_BOUNDARY > 0
     // Don't fill the entire memory - we assume it is all zeroed -just the memory after our alloc
     memset(pAllocatedBytes + dwRequestedSize, 0xee, LOADER_HEAP_DEBUG_BOUNDARY);
@@ -1656,7 +1676,7 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz
 
     if (!m_fExplicitControl)
    {
-        LoaderHeapValidationTag *pTag = AllocMem_GetTag(((BYTE*)pResult) - extra, dwRequestedSize + extra);
+        LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes - extra, dwRequestedSize + extra);
         pTag->m_allocationType = kAllocMem;
         pTag->m_dwRequestedSize = dwRequestedSize + extra;
         pTag->m_szFile = szFile;
diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp
index 153993cb37c2d..d00f7b74df0d4 100644
--- a/src/coreclr/vm/amd64/cgenamd64.cpp
+++ b/src/coreclr/vm/amd64/cgenamd64.cpp
@@ -450,7 +450,7 @@ void EncodeLoadAndJumpThunk (LPBYTE pBuffer, LPVOID pv, LPVOID pTarget)
     _ASSERTE(DbgIsExecutable(pBuffer, 22));
 }
 
-void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target)
 {
     CONTRACT_VOID
     {
@@ -460,7 +460,8 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
     }
     CONTRACT_END;
 
-    BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;
+    BYTE
*pBufferRX = (BYTE*)pCOMMethodRX - COMMETHOD_CALL_PRESTUB_SIZE; + BYTE *pBufferRW = (BYTE*)pCOMMethodRW - COMMETHOD_CALL_PRESTUB_SIZE; // We need the target to be in a 64-bit aligned memory location and the call instruction // to immediately precede the ComCallMethodDesc. We'll generate an indirect call to avoid @@ -471,21 +472,21 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target) // nop 90 // call [$ - 10] ff 15 f0 ff ff ff - *((UINT64 *)&pBuffer[COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET]) = (UINT64)target; + *((UINT64 *)&pBufferRW[COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET]) = (UINT64)target; - pBuffer[-2] = 0x90; - pBuffer[-1] = 0x90; + pBufferRW[-2] = 0x90; + pBufferRW[-1] = 0x90; - pBuffer[0] = 0xFF; - pBuffer[1] = 0x15; - *((UINT32 UNALIGNED *)&pBuffer[2]) = (UINT32)(COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET - COMMETHOD_CALL_PRESTUB_SIZE); + pBufferRW[0] = 0xFF; + pBufferRW[1] = 0x15; + *((UINT32 UNALIGNED *)&pBufferRW[2]) = (UINT32)(COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET - COMMETHOD_CALL_PRESTUB_SIZE); - _ASSERTE(DbgIsExecutable(pBuffer, COMMETHOD_CALL_PRESTUB_SIZE)); + _ASSERTE(DbgIsExecutable(pBufferRX, COMMETHOD_CALL_PRESTUB_SIZE)); RETURN; } -void emitJump(LPBYTE pBuffer, LPVOID target) +void emitJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) { CONTRACTL { @@ -493,25 +494,25 @@ void emitJump(LPBYTE pBuffer, LPVOID target) GC_NOTRIGGER; MODE_ANY; - PRECONDITION(CheckPointer(pBuffer)); + PRECONDITION(CheckPointer(pBufferRX)); } CONTRACTL_END; // mov rax, 123456789abcdef0h 48 b8 xx xx xx xx xx xx xx xx // jmp rax ff e0 - pBuffer[0] = 0x48; - pBuffer[1] = 0xB8; + pBufferRW[0] = 0x48; + pBufferRW[1] = 0xB8; - *((UINT64 UNALIGNED *)&pBuffer[2]) = (UINT64)target; + *((UINT64 UNALIGNED *)&pBufferRW[2]) = (UINT64)target; - pBuffer[10] = 0xFF; - pBuffer[11] = 0xE0; + pBufferRW[10] = 0xFF; + pBufferRW[11] = 0xE0; - _ASSERTE(DbgIsExecutable(pBuffer, 12)); + _ASSERTE(DbgIsExecutable(pBufferRX, 12)); } -void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam) +void UMEntryThunkCode::Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam) { CONTRACTL { @@ -542,7 +543,7 @@ void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam) m_jmpRAX[1] = 0xFF; m_jmpRAX[2] = 0xE0; - _ASSERTE(DbgIsExecutable(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0])); + _ASSERTE(DbgIsExecutable(&pEntryThunkCodeRX->m_movR10[0], &pEntryThunkCodeRX->m_jmpRAX[3]-&pEntryThunkCodeRX->m_movR10[0])); } void UMEntryThunkCode::Poison() @@ -555,15 +556,18 @@ void UMEntryThunkCode::Poison() } CONTRACTL_END; - m_execstub = (BYTE *)UMEntryThunk::ReportViolation; + ExecutableWriterHolder thunkWriterHolder(this, sizeof(UMEntryThunkCode)); + UMEntryThunkCode *pThisRW = thunkWriterHolder.GetRW(); + + pThisRW->m_execstub = (BYTE *)UMEntryThunk::ReportViolation; - m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + pThisRW->m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; #ifdef _WIN32 // mov rcx, pUMEntryThunk // 48 b9 xx xx xx xx xx xx xx xx - m_movR10[1] = 0xB9; + pThisRW->m_movR10[1] = 0xB9; #else // mov rdi, pUMEntryThunk // 48 bf xx xx xx xx xx xx xx xx - m_movR10[1] = 0xBF; + pThisRW->m_movR10[1] = 0xBF; #endif ClrFlushInstructionCache(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0]); @@ -647,7 +651,7 @@ INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMe return static_cast(offset); } -INT32 rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCODE jumpStubAddr, bool emitJump) +INT32 rel32UsingPreallocatedJumpStub(INT32 
UNALIGNED * pRel32, PCODE target, PCODE jumpStubAddrRX, PCODE jumpStubAddrRW, bool emitJump) { CONTRACTL { @@ -657,12 +661,12 @@ INT32 rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCO CONTRACTL_END; TADDR baseAddr = (TADDR)pRel32 + 4; - _ASSERTE(FitsInI4(jumpStubAddr - baseAddr)); + _ASSERTE(FitsInI4(jumpStubAddrRX - baseAddr)); INT_PTR offset = target - baseAddr; if (!FitsInI4(offset) INDEBUG(|| PEDecoder::GetForceRelocs())) { - offset = jumpStubAddr - baseAddr; + offset = jumpStubAddrRX - baseAddr; if (!FitsInI4(offset)) { _ASSERTE(!"jump stub was not in expected range"); @@ -671,11 +675,11 @@ INT32 rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCO if (emitJump) { - emitBackToBackJump((LPBYTE)jumpStubAddr, (LPVOID)target); + emitBackToBackJump((LPBYTE)jumpStubAddrRX, (LPBYTE)jumpStubAddrRW, (LPVOID)target); } else { - _ASSERTE(decodeBackToBackJump(jumpStubAddr) == target); + _ASSERTE(decodeBackToBackJump(jumpStubAddrRX) == target); } } @@ -862,7 +866,9 @@ EXTERN_C PCODE VirtualMethodFixupWorker(TransitionBlock * pTransitionBlock, CORC *(INT32 *)(pNewValue+1) = rel32UsingJumpStub((INT32*)(&pThunk->callJmp[1]), pCode, pMD, NULL); _ASSERTE(IS_ALIGNED(pThunk, sizeof(INT64))); - FastInterlockCompareExchangeLong((INT64*)pThunk, newValue, oldValue); + + ExecutableWriterHolder thunkWriterHolder((INT64*)pThunk, sizeof(INT64)); + FastInterlockCompareExchangeLong(thunkWriterHolder.GetRW(), newValue, oldValue); FlushInstructionCache(GetCurrentProcess(), pThunk, 8); } @@ -888,14 +894,17 @@ EXTERN_C PCODE VirtualMethodFixupWorker(TransitionBlock * pTransitionBlock, CORC #define BEGIN_DYNAMIC_HELPER_EMIT(size) \ SIZE_T cb = size; \ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \ - BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \ + BYTE * pStartRX = (BYTE *)(void*)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \ + ExecutableWriterHolder startWriterHolder(pStartRX, cbAligned); \ + BYTE * pStart = startWriterHolder.GetRW(); \ + size_t rxOffset = pStartRX - pStart; \ BYTE * p = pStart; #define END_DYNAMIC_HELPER_EMIT() \ _ASSERTE(pStart + cb == p); \ while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \ - ClrFlushInstructionCache(pStart, cbAligned); \ - return (PCODE)pStart + ClrFlushInstructionCache(pStartRX, cbAligned); \ + return (PCODE)pStartRX PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target) { @@ -913,13 +922,13 @@ PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCOD p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; END_DYNAMIC_HELPER_EMIT(); } -void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target) +void DynamicHelpers::EmitHelperWithArg(BYTE*& p, size_t rxOffset, LoaderAllocator * pAllocator, TADDR arg, PCODE target) { CONTRACTL { @@ -940,7 +949,7 @@ void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, T p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; } @@ -948,7 +957,7 @@ PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, 
TADDR ar { BEGIN_DYNAMIC_HELPER_EMIT(15); - EmitHelperWithArg(p, pAllocator, arg, target); + EmitHelperWithArg(p, rxOffset, pAllocator, arg, target); END_DYNAMIC_HELPER_EMIT(); } @@ -976,7 +985,7 @@ PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADD p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; END_DYNAMIC_HELPER_EMIT(); @@ -1005,7 +1014,7 @@ PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR ar p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; END_DYNAMIC_HELPER_EMIT(); @@ -1071,7 +1080,7 @@ PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADD p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; END_DYNAMIC_HELPER_EMIT(); @@ -1100,7 +1109,7 @@ PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADD p += 8; *p++ = X86_INSTR_JMP_REL32; // jmp rel32 - *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + *(INT32 *)p = rel32UsingJumpStub((INT32 *)(p + rxOffset), target, NULL, pAllocator); p += 4; END_DYNAMIC_HELPER_EMIT(); @@ -1117,9 +1126,10 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule)); GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT); - pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot; - pArgs->signature = pLookup->signature; - pArgs->module = (CORINFO_MODULE_HANDLE)pModule; + ExecutableWriterHolder argsWriterHolder(pArgs, sizeof(GenericHandleArgs)); + argsWriterHolder.GetRW()->dictionaryIndexAndSlot = dictionaryIndexAndSlot; + argsWriterHolder.GetRW()->signature = pLookup->signature; + argsWriterHolder.GetRW()->module = (CORINFO_MODULE_HANDLE)pModule; WORD slotOffset = (WORD)(dictionaryIndexAndSlot & 0xFFFF) * sizeof(Dictionary*); @@ -1131,7 +1141,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, // rcx/rdi contains the generic context parameter // mov rdx/rsi,pArgs // jmp helperAddress - EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress); + EmitHelperWithArg(p, rxOffset, pAllocator, (TADDR)pArgs, helperAddress); END_DYNAMIC_HELPER_EMIT(); } @@ -1238,7 +1248,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, // mov rdx|rsi,pArgs // jmp helperAddress - EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress); + EmitHelperWithArg(p, rxOffset, pAllocator, (TADDR)pArgs, helperAddress); } } diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 7312ad0a019fe..6300876fa330e 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -370,11 +370,11 @@ INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMe LoaderAllocator *pLoaderAllocator = NULL, bool throwOnOutOfMemoryWithinRange = true); // Get Rel32 destination, emit jumpStub if necessary into a preallocated location -INT32 
rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCODE jumpStubAddr, bool emitJump); +INT32 rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCODE jumpStubAddr, PCODE jumpStubAddrRW, bool emitJump); -void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target); +void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target); -void emitJump(LPBYTE pBuffer, LPVOID target); +void emitJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target); BOOL isJumpRel32(PCODE pCode); PCODE decodeJump32(PCODE pCode); @@ -388,11 +388,11 @@ PCODE decodeJump64(PCODE pCode); // For all other platforms back to back jumps don't require anything special // That is why we have these two wrapper functions that call emitJump and decodeJump // -inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target) +inline void emitBackToBackJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) { WRAPPER_NO_CONTRACT; - emitJump(pBuffer, target); + emitJump(pBufferRX, pBufferRW, target); } inline BOOL isBackToBackJump(PCODE pCode) @@ -438,7 +438,7 @@ struct DECLSPEC_ALIGN(8) UMEntryThunkCode BYTE m_jmpRAX[3]; // JMP RAX BYTE m_padding2[5]; - void Encode(BYTE* pTargetCode, void* pvSecretParam); + void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); void Poison(); LPCBYTE GetEntryPoint() const @@ -610,19 +610,19 @@ class CallCountingStubShort : public CallCountingStub #ifndef DACCESS_COMPILE public: - CallCountingStubShort(CallCount *remainingCallCountCell, PCODE targetForMethod) + CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) : m_part0{ 0x48, 0xb8}, // mov rax, m_remainingCallCountCell(remainingCallCountCell), // m_part1{ 0x66, 0xff, 0x08, // dec word ptr [rax] 0x0f, 0x85}, // jnz m_rel32TargetForMethod( // GetRelative32BitOffset( - &m_rel32TargetForMethod, + &stubRX->m_rel32TargetForMethod, targetForMethod)), m_part2{ 0xe8}, // call m_rel32TargetForThresholdReached( // GetRelative32BitOffset( - &m_rel32TargetForThresholdReached, + &stubRX->m_rel32TargetForThresholdReached, TargetForThresholdReached)), // (rip == stub-identifying token) m_alignmentPadding{} diff --git a/src/coreclr/vm/amd64/virtualcallstubcpu.hpp b/src/coreclr/vm/amd64/virtualcallstubcpu.hpp index 860a681e21352..70b2de5813438 100644 --- a/src/coreclr/vm/amd64/virtualcallstubcpu.hpp +++ b/src/coreclr/vm/amd64/virtualcallstubcpu.hpp @@ -97,7 +97,7 @@ struct LookupHolder { static void InitializeStatic(); - void Initialize(PCODE resolveWorkerTarget, size_t dispatchToken); + void Initialize(LookupHolder* pLookupHolderRX, PCODE resolveWorkerTarget, size_t dispatchToken); LookupStub* stub() { LIMITED_METHOD_CONTRACT; return &_stub; } @@ -317,7 +317,7 @@ struct DispatchHolder { static void InitializeStatic(); - void Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, + void Initialize(DispatchHolder* pDispatchHolderRX, PCODE implTarget, PCODE failTarget, size_t expectedMT, DispatchStub::DispatchStubType type); static size_t GetHolderSize(DispatchStub::DispatchStubType type) @@ -453,7 +453,8 @@ struct ResolveHolder { static void InitializeStatic(); - void Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, + void Initialize(ResolveHolder* pResolveHolderRX, + PCODE resolveWorkerTarget, PCODE patcherTarget, size_t dispatchToken, UINT32 hashedToken, void * cacheAddr, INT32* counterAddr); @@ -573,7 +574,7 @@ void LookupHolder::InitializeStatic() lookupInit.part3 [1] 
= 0xE0; } -void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken) +void LookupHolder::Initialize(LookupHolder* pLookupHolderRX, PCODE resolveWorkerTarget, size_t dispatchToken) { _stub = lookupInit; @@ -632,7 +633,7 @@ void DispatchHolder::InitializeStatic() dispatchLongInit.part5 [1] = 0xE0; }; -void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT, +void DispatchHolder::Initialize(DispatchHolder* pDispatchHolderRX, PCODE implTarget, PCODE failTarget, size_t expectedMT, DispatchStub::DispatchStubType type) { // @@ -650,17 +651,18 @@ void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expe // if (type == DispatchStub::e_TYPE_SHORT) { - DispatchStubShort *shortStub = const_cast(stub()->getShortStub()); + DispatchStubShort *shortStubRW = const_cast(stub()->getShortStub()); + DispatchStubShort *shortStubRX = const_cast(pDispatchHolderRX->stub()->getShortStub()); // initialize the static data - *shortStub = dispatchShortInit; + *shortStubRW = dispatchShortInit; // fill in the dynamic data - size_t displ = (failTarget - ((PCODE) &shortStub->_failDispl + sizeof(DISPL))); + size_t displ = (failTarget - ((PCODE) &shortStubRX->_failDispl + sizeof(DISPL))); CONSISTENCY_CHECK(FitsInI4(displ)); - shortStub->_failDispl = (DISPL) displ; - shortStub->_implTarget = (size_t) implTarget; - CONSISTENCY_CHECK((PCODE)&shortStub->_failDispl + sizeof(DISPL) + shortStub->_failDispl == failTarget); + shortStubRW->_failDispl = (DISPL) displ; + shortStubRW->_implTarget = (size_t) implTarget; + CONSISTENCY_CHECK((PCODE)&shortStubRX->_failDispl + sizeof(DISPL) + shortStubRX->_failDispl == failTarget); } else { @@ -769,7 +771,8 @@ void ResolveHolder::InitializeStatic() resolveInit.part10 [1] = 0xE0; }; -void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget, +void ResolveHolder::Initialize(ResolveHolder* pResolveHolderRX, + PCODE resolveWorkerTarget, PCODE patcherTarget, size_t dispatchToken, UINT32 hashedToken, void * cacheAddr, INT32* counterAddr) { diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index c68e763e8945d..34f9e3b941fc8 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -232,7 +232,7 @@ inline void ClearITState(T_CONTEXT *context) { } #ifdef FEATURE_COMINTEROP -void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target); +void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMethodRW, PCODE target); #endif // FEATURE_COMINTEROP //------------------------------------------------------------------------ @@ -283,14 +283,14 @@ inline int16_t decodeUnconditionalBranchThumb(LPBYTE pBuffer) } //------------------------------------------------------------------------ -inline void emitJump(LPBYTE pBuffer, LPVOID target) +inline void emitJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) { LIMITED_METHOD_CONTRACT; // The PC-relative load we emit below requires 4-byte alignment for the offset to be calculated correctly. 
- _ASSERTE(((UINT_PTR)pBuffer & 3) == 0); + _ASSERTE(((UINT_PTR)pBufferRX & 3) == 0); - DWORD * pCode = (DWORD *)pBuffer; + DWORD * pCode = (DWORD *)pBufferRW; // ldr pc, [pc, #0] pCode[0] = 0xf000f8df; @@ -335,10 +335,10 @@ inline BOOL isBackToBackJump(PCODE pBuffer) } //------------------------------------------------------------------------ -inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target) +inline void emitBackToBackJump(LPBYTE pBufferRX, LPBYTE pBufferRW, LPVOID target) { WRAPPER_NO_CONTRACT; - emitJump(pBuffer, target); + emitJump(pBufferRX, pBufferRW, target); } //------------------------------------------------------------------------ @@ -943,7 +943,7 @@ struct DECLSPEC_ALIGN(4) UMEntryThunkCode TADDR m_pTargetCode; TADDR m_pvSecretParam; - void Encode(BYTE* pTargetCode, void* pvSecretParam); + void Encode(UMEntryThunkCode *pEntryThunkCodeRX, BYTE* pTargetCode, void* pvSecretParam); void Poison(); LPCBYTE GetEntryPoint() const @@ -1055,7 +1055,7 @@ struct StubPrecode { TADDR m_pTarget; TADDR m_pMethodDesc; - void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + void Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); TADDR GetMethodDesc() { @@ -1078,7 +1078,8 @@ struct StubPrecode { } CONTRACTL_END; - InterlockedExchange((LONG*)&m_pTarget, (LONG)GetPreStubEntryPoint()); + ExecutableWriterHolder precodeWriterHolder(this, sizeof(StubPrecode)); + InterlockedExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)GetPreStubEntryPoint()); } BOOL SetTargetInterlocked(TADDR target, TADDR expected) @@ -1090,8 +1091,9 @@ struct StubPrecode { } CONTRACTL_END; + ExecutableWriterHolder precodeWriterHolder(this, sizeof(StubPrecode)); return (TADDR)InterlockedCompareExchange( - (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected; + (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } #ifdef FEATURE_PREJIT @@ -1114,7 +1116,7 @@ struct NDirectImportPrecode { // takes advantage of this to detect NDirectImportPrecode. 
TADDR m_pTarget; - void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + void Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); TADDR GetMethodDesc() { @@ -1155,7 +1157,7 @@ struct FixupPrecode { BYTE m_PrecodeChunkIndex; TADDR m_pTarget; - void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); + void Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); TADDR GetBase() { @@ -1182,7 +1184,8 @@ struct FixupPrecode { } CONTRACTL_END; - InterlockedExchange((LONG*)&m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk)); + ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); + InterlockedExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk)); } BOOL SetTargetInterlocked(TADDR target, TADDR expected) @@ -1194,8 +1197,9 @@ struct FixupPrecode { } CONTRACTL_END; + ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); return (TADDR)InterlockedCompareExchange( - (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected; + (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } static BOOL IsFixupPrecodeByASM(PCODE addr) @@ -1261,7 +1265,8 @@ struct ThisPtrRetBufPrecode { } CONTRACTL_END; - return FastInterlockCompareExchange((LONG*)&m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; + ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); + return FastInterlockCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; } }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; @@ -1364,7 +1369,7 @@ class CallCountingStubShort : public CallCountingStub #ifndef DACCESS_COMPILE public: - CallCountingStubShort(CallCount *remainingCallCountCell, PCODE targetForMethod) + CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) : m_part0{ 0xb401, // push {r0} 0xf8df, 0xc01c, // ldr r12, [pc, #(m_remainingCallCountCell)] 0xf8bc, 0x0000, // ldrh r0, [r12] diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index 1ca6fd09642d0..aac3e25b18146 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -98,7 +98,7 @@ class ThumbCondJump : public InstructionFormat //Encoding 1|0|1|1|op|0|i|1|imm5|Rn //op = Bit3(variation) //Rn = Bits2-0(variation) - virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer) + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT variationCode, BYTE *pDataBuffer) { LIMITED_METHOD_CONTRACT @@ -109,8 +109,8 @@ class ThumbCondJump : public InstructionFormat _ASSERTE((fixedUpReference & 0x1) == 0); - pOutBuffer[0] = static_cast(((0x3e & fixedUpReference) << 2) | (0x7 & variationCode)); - pOutBuffer[1] = static_cast(0xb1 | (0x8 & variationCode)| ((0x40 & fixedUpReference)>>5)); + pOutBufferRW[0] = static_cast(((0x3e & fixedUpReference) << 2) | (0x7 & variationCode)); + pOutBufferRW[1] = static_cast(0xb1 | (0x8 & variationCode)| ((0x40 & fixedUpReference)>>5)); } }; @@ -138,7 +138,7 @@ class ThumbNearJump : public InstructionFormat return 0; } - virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT cond, BYTE 
*pDataBuffer) + virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT cond, BYTE *pDataBuffer) { LIMITED_METHOD_CONTRACT @@ -155,8 +155,8 @@ class ThumbNearJump : public InstructionFormat _ASSERTE(!"Expected refSize to be 2"); //Emit T2 encoding of B