diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 78aa969473525..b4a4859342702 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -119,6 +119,8 @@ add_subdirectory(pal/prebuilt/inc) add_subdirectory(debug/debug-pal) +add_subdirectory(minipal) + if(CLR_CMAKE_TARGET_WIN32) add_subdirectory(gc/sample) endif() @@ -171,6 +173,7 @@ include_directories("classlibnative/cryptography") include_directories("classlibnative/inc") include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") +include_directories("minipal") if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) include_directories("${GENERATED_INCLUDE_DIR}/etw") diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index eeb421cac4c2f..0485ff99a99eb 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -224,10 +224,6 @@ if(CLR_CMAKE_TARGET_WIN32) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif(CLR_CMAKE_TARGET_WIN32) -if(CLR_CMAKE_TARGET_OSX) - add_definitions(-DFEATURE_WRITEBARRIER_COPY) -endif(CLR_CMAKE_TARGET_OSX) - if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/debug/ee/arm64/arm64walker.cpp b/src/coreclr/debug/ee/arm64/arm64walker.cpp index ae6e8c1fc2933..6c4dee9349700 100644 --- a/src/coreclr/debug/ee/arm64/arm64walker.cpp +++ b/src/coreclr/debug/ee/arm64/arm64walker.cpp @@ -171,7 +171,14 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, 0xd503201f); //Add Nop in buffer - m_pSharedPatchBypassBuffer->RipTargetFixup = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder ripTargetFixupWriterHolder(&m_pSharedPatchBypassBuffer->RipTargetFixup, sizeof(UINT_PTR)); + UINT_PTR *pRipTargetFixupRW = ripTargetFixupWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + UINT_PTR *pRipTargetFixupRW = &m_pSharedPatchBypassBuffer->RipTargetFixup; +#endif // HOST_OSX && HOST_ARM64 + + *pRipTargetFixupRW = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x is a Control Flow instr \n", opcode)); if (walk == WALK_CALL) //initialize Lr diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index b17ae8f115002..f9304d16ab070 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -84,8 +84,13 @@ SharedPatchBypassBuffer* DebuggerControllerPatch::GetOrCreateSharedPatchBypassBu if (m_pSharedPatchBypassBuffer == NULL) { void *pSharedPatchBypassBufferRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(SharedPatchBypassBuffer)); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX, sizeof(SharedPatchBypassBuffer)); - new (sharedPatchBypassBufferWriterHolder.GetRW()) SharedPatchBypassBuffer(); + void *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + void *pSharedPatchBypassBufferRW = pSharedPatchBypassBufferRX; +#endif // HOST_OSX && HOST_ARM64 + new (pSharedPatchBypassBufferRW) SharedPatchBypassBuffer(); 
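The hunk above is the recurring W^X pattern this change introduces: allocate the object in read-execute memory, open a temporary read-write mapping of the same pages, construct through the RW alias, and publish only the RX address. A minimal sketch of that sequence, with a placeholder heap `pExecHeap` and type `T` standing in for the runtime-specific names used in the hunk:

    // Sketch only: RX-alloc / RW-write / RX-publish under W^X (placeholder names).
    void* pRX = pExecHeap->Alloc(sizeof(T));                     // pages are mapped read-execute
    ExecutableWriterHolder<T> writerHolder((T*)pRX, sizeof(T));  // scratch read-write view of the same pages
    new (writerHolder.GetRW()) T();                              // construct via the writable alias
    T* pPublished = (T*)pRX;                                     // callers only ever see the RX address
    // the holder's destructor tears down the RW mapping when it goes out of scope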
m_pSharedPatchBypassBuffer = (SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX; _ASSERTE(m_pSharedPatchBypassBuffer); @@ -4351,7 +4356,15 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // m_pSharedPatchBypassBuffer = patch->GetOrCreateSharedPatchBypassBuffer(); - BYTE* patchBypass = m_pSharedPatchBypassBuffer->PatchBypass; +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)m_pSharedPatchBypassBuffer, sizeof(SharedPatchBypassBuffer)); + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = m_pSharedPatchBypassBuffer; +#endif // HOST_OSX && HOST_ARM64 + + BYTE* patchBypassRX = m_pSharedPatchBypassBuffer->PatchBypass; + BYTE* patchBypassRW = pSharedPatchBypassBufferRW->PatchBypass; LOG((LF_CORDB, LL_INFO10000, "DPS::DPS: Patch skip for opcode 0x%.4x at address %p buffer allocated at 0x%.8x\n", patch->opcode, patch->address, m_pSharedPatchBypassBuffer)); // Copy the instruction block over to the patch skip @@ -4367,19 +4380,19 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // the 2nd skip executes the new jump-stamp code and not the original method prologue code. Copying // the code every time ensures that we have the most up-to-date version of the code in the buffer. _ASSERTE( patch->IsBound() ); - CopyInstructionBlock(patchBypass, (const BYTE *)patch->address); + CopyInstructionBlock(patchBypassRW, (const BYTE *)patch->address); // Technically, we could create a patch skipper for an inactive patch, but we rely on the opcode being // set here. _ASSERTE( patch->IsActivated() ); - CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, patch->opcode); + CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypassRW, patch->opcode); LOG((LF_CORDB, LL_EVERYTHING, "SetInstruction was called\n")); // // Look at instruction to get some attributes // - NativeWalker::DecodeInstructionForPatchSkip(patchBypass, &(m_instrAttrib)); + NativeWalker::DecodeInstructionForPatchSkip(patchBypassRX, &(m_instrAttrib)); #if defined(TARGET_AMD64) @@ -4395,33 +4408,33 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // Populate the RIP-relative buffer with the current value if needed // - BYTE* bufferBypass = m_pSharedPatchBypassBuffer->BypassBuffer; + BYTE* bufferBypassRW = pSharedPatchBypassBufferRW->BypassBuffer; // Overwrite the *signed* displacement. 
- int dwOldDisp = *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]); + int dwOldDisp = *(int*)(&patchBypassRX[m_instrAttrib.m_dwOffsetToDisp]); int dwNewDisp = offsetof(SharedPatchBypassBuffer, BypassBuffer) - (offsetof(SharedPatchBypassBuffer, PatchBypass) + m_instrAttrib.m_cbInstr); - *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; + *(int*)(&patchBypassRW[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; // This could be an LEA, which we'll just have to change into a MOV // and copy the original address - if (((patchBypass[0] == 0x4C) || (patchBypass[0] == 0x48)) && (patchBypass[1] == 0x8d)) + if (((patchBypassRX[0] == 0x4C) || (patchBypassRX[0] == 0x48)) && (patchBypassRX[1] == 0x8d)) { - patchBypass[1] = 0x8b; // MOV reg, mem + patchBypassRW[1] = 0x8b; // MOV reg, mem _ASSERTE((int)sizeof(void*) <= SharedPatchBypassBuffer::cbBufferBypass); - *(void**)bufferBypass = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + *(void**)bufferBypassRW = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); } else { _ASSERTE(m_instrAttrib.m_cOperandSize <= SharedPatchBypassBuffer::cbBufferBypass); // Copy the data into our buffer. - memcpy(bufferBypass, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); + memcpy(bufferBypassRW, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); if (m_instrAttrib.m_fIsWrite) { // save the actual destination address and size so when we TriggerSingleStep() we can update the value - m_pSharedPatchBypassBuffer->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); - m_pSharedPatchBypassBuffer->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; + pSharedPatchBypassBufferRW->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + pSharedPatchBypassBufferRW->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; } } } @@ -4490,17 +4503,17 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, #else // FEATURE_EMULATE_SINGLESTEP #ifdef TARGET_ARM64 - patchBypass = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); + patchBypassRX = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); #endif //TARGET_ARM64 //set eip to point to buffer... 
- SetIP(context, (PCODE)patchBypass); + SetIP(context, (PCODE)patchBypassRX); if (context ==(T_CONTEXT*) &c) thread->SetThreadContext(&c); - LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypass, patch->opcode)); + LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypassRX, patch->opcode)); // // Turn on single step (if the platform supports it) so we can diff --git a/src/coreclr/debug/ee/controller.h b/src/coreclr/debug/ee/controller.h index 12b1106f7a4b2..6996439c31fba 100644 --- a/src/coreclr/debug/ee/controller.h +++ b/src/coreclr/debug/ee/controller.h @@ -266,14 +266,28 @@ class SharedPatchBypassBuffer LONG AddRef() { - LONG newRefCount = InterlockedIncrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedIncrement(pRefCountRW); _ASSERTE(newRefCount > 0); return newRefCount; } LONG Release() { - LONG newRefCount = InterlockedDecrement(&m_refCount); +#if !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + ExecutableWriterHolder refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedDecrement(pRefCountRW); _ASSERTE(newRefCount >= 0); if (newRefCount == 0) diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 53ee5555ace43..e4563a31757f4 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -1317,13 +1317,19 @@ DebuggerEval::DebuggerEval(CONTEXT * pContext, DebuggerIPCE_FuncEvalInfo * pEval // Allocate the breakpoint instruction info in executable memory. void *bpInfoSegmentRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(DebuggerEvalBreakpointInfoSegment)); + +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder bpInfoSegmentWriterHolder((DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX, sizeof(DebuggerEvalBreakpointInfoSegment)); - new (bpInfoSegmentWriterHolder.GetRW()) DebuggerEvalBreakpointInfoSegment(this); + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = bpInfoSegmentWriterHolder.GetRW(); +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + new (bpInfoSegmentRW) DebuggerEvalBreakpointInfoSegment(this); m_bpInfoSegment = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; // This must be non-zero so that the saved opcode is non-zero, and on IA64 we want it to be 0x16 // so that we can have a breakpoint instruction in any slot in the bundle. 
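    // (The store below goes through bpInfoSegmentRW, the writable alias; m_bpInfoSegment keeps the executable RX address.)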
- bpInfoSegmentWriterHolder.GetRW()->m_breakpointInstruction[0] = 0x16; + bpInfoSegmentRW->m_breakpointInstruction[0] = 0x16; #if defined(TARGET_ARM) USHORT *bp = (USHORT*)&m_bpInfoSegment->m_breakpointInstruction; *bp = CORDbg_BREAK_INSTRUCTION; @@ -16234,6 +16240,7 @@ void Debugger::ReleaseDebuggerDataLock(Debugger *pDebugger) } #endif // DACCESS_COMPILE +#ifndef DACCESS_COMPILE /* ------------------------------------------------------------------------ * * Functions for DebuggerHeap executable memory allocations * ------------------------------------------------------------------------ */ @@ -16378,6 +16385,7 @@ void* DebuggerHeapExecutableMemoryAllocator::GetPointerToChunkWithUsageUpdate(De return page->GetPointerToChunk(chunkNumber); } +#endif // DACCESS_COMPILE /* ------------------------------------------------------------------------ * * DebuggerHeap impl @@ -16412,7 +16420,7 @@ void DebuggerHeap::Destroy() m_hHeap = NULL; } #endif -#ifndef HOST_WINDOWS +#if !defined(HOST_WINDOWS) && !defined(DACCESS_COMPILE) if (m_execMemAllocator != NULL) { delete m_execMemAllocator; @@ -16439,6 +16447,8 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + // Have knob catch if we don't want to lazy init the debugger. _ASSERTE(!g_DbgShouldntUseDebugger); m_fExecutable = fExecutable; @@ -16472,7 +16482,9 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) return E_OUTOFMEMORY; } } -#endif +#endif + +#endif // !DACCESS_COMPILE return S_OK; } @@ -16549,7 +16561,10 @@ void *DebuggerHeap::Alloc(DWORD size) size += sizeof(InteropHeapCanary); #endif - void *ret; + void *ret = NULL; + +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_HEAP _ASSERTE(m_hHeap != NULL); ret = ::HeapAlloc(m_hHeap, HEAP_ZERO_MEMORY, size); @@ -16585,7 +16600,7 @@ void *DebuggerHeap::Alloc(DWORD size) InteropHeapCanary * pCanary = InteropHeapCanary::GetFromRawAddr(ret); ret = pCanary->GetUserAddr(); #endif - +#endif // !DACCESS_COMPILE return ret; } @@ -16638,6 +16653,8 @@ void DebuggerHeap::Free(void *pMem) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_CANARY // Check for canary @@ -16673,6 +16690,7 @@ void DebuggerHeap::Free(void *pMem) #endif // HOST_WINDOWS } #endif +#endif // !DACCESS_COMPILE } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index f16f8cd6d9d9d..5503de2459099 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -1054,6 +1054,8 @@ constexpr uint64_t CHUNKS_PER_DEBUGGERHEAP=(DEBUGGERHEAP_PAGESIZE / EXPECTED_CHU constexpr uint64_t MAX_CHUNK_MASK=((1ull << CHUNKS_PER_DEBUGGERHEAP) - 1); constexpr uint64_t BOOKKEEPING_CHUNK_MASK (1ull << (CHUNKS_PER_DEBUGGERHEAP - 1)); +#ifndef DACCESS_COMPILE + // Forward declaration struct DebuggerHeapExecutableMemoryPage; @@ -1110,8 +1112,13 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage inline void SetNextPage(DebuggerHeapExecutableMemoryPage* nextPage) { +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.nextPage = nextPage; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.nextPage = nextPage; } inline uint64_t GetPageOccupancy() const @@ -1124,8 +1131,13 @@ struct 
DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage // Can't unset the bookmark chunk! ASSERT((newOccupancy & BOOKKEEPING_CHUNK_MASK) != 0); ASSERT(newOccupancy <= MAX_CHUNK_MASK); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.pageOccupancy = newOccupancy; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.pageOccupancy = newOccupancy; } inline void* GetPointerToChunk(int chunkNum) const @@ -1136,14 +1148,18 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage DebuggerHeapExecutableMemoryPage() { - ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - SetPageOccupancy(BOOKKEEPING_CHUNK_MASK); // only the first bit is set. +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif for (uint8_t i = 1; i < CHUNKS_PER_DEBUGGERHEAP; i++) { ASSERT(i != 0); - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.startOfPage = this; - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.chunkNumber = i; + pHeapPageRW->chunks[i].data.startOfPage = this; + pHeapPageRW->chunks[i].data.chunkNumber = i; } } @@ -1190,6 +1206,8 @@ class DebuggerHeapExecutableMemoryAllocator Crst m_execMemAllocMutex; }; +#endif // DACCESS_COMPILE + // ------------------------------------------------------------------------ * // DebuggerHeap class // For interop debugging, we need a heap that: @@ -1201,6 +1219,8 @@ class DebuggerHeapExecutableMemoryAllocator #define USE_INTEROPSAFE_HEAP #endif +class DebuggerHeapExecutableMemoryAllocator; + class DebuggerHeap { public: diff --git a/src/coreclr/debug/inc/amd64/primitives.h b/src/coreclr/debug/inc/amd64/primitives.h index d8d14b24b5425..9d363938519c7 100644 --- a/src/coreclr/debug/inc/amd64/primitives.h +++ b/src/coreclr/debug/inc/amd64/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef CORDB_ADDRESS_TYPE typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -191,14 +187,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } @@ -209,14 +198,7 @@ inline void CORDbgSetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(unsigned char)); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = + *((unsigned char*)address) = (unsigned char) instruction; // setting one byte is important FlushInstructionCache(GetCurrentProcess(), address, 1); diff --git a/src/coreclr/debug/inc/arm/primitives.h b/src/coreclr/debug/inc/arm/primitives.h index c4e2d28602e56..269281eb006be 100644 --- a/src/coreclr/debug/inc/arm/primitives.h +++ b/src/coreclr/debug/inc/arm/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef THUMB_CODE #define THUMB_CODE 1 #endif @@ -163,14 +159,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(PRD_TYPE)); - CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)addressRW; + CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)address; _ASSERTE(ptraddr & THUMB_CODE); ptraddr &= ~THUMB_CODE; diff --git a/src/coreclr/debug/inc/arm64/primitives.h b/src/coreclr/debug/inc/arm64/primitives.h index 4f4c3f7bcd8f2..05c03c7b3094f 100644 --- a/src/coreclr/debug/inc/arm64/primitives.h +++ b/src/coreclr/debug/inc/arm64/primitives.h @@ -150,13 +150,13 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) ExecutableWriterHolder instructionWriterHolder((LPVOID)address, sizeof(PRD_TYPE)); ULONGLONG ptraddr = dac_cast(instructionWriterHolder.GetRW()); -#else // !DBI_COMPILE && !DACCESS_COMPILE +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX ULONGLONG ptraddr = dac_cast(address); -#endif // !DBI_COMPILE && !DACCESS_COMPILE +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX *(PRD_TYPE *)ptraddr = instruction; FlushInstructionCache(GetCurrentProcess(), address, diff --git a/src/coreclr/debug/inc/i386/primitives.h b/src/coreclr/debug/inc/i386/primitives.h index 313b42c5a1970..2f228b3a3a9a1 100644 --- a/src/coreclr/debug/inc/i386/primitives.h +++ b/src/coreclr/debug/inc/i386/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -151,14 +147,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index fae55ecdc3ea5..9b8e4b649864d 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES v3binder System.Globalization.Native-Static interop + coreclrminipal ) if(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index c48872a0b9424..c7266df7dbb01 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -201,6 +201,10 @@ End Crst Exception End +Crst ExecutableAllocatorLock + AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock +End + Crst ExecuteManRangeLock End @@ -505,6 +509,9 @@ Crst TypeEquivalenceMap AcquiredBefore LoaderHeap End +Crst UMEntryThunkFreeListLock +End + Crst UniqueStack AcquiredBefore LoaderHeap End diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 0d2a1db98e471..e2f1a63a20fec 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -737,6 +737,10 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_DOTNET_DiagnosticPorts, W("DiagnosticPorts"), RETAIL_CONFIG_STRING_INFO(INTERNAL_LTTngConfig, W("LTTngConfig"), "Configuration for LTTng.") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is set to 0, this will prevent the LTTng library from being loaded at runtime") +// +// Executable code +// +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 0, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index 
a1bab2ecb906c..7be482c48bb55 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -49,92 +49,94 @@ enum CrstType CrstEventPipe = 31, CrstEventStore = 32, CrstException = 33, - CrstExecuteManRangeLock = 34, - CrstExternalObjectContextCache = 35, - CrstFCall = 36, - CrstFuncPtrStubs = 37, - CrstFusionAppCtx = 38, - CrstGCCover = 39, - CrstGlobalStrLiteralMap = 40, - CrstHandleTable = 41, - CrstHostAssemblyMap = 42, - CrstHostAssemblyMapAdd = 43, - CrstIbcProfile = 44, - CrstIJWFixupData = 45, - CrstIJWHash = 46, - CrstILStubGen = 47, - CrstInlineTrackingMap = 48, - CrstInstMethodHashTable = 49, - CrstInterop = 50, - CrstInteropData = 51, - CrstIsJMCMethod = 52, - CrstISymUnmanagedReader = 53, - CrstJit = 54, - CrstJitGenericHandleCache = 55, - CrstJitInlineTrackingMap = 56, - CrstJitPatchpoint = 57, - CrstJitPerf = 58, - CrstJumpStubCache = 59, - CrstLeafLock = 60, - CrstListLock = 61, - CrstLoaderAllocator = 62, - CrstLoaderAllocatorReferences = 63, - CrstLoaderHeap = 64, - CrstManagedObjectWrapperMap = 65, - CrstMethodDescBackpatchInfoTracker = 66, - CrstModule = 67, - CrstModuleFixup = 68, - CrstModuleLookupTable = 69, - CrstMulticoreJitHash = 70, - CrstMulticoreJitManager = 71, - CrstNativeImageEagerFixups = 72, - CrstNativeImageLoad = 73, - CrstNls = 74, - CrstNotifyGdb = 75, - CrstObjectList = 76, - CrstPEImage = 77, - CrstPendingTypeLoadEntry = 78, - CrstPgoData = 79, - CrstPinnedByrefValidation = 80, - CrstProfilerGCRefDataFreeList = 81, - CrstProfilingAPIStatus = 82, - CrstRCWCache = 83, - CrstRCWCleanupList = 84, - CrstReadyToRunEntryPointToMethodDescMap = 85, - CrstReflection = 86, - CrstReJITGlobalRequest = 87, - CrstRetThunkCache = 88, - CrstSavedExceptionInfo = 89, - CrstSaveModuleProfileData = 90, - CrstSecurityStackwalkCache = 91, - CrstSigConvert = 92, - CrstSingleUseLock = 93, - CrstSpecialStatics = 94, - CrstStackSampler = 95, - CrstStressLog = 96, - CrstStubCache = 97, - CrstStubDispatchCache = 98, - CrstStubUnwindInfoHeapSegments = 99, - CrstSyncBlockCache = 100, - CrstSyncHashLock = 101, - CrstSystemBaseDomain = 102, - CrstSystemDomain = 103, - CrstSystemDomainDelayedUnloadList = 104, - CrstThreadIdDispenser = 105, - CrstThreadpoolTimerQueue = 106, - CrstThreadpoolWaitThreads = 107, - CrstThreadpoolWorker = 108, - CrstThreadStore = 109, - CrstTieredCompilation = 110, - CrstTypeEquivalenceMap = 111, - CrstTypeIDMap = 112, - CrstUMEntryThunkCache = 113, - CrstUniqueStack = 114, - CrstUnresolvedClassLock = 115, - CrstUnwindInfoTableLock = 116, - CrstVSDIndirectionCellLock = 117, - CrstWrapperTemplate = 118, - kNumberOfCrstTypes = 119 + CrstExecutableAllocatorLock = 34, + CrstExecuteManRangeLock = 35, + CrstExternalObjectContextCache = 36, + CrstFCall = 37, + CrstFuncPtrStubs = 38, + CrstFusionAppCtx = 39, + CrstGCCover = 40, + CrstGlobalStrLiteralMap = 41, + CrstHandleTable = 42, + CrstHostAssemblyMap = 43, + CrstHostAssemblyMapAdd = 44, + CrstIbcProfile = 45, + CrstIJWFixupData = 46, + CrstIJWHash = 47, + CrstILStubGen = 48, + CrstInlineTrackingMap = 49, + CrstInstMethodHashTable = 50, + CrstInterop = 51, + CrstInteropData = 52, + CrstIsJMCMethod = 53, + CrstISymUnmanagedReader = 54, + CrstJit = 55, + CrstJitGenericHandleCache = 56, + CrstJitInlineTrackingMap = 57, + CrstJitPatchpoint = 58, + CrstJitPerf = 59, + CrstJumpStubCache = 60, + CrstLeafLock = 61, + CrstListLock = 62, + CrstLoaderAllocator = 63, + CrstLoaderAllocatorReferences = 64, + CrstLoaderHeap = 65, + CrstManagedObjectWrapperMap = 66, + CrstMethodDescBackpatchInfoTracker = 67, + 
CrstModule = 68, + CrstModuleFixup = 69, + CrstModuleLookupTable = 70, + CrstMulticoreJitHash = 71, + CrstMulticoreJitManager = 72, + CrstNativeImageEagerFixups = 73, + CrstNativeImageLoad = 74, + CrstNls = 75, + CrstNotifyGdb = 76, + CrstObjectList = 77, + CrstPEImage = 78, + CrstPendingTypeLoadEntry = 79, + CrstPgoData = 80, + CrstPinnedByrefValidation = 81, + CrstProfilerGCRefDataFreeList = 82, + CrstProfilingAPIStatus = 83, + CrstRCWCache = 84, + CrstRCWCleanupList = 85, + CrstReadyToRunEntryPointToMethodDescMap = 86, + CrstReflection = 87, + CrstReJITGlobalRequest = 88, + CrstRetThunkCache = 89, + CrstSavedExceptionInfo = 90, + CrstSaveModuleProfileData = 91, + CrstSecurityStackwalkCache = 92, + CrstSigConvert = 93, + CrstSingleUseLock = 94, + CrstSpecialStatics = 95, + CrstStackSampler = 96, + CrstStressLog = 97, + CrstStubCache = 98, + CrstStubDispatchCache = 99, + CrstStubUnwindInfoHeapSegments = 100, + CrstSyncBlockCache = 101, + CrstSyncHashLock = 102, + CrstSystemBaseDomain = 103, + CrstSystemDomain = 104, + CrstSystemDomainDelayedUnloadList = 105, + CrstThreadIdDispenser = 106, + CrstThreadpoolTimerQueue = 107, + CrstThreadpoolWaitThreads = 108, + CrstThreadpoolWorker = 109, + CrstThreadStore = 110, + CrstTieredCompilation = 111, + CrstTypeEquivalenceMap = 112, + CrstTypeIDMap = 113, + CrstUMEntryThunkCache = 114, + CrstUMEntryThunkFreeListLock = 115, + CrstUniqueStack = 116, + CrstUnresolvedClassLock = 117, + CrstUnwindInfoTableLock = 118, + CrstVSDIndirectionCellLock = 119, + CrstWrapperTemplate = 120, + kNumberOfCrstTypes = 121 }; #endif // __CRST_TYPES_INCLUDED @@ -147,11 +149,11 @@ int g_rgCrstLevelMap[] = { 10, // CrstAppDomainCache 14, // CrstAppDomainHandleTable - 0, // CrstArgBasedStubCache + 3, // CrstArgBasedStubCache 0, // CrstAssemblyList 12, // CrstAssemblyLoader - 3, // CrstAvailableClass - 4, // CrstAvailableParamTypes + 4, // CrstAvailableClass + 5, // CrstAvailableParamTypes 7, // CrstBaseDomain -1, // CrstCCompRC 13, // CrstClassFactInfoHash @@ -160,7 +162,7 @@ int g_rgCrstLevelMap[] = 6, // CrstCodeFragmentHeap 9, // CrstCodeVersioning 0, // CrstCOMCallWrapper - 4, // CrstCOMWrapperCache + 5, // CrstCOMWrapperCache 3, // CrstDataTest1 0, // CrstDataTest2 0, // CrstDbgTransport @@ -179,9 +181,10 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException + 0, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache - 3, // CrstFCall + 4, // CrstFCall 7, // CrstFuncPtrStubs 10, // CrstFusionAppCtx 10, // CrstGCCover @@ -196,25 +199,25 @@ int g_rgCrstLevelMap[] = 3, // CrstInlineTrackingMap 17, // CrstInstMethodHashTable 20, // CrstInterop - 4, // CrstInteropData + 5, // CrstInteropData 0, // CrstIsJMCMethod 7, // CrstISymUnmanagedReader 11, // CrstJit 0, // CrstJitGenericHandleCache 16, // CrstJitInlineTrackingMap - 3, // CrstJitPatchpoint + 4, // CrstJitPatchpoint -1, // CrstJitPerf 6, // CrstJumpStubCache 0, // CrstLeafLock -1, // CrstListLock 15, // CrstLoaderAllocator 16, // CrstLoaderAllocatorReferences - 0, // CrstLoaderHeap + 3, // CrstLoaderHeap 3, // CrstManagedObjectWrapperMap 14, // CrstMethodDescBackpatchInfoTracker - 4, // CrstModule + 5, // CrstModule 15, // CrstModuleFixup - 3, // CrstModuleLookupTable + 4, // CrstModuleLookupTable 0, // CrstMulticoreJitHash 13, // CrstMulticoreJitManager 0, // CrstNativeImageEagerFixups @@ -222,22 +225,22 @@ int g_rgCrstLevelMap[] = 0, // CrstNls 0, // CrstNotifyGdb 2, // CrstObjectList - 4, // CrstPEImage + 5, // CrstPEImage 19, // 
CrstPendingTypeLoadEntry - 3, // CrstPgoData + 4, // CrstPgoData 0, // CrstPinnedByrefValidation 0, // CrstProfilerGCRefDataFreeList 0, // CrstProfilingAPIStatus - 3, // CrstRCWCache + 4, // CrstRCWCache 0, // CrstRCWCleanupList 10, // CrstReadyToRunEntryPointToMethodDescMap 8, // CrstReflection 17, // CrstReJITGlobalRequest - 3, // CrstRetThunkCache + 4, // CrstRetThunkCache 3, // CrstSavedExceptionInfo 0, // CrstSaveModuleProfileData 0, // CrstSecurityStackwalkCache - 3, // CrstSigConvert + 4, // CrstSigConvert 5, // CrstSingleUseLock 0, // CrstSpecialStatics 0, // CrstStackSampler @@ -247,7 +250,7 @@ int g_rgCrstLevelMap[] = 4, // CrstStubUnwindInfoHeapSegments 3, // CrstSyncBlockCache 0, // CrstSyncHashLock - 4, // CrstSystemBaseDomain + 5, // CrstSystemBaseDomain 13, // CrstSystemDomain 0, // CrstSystemDomainDelayedUnloadList 0, // CrstThreadIdDispenser @@ -256,13 +259,14 @@ int g_rgCrstLevelMap[] = 13, // CrstThreadpoolWorker 12, // CrstThreadStore 8, // CrstTieredCompilation - 3, // CrstTypeEquivalenceMap + 4, // CrstTypeEquivalenceMap 10, // CrstTypeIDMap - 3, // CrstUMEntryThunkCache - 3, // CrstUniqueStack + 4, // CrstUMEntryThunkCache + 3, // CrstUMEntryThunkFreeListLock + 4, // CrstUniqueStack 7, // CrstUnresolvedClassLock 3, // CrstUnwindInfoTableLock - 3, // CrstVSDIndirectionCellLock + 4, // CrstVSDIndirectionCellLock 3, // CrstWrapperTemplate }; @@ -303,6 +307,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstEventPipe", "CrstEventStore", "CrstException", + "CrstExecutableAllocatorLock", "CrstExecuteManRangeLock", "CrstExternalObjectContextCache", "CrstFCall", @@ -383,6 +388,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstTypeEquivalenceMap", "CrstTypeIDMap", "CrstUMEntryThunkCache", + "CrstUMEntryThunkFreeListLock", "CrstUniqueStack", "CrstUnresolvedClassLock", "CrstUnwindInfoTableLock", diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index ce0c6c22f890e..101178f9a4ef0 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -11,6 +11,191 @@ #include "utilcode.h" #include "ex.h" +#include "minipal.h" + +#ifndef DACCESS_COMPILE + +// This class is responsible for allocation of all the executable memory in the runtime. +class ExecutableAllocator +{ + // RX address range block descriptor + struct BlockRX + { + // Next block in a linked list + BlockRX* next; + // Base address of the block + void* baseRX; + // Size of the block + size_t size; + // Offset of the block in the shared memory + size_t offset; + }; + + // RW address range block descriptor + struct BlockRW + { + // Next block in a linked list + BlockRW* next; + // Base address of the RW mapping of the block + void* baseRW; + // Base address of the RX mapping of the block + void* baseRX; + // Size of the block + size_t size; + // Usage reference count of the RW block. RW blocks can be reused + // when multiple mappings overlap in the VA space at the same time + // (even from multiple threads) + size_t refCount; + }; + + typedef void (*FatalErrorHandler)(UINT errorCode, LPCWSTR pszMessage); + + // Instance of the allocator + static ExecutableAllocator* g_instance; + + // Callback to the runtime to report fatal errors + static FatalErrorHandler g_fatalErrorHandler; + +#if USE_UPPER_ADDRESS + // Preferred region to allocate the code in. + static BYTE* g_codeMinAddr; + static BYTE* g_codeMaxAddr; + static BYTE* g_codeAllocStart; + // Next address to try to allocate for code in the preferred region. 
+ static BYTE* g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + + // Caches the COMPlus_EnableWXORX setting + static bool g_isWXorXEnabled; + + // Head of the linked list of all RX blocks that were allocated by this allocator + BlockRX* m_pFirstBlockRX = NULL; + + // Head of the linked list of free RX blocks that were allocated by this allocator and then backed out + BlockRX* m_pFirstFreeBlockRX = NULL; + + // Head of the linked list of currently mapped RW blocks + BlockRW* m_pFirstBlockRW = NULL; + + // Handle of the double mapped memory mapper + void *m_doubleMemoryMapperHandle = NULL; + + // Maximum size of executable memory this allocator can allocate + size_t m_maxExecutableCodeSize; + + // First free offset in the underlying shared memory. It is not used + // for platforms that don't use shared memory. + size_t m_freeOffset = 0; + + // Last RW mapping cached so that it can be reused for the next mapping + // request if it goes into the same range. + BlockRW* m_cachedMapping = NULL; + + // Synchronization of the public allocator methods + CRITSEC_COOKIE m_CriticalSection; + + // Update currently cached mapping. If the passed in block is the same as the one + // in the cache, it keeps it cached. Otherwise it destroys the currently cached one + // and replaces it by the passed in one. + void UpdateCachedMapping(BlockRW *pBlock); + + // Find existing RW block that maps the whole specified range of RX memory. + // Return NULL if no such block exists. + void* FindRWBlock(void* baseRX, size_t size); + + // Add RW block to the list of existing RW blocks + bool AddRWBlock(void* baseRW, void* baseRX, size_t size); + + // Remove RW block from the list of existing RW blocks and return the base + // address and size the underlying memory was mapped at. + // Return false if no existing RW block contains the passed in address. + bool RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize); + + // Find a free block with the closest size >= the requested size. + // Returns NULL if no such block exists. + BlockRX* FindBestFreeBlock(size_t size); + + // Return memory mapping granularity. + static size_t Granularity(); + + // Allocate a block of executable memory of the specified size. + // It doesn't acquire the actual virtual memory, just the + // range of the underlying shared memory. + BlockRX* AllocateBlock(size_t size, bool* pIsFreeBlock); + + // Backout the block allocated by AllocateBlock in case of an + // error. + void BackoutBlock(BlockRX* pBlock, bool isFreeBlock); + + // Allocate range of offsets in the underlying shared memory + bool AllocateOffset(size_t* pOffset, size_t size); + + // Add RX block to the linked list of existing blocks + void AddRXBlock(BlockRX *pBlock); + + // Return true if double mapping is enabled. + static bool IsDoubleMappingEnabled(); + + // Initialize the allocator instance + bool Initialize(); + +public: + + // Return the ExecuteAllocator singleton instance + static ExecutableAllocator* Instance(); + + // Initialize the static members of the Executable allocator and allocate + // and initialize the instance of it. + static HRESULT StaticInitialize(FatalErrorHandler fatalErrorHandler); + + // Destroy the allocator + ~ExecutableAllocator(); + + // Return true if W^X is enabled + static bool IsWXORXEnabled(); + + // Use this function to initialize the g_codeAllocHint + // during startup. base is runtime .dll base address, + // size is runtime .dll virtual size. 
+ static void InitCodeAllocHint(size_t base, size_t size, int randomPageOffset); + + // Use this function to reset the g_codeAllocHint + // after unloading an AppDomain + static void ResetCodeAllocHint(); + + // Returns TRUE if p is located in near clr.dll that allows us + // to use rel32 IP-relative addressing modes. + static bool IsPreferredExecutableRange(void* p); + + // Reserve the specified amount of virtual address space for executable mapping. + void* Reserve(size_t size); + + // Reserve the specified amount of virtual address space for executable mapping. + // The reserved range must be within the loAddress and hiAddress. If it is not + // possible to reserve memory in such range, the method returns NULL. + void* ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress); + + // Reserve the specified amount of virtual address space for executable mapping + // exactly at the given address. + void* ReserveAt(void* baseAddressRX, size_t size); + + // Commit the specified range of memory. The memory can be committed as executable (RX) + // or non-executable (RW) based on the passed in isExecutable flag. The non-executable + // allocations are used to allocate data structures that need to be close to the + // executable code due to memory addressing performance related reasons. + void* Commit(void* pStart, size_t size, bool isExecutable); + + // Release the executable memory block starting at the passed in address that was allocated + // by one of the ReserveXXX methods. + void Release(void* pRX); + + // Map the specified block of executable memory as RW + void* MapRW(void* pRX, size_t size); + + // Unmap the RW mapping at the specified address + void UnmapRW(void* pRW); +}; + // Holder class to map read-execute memory as read-write so that it can be modified without using read-write-execute mapping. // At the moment the implementation is dummy, returning the same addresses for both cases and expecting them to be read-write-execute. // The class uses the move semantics to ensure proper unmapping in case of re-assigning of the holder value. 
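For orientation, this is how the holder is typically used by the callers updated elsewhere in this change (a sketch only; `pCodeRX` and `size` stand in for whatever RX allocation the caller owns):

    // Sketch only: patching previously allocated executable (RX) memory under W^X.
    ExecutableWriterHolder<BYTE> codeWriterHolder(pCodeRX, size);
    BYTE* pCodeRW = codeWriterHolder.GetRW();  // RW alias of the same physical pages
    pCodeRW[0] = 0xCC;                         // writes go through the RW mapping
    // execution continues through pCodeRX; the RW mapping is removed when the holder is destroyed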
@@ -30,13 +215,17 @@ class ExecutableWriterHolder void Unmap() { +#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) if (m_addressRX != NULL) { - // TODO: mapping / unmapping for targets using double memory mapping will be added with the double mapped allocator addition -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(false); -#endif } +#else + if (m_addressRX != m_addressRW) + { + ExecutableAllocator::Instance()->UnmapRW((void*)m_addressRW); + } +#endif } public: @@ -62,9 +251,11 @@ class ExecutableWriterHolder ExecutableWriterHolder(T* addressRX, size_t size) { m_addressRX = addressRX; +#if defined(HOST_OSX) && defined(HOST_ARM64) m_addressRW = addressRX; -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(true); +#else + m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size); #endif } @@ -79,3 +270,5 @@ class ExecutableWriterHolder return m_addressRW; } }; + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index fb65ea9fa613c..3c42f0850850b 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -302,12 +302,12 @@ #endif // !FEATURE_EH_FUNCLETS #ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, JIT_CheckedWriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, JIT_CheckedWriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index a47034ee2e05c..77df9dfa94d2a 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -1014,35 +1014,6 @@ void SplitPath(__in SString const &path, #define CLRGetTickCount64() GetTickCount64() -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset); - - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint(); - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p); - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near mscorwks -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect); - // // Allocate free memory within the range [pMinAddr..pMaxAddr] using // ClrVirtualQuery to find free memory and ClrVirtualAlloc to allocate it. diff --git a/src/coreclr/minipal/CMakeLists.txt b/src/coreclr/minipal/CMakeLists.txt new file mode 100644 index 0000000000000..3096237d2a2fe --- /dev/null +++ b/src/coreclr/minipal/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(.) +if (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Unix) +else (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Windows) +endif (CLR_CMAKE_HOST_UNIX) + diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp new file mode 100644 index 0000000000000..a50b326861aad --- /dev/null +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -0,0 +1,211 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef TARGET_LINUX +#include +#include // __NR_memfd_create +#endif // TARGET_LINUX +#include "minipal.h" + +#if defined(TARGET_OSX) && defined(TARGET_AMD64) +#include +#endif // TARGET_OSX && TARGET_AMD64 + +#ifndef TARGET_OSX + +#ifdef TARGET_64BIT +static const off_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const off_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#ifdef TARGET_LINUX +#define memfd_create(...) syscall(__NR_memfd_create, __VA_ARGS__) +#endif // TARGET_LINUX + +#endif // TARGET_OSX + +bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecutableCodeSize) +{ +#ifndef TARGET_OSX + +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#else // TARGET_FREEBSD + int fd = memfd_create("doublemapper", MFD_CLOEXEC); +#endif // TARGET_FREEBSD + + if (fd == -1) + { + return false; + } + + if (ftruncate(fd, MaxDoubleMappedSize) == -1) + { + close(fd); + return false; + } + + *pMaxExecutableCodeSize = MaxDoubleMappedSize; + *pHandle = (void*)(size_t)fd; +#else // !TARGET_OSX + *pMaxExecutableCodeSize = SIZE_MAX; + *pHandle = NULL; +#endif // !TARGET_OSX + + return true; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ +#ifndef TARGET_OSX + close((int)(size_t)mapperHandle); +#endif +} + +extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); + +#ifdef TARGET_OSX +bool IsMapJitFlagNeeded() +{ + static volatile int isMapJitFlagNeeded = -1; + + if (isMapJitFlagNeeded == -1) + { + int mapJitFlagCheckResult = 0; + int pageSize = sysconf(_SC_PAGE_SIZE); + // Try to map a page with read-write-execute protection. It should fail on Mojave hardened runtime and higher. 
+ void* testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (testPage == MAP_FAILED && (errno == EACCES)) + { + // The mapping has failed with EACCES, check if making the same mapping with MAP_JIT flag works + testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, -1, 0); + if (testPage != MAP_FAILED) + { + mapJitFlagCheckResult = 1; + } + } + + if (testPage != MAP_FAILED) + { + munmap(testPage, pageSize); + } + + isMapJitFlagNeeded = mapJitFlagCheckResult; + } + + return (bool)isMapJitFlagNeeded; +} +#endif // TARGET_OSX + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) +{ + int fd = (int)(size_t)mapperHandle; + + if (rangeStart != NULL || rangeEnd != NULL) + { + void* result = PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(rangeStart, rangeEnd, size); +#ifndef TARGET_OSX + if (result != NULL) + { + // Map the shared memory over the range reserved from the executable memory allocator. + result = mmap(result, size, PROT_NONE, MAP_SHARED | MAP_FIXED, fd, offset); + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + } +#endif // TARGET_OSX + + return result; + } + +#ifndef TARGET_OSX + void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); +#else + int mmapFlags = MAP_ANON | MAP_PRIVATE; + if (IsMapJitFlagNeeded()) + { + mmapFlags |= MAP_JIT; + } + void* result = mmap(NULL, size, PROT_NONE, mmapFlags, -1, 0); +#endif + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + return result; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + if (mprotect(pStart, size, isExecutable ? 
(PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE)) == -1) + { + return NULL; + } + + return pStart; +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + mmap(pStart, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, offset); + memset(pStart, 0, size); +#endif // TARGET_OSX + return munmap(pStart, size) != -1; +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); +#else // TARGET_OSX +#ifdef TARGET_AMD64 + vm_address_t startRW; + vm_prot_t curProtection, maxProtection; + kern_return_t kr = vm_remap(mach_task_self(), &startRW, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, + mach_task_self(), (vm_address_t)pStart, FALSE, &curProtection, &maxProtection, VM_INHERIT_NONE); + + if (kr != KERN_SUCCESS) + { + return NULL; + } + + int st = mprotect((void*)startRW, size, PROT_READ | PROT_WRITE); + if (st == -1) + { + munmap((void*)startRW, size); + return NULL; + } + + return (void*)startRW; +#else // TARGET_AMD64 + // This method should not be called on OSX ARM64 + assert(false); + return NULL; +#endif // TARGET_AMD64 +#endif // TARGET_OSX +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return munmap(pStart, size) != -1; +} diff --git a/src/coreclr/minipal/Windows/CMakeLists.txt b/src/coreclr/minipal/Windows/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Windows/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp new file mode 100644 index 0000000000000..e265f1d139ad0 --- /dev/null +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -0,0 +1,205 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// + +#include +#include +#include +#include "minipal.h" + +#define HIDWORD(_qw) ((ULONG)((_qw) >> 32)) +#define LODWORD(_qw) ((ULONG)(_qw)) + +#ifdef TARGET_64BIT +static const uint64_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const uint64_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#define VIRTUAL_ALLOC_RESERVE_GRANULARITY (64*1024) // 0x10000 (64 KB) +inline size_t ALIGN_UP( size_t val, size_t alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + assert( 0 == (alignment & (alignment - 1)) ); + size_t result = (val + (alignment - 1)) & ~(alignment - 1); + assert( result >= val ); // check for overflow + return result; +} + +template inline T ALIGN_UP(T val, size_t alignment) +{ + return (T)ALIGN_UP((size_t)val, alignment); +} + +inline void *GetTopMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMaximumApplicationAddress; + } + return result; +} + +inline void *GetBotMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMinimumApplicationAddress; + } + return result; +} + +#define TOP_MEMORY (GetTopMemoryAddress()) +#define BOT_MEMORY (GetBotMemoryAddress()) + +bool VMToOSInterface::CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize) +{ + *pMaxExecutableCodeSize = (size_t)MaxDoubleMappedSize; + *pHandle = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_EXECUTE_READWRITE | SEC_RESERVE, // read/write/execute access + HIDWORD(MaxDoubleMappedSize), // maximum object size (high-order DWORD) + LODWORD(MaxDoubleMappedSize), // maximum object size (low-order DWORD) + NULL); + + return *pHandle != NULL; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ + CloseHandle((HANDLE)mapperHandle); +} + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *pMinAddr, const void* pMaxAddr) +{ + BYTE *pResult = nullptr; // our return value; + + if (size == 0) + { + return nullptr; + } + + // + // First lets normalize the pMinAddr and pMaxAddr values + // + // If pMinAddr is NULL then set it to BOT_MEMORY + if ((pMinAddr == 0) || (pMinAddr < (BYTE *) BOT_MEMORY)) + { + pMinAddr = (BYTE *) BOT_MEMORY; + } + + // If pMaxAddr is NULL then set it to TOP_MEMORY + if ((pMaxAddr == 0) || (pMaxAddr > (BYTE *) TOP_MEMORY)) + { + pMaxAddr = (BYTE *) TOP_MEMORY; + } + + // If pMaxAddr is not greater than pMinAddr we can not make an allocation + if (pMaxAddr <= pMinAddr) + { + return nullptr; + } + + // If pMinAddr is BOT_MEMORY and pMaxAddr is TOP_MEMORY + // then we can call ClrVirtualAlloc instead + if ((pMinAddr == (BYTE *) BOT_MEMORY) && (pMaxAddr == (BYTE *) TOP_MEMORY)) + { + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); + } + + // We will do one scan from [pMinAddr .. pMaxAddr] + // First align the tryAddr up to next 64k base address. + // See docs for VirtualAllocEx and lpAddress and 64k alignment for reasons. 
+ // + BYTE * tryAddr = (BYTE *)ALIGN_UP((BYTE *)pMinAddr, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + bool virtualQueryFailed = false; + bool faultInjected = false; + unsigned virtualQueryCount = 0; + + // Now scan memory and try to find a free block of the size requested. + while ((tryAddr + size) <= (BYTE *) pMaxAddr) + { + MEMORY_BASIC_INFORMATION mbInfo; + + // Use VirtualQuery to find out if this address is MEM_FREE + // + virtualQueryCount++; + if (!VirtualQuery((LPCVOID)tryAddr, &mbInfo, sizeof(mbInfo))) + { + // Exit and return nullptr if the VirtualQuery call fails. + virtualQueryFailed = true; + break; + } + + // Is there enough memory free from this start location? + // Note that for most versions of UNIX the mbInfo.RegionSize returned will always be 0 + if ((mbInfo.State == MEM_FREE) && + (mbInfo.RegionSize >= (SIZE_T) size || mbInfo.RegionSize == 0)) + { + // Try reserving the memory using VirtualAlloc now + pResult = (BYTE*)MapViewOfFileEx((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size, + tryAddr); + + // Normally this will be successful + // + if (pResult != nullptr) + { + // return pResult + break; + } + + // We might fail in a race. So just move on to next region and continue trying + tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; + } + else + { + // Try another section of memory + tryAddr = max(tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY, + (BYTE*) mbInfo.BaseAddress + mbInfo.RegionSize); + } + } + + return pResult; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + return VirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READ : PAGE_READWRITE); +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ + // Zero the memory before the unmapping + VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE); + memset(pStart, 0, size); + return UnmapViewOfFile(pStart); +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return UnmapViewOfFile(pStart); +} diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h new file mode 100644 index 0000000000000..39098f9bc1295 --- /dev/null +++ b/src/coreclr/minipal/minipal.h @@ -0,0 +1,78 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +#include + +// Interface between the runtime and platform specific functionality +class VMToOSInterface +{ +private: + ~VMToOSInterface() {} +public: + // Create double mapped memory mapper + // Parameters: + // pHandle - receives handle of the double mapped memory mapper + // pMaxExecutableCodeSize - receives the maximum executable memory size it can map + // Return: + // true if it succeeded, false if it failed + static bool CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize); + + // Destroy the double mapped memory mapper represented by the passed in handle + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to destroy + static void DestroyDoubleMemoryMapper(void *mapperHandle); + + // Reserve a block of memory that can be double mapped. 
+ // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // offset - offset in the underlying shared memory + // size - size of the block to reserve + // rangeStart + // rangeEnd - Requests reserving virtual memory in the specified range. + // Setting both rangeStart and rangeEnd to 0 means that the + // requested range is not limited. + // When a specific range is requested, it is obligatory. + // Return: + // starting virtual address of the reserved memory or NULL if it failed + static void* ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd); + + // Commit a block of memory in the range previously reserved by the ReserveDoubleMappedMemory + // Parameters: + // pStart - start address of the virtual address range to commit + // size - size of the memory block to commit + // isExecutable - true means that the mapping should be RX, false means RW + // Return: + // Committed range start + static void* CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable); + + // Release a block of virtual memory previously commited by the CommitDoubleMappedMemory + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the virtual address range to release. It must be one + // that was previously returned by the CommitDoubleMappedMemory + // offset - offset in the underlying shared memory + // size - size of the memory block to release + // Return: + // true if it succeeded, false if it failed + static bool ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Get a RW mapping for the RX block specified by the arguments + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the RX virtual address range. + // offset - offset in the underlying shared memory + // size - size of the memory block to map as RW + // Return: + // Starting virtual address of the RW mapping. + static void* GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Release RW mapping of the block specified by the arguments + // Parameters: + // pStart - Start address of the RW virtual address range. It must be an address + // previously returned by the GetRWMapping. + // size - Size of the memory block to release. It must be the size previously + // passed to the GetRWMapping that returned the pStart. + // Return: + // true if it succeeded, false if it failed + static bool ReleaseRWMapping(void* pStart, size_t size); +}; diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 1ae433adbfd89..8c57742cb6315 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -69,6 +69,7 @@ endif(CLR_CMAKE_TARGET_WIN32) set(UTILCODE_SOURCES ${UTILCODE_COMMON_SOURCES} + executableallocator.cpp ) set(UTILCODE_DAC_SOURCES diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp new file mode 100644 index 0000000000000..ac4c326c83784 --- /dev/null +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -0,0 +1,755 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "pedecoder.h" +#include "executableallocator.h" + +#if USE_UPPER_ADDRESS +// Preferred region to allocate the code in. 
+BYTE * ExecutableAllocator::g_codeMinAddr; +BYTE * ExecutableAllocator::g_codeMaxAddr; +BYTE * ExecutableAllocator::g_codeAllocStart; +// Next address to try to allocate for code in the preferred region. +BYTE * ExecutableAllocator::g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + +bool ExecutableAllocator::g_isWXorXEnabled = false; + +ExecutableAllocator::FatalErrorHandler ExecutableAllocator::g_fatalErrorHandler = NULL; + +ExecutableAllocator* ExecutableAllocator::g_instance = NULL; + +bool ExecutableAllocator::IsDoubleMappingEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return false; +#else + return g_isWXorXEnabled; +#endif +} + +bool ExecutableAllocator::IsWXORXEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return true; +#else + return g_isWXorXEnabled; +#endif +} + +extern SYSTEM_INFO g_SystemInfo; + +size_t ExecutableAllocator::Granularity() +{ + LIMITED_METHOD_CONTRACT; + + return g_SystemInfo.dwAllocationGranularity; +} + +// Use this function to initialize the g_codeAllocHint +// during startup. base is runtime .dll base address, +// size is runtime .dll virtual size. +void ExecutableAllocator::InitCodeAllocHint(size_t base, size_t size, int randomPageOffset) +{ +#if USE_UPPER_ADDRESS + +#ifdef _DEBUG + // If GetForceRelocs is enabled we don't constrain the pMinAddr + if (PEDecoder::GetForceRelocs()) + return; +#endif + + // + // If we are using the UPPER_ADDRESS space (on Win64) + // then for any code heap that doesn't specify an address + // range using [pMinAddr..pMaxAddr] we place it in the + // upper address space + // This enables us to avoid having to use long JumpStubs + // to reach the code for our ngen-ed images. + // Which are also placed in the UPPER_ADDRESS space. + // + SIZE_T reach = 0x7FFF0000u; + + // We will choose the preferred code region based on the address of clr.dll. The JIT helpers + // in clr.dll are the most heavily called functions. + g_codeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; + g_codeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; + + BYTE * pStart; + + if (g_codeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && + (BYTE *)CODEHEAP_START_ADDRESS < g_codeMaxAddr) + { + // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) + // Use the code head start address that does not cause collisions with NGen images. + // This logic is coupled with scripts that we use to assign base addresses. + pStart = (BYTE *)CODEHEAP_START_ADDRESS; + } + else + if (base > UINT32_MAX) + { + // clr.dll got address assigned by ASLR? + // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned + // addresses. Do not start at g_codeMinAddr exactly so that we can also reach common native images + // that can be placed at higher addresses than clr.dll. + pStart = g_codeMinAddr + (g_codeMaxAddr - g_codeMinAddr) / 8; + } + else + { + // clr.dll missed the base address? + // Try to occupy the space right after it. 
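The reach arithmetic in InitCodeAllocHint above keeps code heaps within signed 32-bit displacement range of clr.dll so that call sites can use rel32 addressing instead of jump stubs. A small hypothetical helper (illustration only, not part of this change) makes the constraint explicit:

    #include <cstdint>
    #include <cstdio>

    // A rel32 call/jmp encodes a signed 32-bit displacement measured from the end
    // of the 5-byte instruction, so the target must lie within roughly +/- 2 GB of
    // the call site. The 0x7FFF0000 "reach" above approximates this, leaving slack
    // for the size of the regions involved.
    static bool IsRel32Reachable(uintptr_t instrStart, uintptr_t target)
    {
        int64_t delta = (int64_t)target - (int64_t)(instrStart + 5);
        return delta >= INT32_MIN && delta <= INT32_MAX;
    }

    int main()
    {
        printf("%d\n", IsRel32Reachable(0x7FF600000000, 0x7FF640000000)); // 1: ~1 GB away
        printf("%d\n", IsRel32Reachable(0x7FF600000000, 0x7FF700000000)); // 0: ~4 GB away
        return 0;
    }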
+ pStart = (BYTE *)(base + size); + } + + // Randomize the address space + pStart += GetOsPageSize() * randomPageOffset; + + g_codeAllocStart = pStart; + g_codeAllocHint = pStart; +#endif +} + +// Use this function to reset the g_codeAllocHint +// after unloading an AppDomain +void ExecutableAllocator::ResetCodeAllocHint() +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + g_codeAllocHint = g_codeAllocStart; +#endif +} + +// Returns TRUE if p is located in near clr.dll that allows us +// to use rel32 IP-relative addressing modes. +bool ExecutableAllocator::IsPreferredExecutableRange(void * p) +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + if (g_codeMinAddr <= (BYTE *)p && (BYTE *)p < g_codeMaxAddr) + return true; +#endif + return false; +} + +ExecutableAllocator* ExecutableAllocator::Instance() +{ + LIMITED_METHOD_CONTRACT; + return g_instance; +} + +ExecutableAllocator::~ExecutableAllocator() +{ + if (IsDoubleMappingEnabled()) + { + VMToOSInterface::DestroyDoubleMemoryMapper(m_doubleMemoryMapperHandle); + } +} + +HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandler) +{ + LIMITED_METHOD_CONTRACT; + + g_fatalErrorHandler = fatalErrorHandler; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0; + g_instance = new (nothrow) ExecutableAllocator(); + if (g_instance == NULL) + { + return E_OUTOFMEMORY; + } + + if (!g_instance->Initialize()) + { + return E_FAIL; + } + + return S_OK; +} + +bool ExecutableAllocator::Initialize() +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + if (!VMToOSInterface::CreateDoubleMemoryMapper(&m_doubleMemoryMapperHandle, &m_maxExecutableCodeSize)) + { + return false; + } + + m_CriticalSection = ClrCreateCriticalSection(CrstExecutableAllocatorLock,CrstFlags(CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)); + } + + return true; +} + +//#define ENABLE_CACHED_MAPPINGS + +void ExecutableAllocator::UpdateCachedMapping(BlockRW* pBlock) +{ + LIMITED_METHOD_CONTRACT; +#ifdef ENABLE_CACHED_MAPPINGS + if (m_cachedMapping == NULL) + { + m_cachedMapping = pBlock; + pBlock->refCount++; + } + else if (m_cachedMapping != pBlock) + { + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(m_cachedMapping->baseRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } + m_cachedMapping = pBlock; + pBlock->refCount++; + } +#endif // ENABLE_CACHED_MAPPINGS +} + +void* ExecutableAllocator::FindRWBlock(void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + pBlock->refCount++; + UpdateCachedMapping(pBlock); + + return (BYTE*)pBlock->baseRW + ((size_t)baseRX - (size_t)pBlock->baseRX); + } + } + + return NULL; +} + +bool ExecutableAllocator::AddRWBlock(void* baseRW, void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + break; + } + } + + // The new "nothrow" below failure is handled as fail fast since it is not recoverable + 
PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + + BlockRW* pBlockRW = new (nothrow) BlockRW(); + if (pBlockRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block metadata cannot be allocated")); + return false; + } + + pBlockRW->baseRW = baseRW; + pBlockRW->baseRX = baseRX; + pBlockRW->size = size; + pBlockRW->next = m_pFirstBlockRW; + pBlockRW->refCount = 1; + m_pFirstBlockRW = pBlockRW; + + UpdateCachedMapping(pBlockRW); + + return true; +} + +bool ExecutableAllocator::RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize) +{ + LIMITED_METHOD_CONTRACT; + + BlockRW* pPrevBlockRW = NULL; + for (BlockRW* pBlockRW = m_pFirstBlockRW; pBlockRW != NULL; pBlockRW = pBlockRW->next) + { + if (pBlockRW->baseRW <= pRW && (size_t)pRW < ((size_t)pBlockRW->baseRW + pBlockRW->size)) + { + // found + pBlockRW->refCount--; + if (pBlockRW->refCount != 0) + { + *pUnmapAddress = NULL; + return true; + } + + if (pPrevBlockRW == NULL) + { + m_pFirstBlockRW = pBlockRW->next; + } + else + { + pPrevBlockRW->next = pBlockRW->next; + } + + *pUnmapAddress = pBlockRW->baseRW; + *pUnmapSize = pBlockRW->size; + + delete pBlockRW; + return true; + } + + pPrevBlockRW = pBlockRW; + } + + return false; +} + +bool ExecutableAllocator::AllocateOffset(size_t* pOffset, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset = m_freeOffset; + size_t newFreeOffset = offset + size; + + if (newFreeOffset > m_maxExecutableCodeSize) + { + return false; + } + + m_freeOffset = newFreeOffset; + + *pOffset = offset; + + return true; +} + +void ExecutableAllocator::AddRXBlock(BlockRX* pBlock) +{ + LIMITED_METHOD_CONTRACT; + + pBlock->next = m_pFirstBlockRX; + m_pFirstBlockRX = pBlock; +} + +void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + return VMToOSInterface::CommitDoubleMappedMemory(pStart, size, isExecutable); + } + else + { + return ClrVirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + } +} + +void ExecutableAllocator::Release(void* pRX) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + // Locate the RX block corresponding to the pRX and remove it from the linked list + BlockRX* pBlock; + BlockRX* pPrevBlock = NULL; + + for (pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX == pBlock->baseRX) + { + if (pPrevBlock == NULL) + { + m_pFirstBlockRX = pBlock->next; + } + else + { + pPrevBlock->next = pBlock->next; + } + + break; + } + pPrevBlock = pBlock; + } + + if (pBlock != NULL) + { + VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size); + // Put the released block into the free block list + pBlock->baseRX = NULL; + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } + else + { + // The block was not found, which should never happen. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to release was not found")); + } + } + else + { + ClrVirtualFree(pRX, 0, MEM_RELEASE); + } +} + +// Find a free block with the closest size >= the requested size. +// Returns NULL if no such block exists. 
+ExecutableAllocator::BlockRX* ExecutableAllocator::FindBestFreeBlock(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + BlockRX* pPrevBlock = NULL; + BlockRX* pPrevBestBlock = NULL; + BlockRX* pBestBlock = NULL; + BlockRX* pBlock = m_pFirstFreeBlockRX; + + while (pBlock != NULL) + { + if (pBlock->size >= size) + { + if (pBestBlock != NULL) + { + if (pBlock->size < pBestBlock->size) + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + else + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + pPrevBlock = pBlock; + pBlock = pBlock->next; + } + + if (pBestBlock != NULL) + { + if (pPrevBestBlock != NULL) + { + pPrevBestBlock->next = pBestBlock->next; + } + else + { + m_pFirstFreeBlockRX = pBestBlock->next; + } + + pBestBlock->next = NULL; + } + + return pBestBlock; +} + +// Allocate a new block of executable memory and the related descriptor structure. +// First try to get it from the free blocks and if there is no suitable free block, +// allocate a new one. +ExecutableAllocator::BlockRX* ExecutableAllocator::AllocateBlock(size_t size, bool* pIsFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset; + BlockRX* block = FindBestFreeBlock(size); + *pIsFreeBlock = (block != NULL); + + if (block == NULL) + { + if (!AllocateOffset(&offset, size)) + { + return NULL; + } + + block = new (nothrow) BlockRX(); + if (block == NULL) + { + return NULL; + } + + block->offset = offset; + block->size = size; + } + + return block; +} + +// Backout a previously allocated block. The block is added to the free blocks list and +// reused for later allocation requests. +void ExecutableAllocator::BackoutBlock(BlockRX* pBlock, bool isFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + if (!isFreeBlock) + { + m_freeOffset -= pBlock->size; + delete pBlock; + } + else + { + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } +} + +// Reserve executable memory within the specified virtual address space range. If it is not possible to +// reserve memory in that range, the method returns NULL and nothing is allocated. +void* ExecutableAllocator::ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void *result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, loAddress, hiAddress); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + return ClrVirtualAllocWithinRange((const BYTE*)loAddress, (const BYTE*)hiAddress, size, allocationType, PAGE_NOACCESS); + } +} + +// Reserve executable memory. On Windows it tries to use the allocation hints to +// allocate memory close to the previously allocated executable memory and loaded +// executable files. 
+void* ExecutableAllocator::Reserve(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + + BYTE *result = NULL; + +#if USE_UPPER_ADDRESS + // + // If we are using the UPPER_ADDRESS space (on Win64) + // then for any heap that will contain executable code + // we will place it in the upper address space + // + // This enables us to avoid having to use JumpStubs + // to reach the code for our ngen-ed images on x64, + // since they are also placed in the UPPER_ADDRESS space. + // + BYTE * pHint = g_codeAllocHint; + + if (size <= (SIZE_T)(g_codeMaxAddr - g_codeMinAddr) && pHint != NULL) + { + // Try to allocate in the preferred region after the hint + result = (BYTE*)ReserveWithinRange(size, pHint, g_codeMaxAddr); + if (result != NULL) + { + g_codeAllocHint = result + size; + } + else + { + // Try to allocate in the preferred region before the hint + result = (BYTE*)ReserveWithinRange(size, g_codeMinAddr, pHint + size); + + if (result != NULL) + { + g_codeAllocHint = result + size; + } + + g_codeAllocHint = NULL; + } + } + + // Fall through to +#endif // USE_UPPER_ADDRESS + + if (result == NULL) + { + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + result = (BYTE*)VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, 0, 0); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + result = (BYTE*)ClrVirtualAlloc(NULL, size, allocationType, PAGE_NOACCESS); + } + } + + return result; +} + +// Reserve a block of executable memory at the specified virtual address. If it is not +// possible, the method returns NULL. +void* ExecutableAllocator::ReserveAt(void* baseAddressRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, baseAddressRX, baseAddressRX); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + return VirtualAlloc(baseAddressRX, size, MEM_RESERVE, PAGE_NOACCESS); + } +} + +// Map an executable memory block as writeable. If there is already a mapping +// covering the specified block, return that mapping instead of creating a new one. +// Return starting address of the writeable mapping. 
+void* ExecutableAllocator::MapRW(void* pRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return pRX; + } + + CRITSEC_Holder csh(m_CriticalSection); + + void* result = FindRWBlock(pRX, size); + if (result != NULL) + { + return result; + } + + for (BlockRX* pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX >= pBlock->baseRX && ((size_t)pRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + // Offset of the RX address in the originally allocated block + size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; + // Offset of the RX address that will start the newly mapped block + size_t mapOffset = ALIGN_DOWN(offset, Granularity()); + // Size of the block we will map + size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); + void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, mapSize); + + if (pRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); + } + + AddRWBlock(pRW, (BYTE*)pBlock->baseRX + mapOffset, mapSize); + + return (void*)((size_t)pRW + (offset - mapOffset)); + } + else if (pRX >= pBlock->baseRX && pRX < (void*)((size_t)pBlock->baseRX + pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to RW map a block that crosses the end of the allocated RX range")); + } + else if (pRX < pBlock->baseRX && (void*)((size_t)pRX + size) > pBlock->baseRX) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to map a block that crosses the beginning of the allocated range")); + } + } + + // The executable memory block was not found, so we cannot provide the writeable mapping. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to map as RW was not found")); + return NULL; +} + +// Unmap writeable mapping at the specified address. The address must be an address +// returned by the MapRW method. +void ExecutableAllocator::UnmapRW(void* pRW) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return; + } + + CRITSEC_Holder csh(m_CriticalSection); + _ASSERTE(pRW != NULL); + + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(pRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } +} diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index adaf07d8f5825..b3b381b2f9bef 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -695,15 +695,21 @@ size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHeap); struct LoaderHeapFreeBlock { public: - LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list - size_t m_dwSize; // Total size of this block (including this header) -//! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations. 
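The loader heap changes that begin here, like the rest of this change, route every write to executable memory through an ExecutableWriterHolder built on the MapRW/UnmapRW pair above. The real holder is defined elsewhere in the runtime; the sketch below is a simplified, hypothetical reconstruction of the pattern, shown only to illustrate the intended usage (PatchStub and m_pTarget in the usage comment are made-up names):

    #include "executableallocator.h"

    // Simplified sketch only; the runtime's ExecutableWriterHolder additionally
    // handles move semantics, the no-op case when W^X is disabled, and more.
    template <typename T>
    class ScopedExecutableWriter
    {
        T* m_rw;   // writable alias returned by MapRW

    public:
        ScopedExecutableWriter(T* rx, size_t size)
        {
            // Ask the allocator for a temporary RW view of the RX range.
            // When double mapping is disabled, MapRW simply returns rx.
            m_rw = (T*)ExecutableAllocator::Instance()->MapRW(rx, size);
        }

        ~ScopedExecutableWriter()
        {
            // Drop the RW view; the RX mapping stays valid for execution.
            ExecutableAllocator::Instance()->UnmapRW(m_rw);
        }

        T* GetRW() { return m_rw; }
    };

    // Usage: write through the RW alias, never through the RX pointer.
    // void PatchStub(Stub* pStubRX, PCODE newTarget)
    // {
    //     ScopedExecutableWriter<Stub> writer(pStubRX, sizeof(Stub));
    //     writer.GetRW()->m_pTarget = newTarget;
    // }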
+ LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list + size_t m_dwSize; // Total size of this block + void *m_pBlockAddress; // Virtual address of the block +#ifndef DACCESS_COMPILE static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap) { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; + // The new "nothrow" below failure is handled in a non-fault way, so + // make sure that callers with FORBID_FAULT can call this method without + // firing the contract violation assert. + PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + LOADER_HEAP_BEGIN_TRAP_FAULT // It's illegal to insert a free block that's smaller than the minimum sized allocation - @@ -722,19 +728,30 @@ struct LoaderHeapFreeBlock } #endif - INDEBUG(memset(pMem, 0xcc, dwTotalSize);) - LoaderHeapFreeBlock *pNewBlock = (LoaderHeapFreeBlock*)pMem; - pNewBlock->m_pNext = *ppHead; - pNewBlock->m_dwSize = dwTotalSize; - *ppHead = pNewBlock; + void* pMemRW = pMem; + ExecutableWriterHolder memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder = ExecutableWriterHolder(pMem, dwTotalSize); + pMemRW = memWriterHolder.GetRW(); + } - MergeBlock(pNewBlock, pHeap); + INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);) + LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock; + // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all. + if (pNewBlock != NULL) + { + pNewBlock->m_pNext = *ppHead; + pNewBlock->m_dwSize = dwTotalSize; + pNewBlock->m_pBlockAddress = pMem; + *ppHead = pNewBlock; + MergeBlock(pNewBlock, pHeap); + } LOADER_HEAP_END_TRAP_FAULT } - - static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap) + static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap) { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -751,23 +768,19 @@ struct LoaderHeapFreeBlock size_t dwCurSize = pCur->m_dwSize; if (dwCurSize == dwSize) { - pResult = pCur; + pResult = pCur->m_pBlockAddress; // Exact match. Hooray! - if (fRemoveFromFreeList) - { - *ppWalk = pCur->m_pNext; - } + *ppWalk = pCur->m_pNext; + delete pCur; break; } else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= AllocMem_TotalSize(1, pHeap)) { // Partial match. Ok... - pResult = pCur; - if (fRemoveFromFreeList) - { - *ppWalk = pCur->m_pNext; - InsertFreeBlock(ppWalk, ((BYTE*)pCur) + dwSize, dwCurSize - dwSize, pHeap ); - } + pResult = pCur->m_pBlockAddress; + *ppWalk = pCur->m_pNext; + InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap ); + delete pCur; break; } @@ -777,19 +790,22 @@ struct LoaderHeapFreeBlock ppWalk = &( pCur->m_pNext ); } - if (pResult && fRemoveFromFreeList) + if (pResult) { + void *pResultRW = pResult; + ExecutableWriterHolder resultWriterHolder; + if (pHeap->IsExecutable()) + { + resultWriterHolder = ExecutableWriterHolder(pResult, dwSize); + pResultRW = resultWriterHolder.GetRW(); + } // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant! - memset(pResult, 0, dwSize); + memset(pResultRW, 0, dwSize); } LOADER_HEAP_END_TRAP_FAULT return pResult; - - - } - private: // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened. 
static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap) @@ -803,7 +819,7 @@ struct LoaderHeapFreeBlock LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext; size_t dwSize = pFreeBlock->m_dwSize; - if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlock) + dwSize)) + if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize)) { result = FALSE; } @@ -811,9 +827,17 @@ struct LoaderHeapFreeBlock { size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize; LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext; - INDEBUG(memset(pFreeBlock, 0xcc, dwCombinedSize);) + void *pMemRW = pFreeBlock->m_pBlockAddress; + ExecutableWriterHolder memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder = ExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); + pMemRW = memWriterHolder.GetRW(); + } + INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);) pFreeBlock->m_pNext = pNextNextBlock; pFreeBlock->m_dwSize = dwCombinedSize; + delete pNextBlock; result = TRUE; } @@ -822,7 +846,7 @@ struct LoaderHeapFreeBlock return result; } - +#endif // DACCESS_COMPILE }; @@ -840,8 +864,7 @@ struct LoaderHeapFreeBlock // - z bytes of pad (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte) // - a bytes of tag (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag) // -// - b bytes of pad (if total size after all this < sizeof(LoaderHeapFreeBlock), pad enough to make it the size of LoaderHeapFreeBlock) -// - c bytes of pad (where "c" is just enough to pointer-align the following byte) +// - b bytes of pad (where "b" is just enough to pointer-align the following byte) // // ==> Following address is always pointer-aligned //===================================================================================== @@ -862,10 +885,6 @@ inline size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHe #ifdef _DEBUG dwSize += sizeof(LoaderHeapValidationTag); #endif - if (dwSize < sizeof(LoaderHeapFreeBlock)) - { - dwSize = sizeof(LoaderHeapFreeBlock); - } } dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); @@ -977,9 +996,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(pVirtualAddress); } delete pSearch; @@ -987,9 +1004,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (m_reservedBlock.m_fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(m_reservedBlock.pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); } INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) @@ -1058,7 +1073,7 @@ void ReleaseReservedMemory(BYTE* value) { if (value) { - ClrVirtualFree(value, 0, MEM_RELEASE); + ExecutableAllocator::Instance()->Release(value); } } @@ -1114,7 +1129,9 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Reserve pages // - pData = ClrVirtualAllocExecutable(dwSizeToReserve, MEM_RESERVE, PAGE_NOACCESS); + // Reserve the memory for even non-executable stuff close to the executable code, as it has profound effect + // on e.g. a static variable access performance. 
+ pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); if (pData == NULL) { return FALSE; @@ -1140,7 +1157,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } // Commit first set of pages, since it will contain the LoaderHeapBlock - void *pTemp = ClrVirtualAlloc(pData, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pTemp == NULL) { //_ASSERTE(!"Unable to ClrVirtualAlloc commit in a loaderheap"); @@ -1213,7 +1230,7 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); // Yes, so commit the desired number of reserved pages - void *pData = ClrVirtualAlloc(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pData == NULL) return FALSE; @@ -1316,7 +1333,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize { // Any memory available on the free list? - void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, TRUE /*fRemoveFromFreeList*/, this); + void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, this); if (!pData) { // Enough bytes available in committed region? @@ -1518,8 +1535,6 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize )) { - // Cool. This was the last block allocated. We can just undo the allocation instead - // of going to the freelist. void *pMemRW = pMem; ExecutableWriterHolder memWriterHolder; if (m_Options & LHF_EXECUTABLE) @@ -1527,6 +1542,9 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, memWriterHolder = ExecutableWriterHolder(pMem, dwSize); pMemRW = memWriterHolder.GetRW(); } + + // Cool. This was the last block allocated. We can just undo the allocation instead + // of going to the freelist. memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 m_pAllocPtr = (BYTE*)pMem; } @@ -1534,7 +1552,6 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, { LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this); } - } diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 0026d1f619f14..e7b1755b2b1c4 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -352,168 +352,6 @@ HRESULT FakeCoCreateInstanceEx(REFCLSID rclsid, return hr; } -#if USE_UPPER_ADDRESS -static BYTE * s_CodeMinAddr; // Preferred region to allocate the code in. -static BYTE * s_CodeMaxAddr; -static BYTE * s_CodeAllocStart; -static BYTE * s_CodeAllocHint; // Next address to try to allocate for code in the preferred region. -#endif - -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. 
-// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset) -{ -#if USE_UPPER_ADDRESS - -#ifdef _DEBUG - // If GetForceRelocs is enabled we don't constrain the pMinAddr - if (PEDecoder::GetForceRelocs()) - return; -#endif - -// - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any code heap that doesn't specify an address - // range using [pMinAddr..pMaxAddr] we place it in the - // upper address space - // This enables us to avoid having to use long JumpStubs - // to reach the code for our ngen-ed images. - // Which are also placed in the UPPER_ADDRESS space. - // - SIZE_T reach = 0x7FFF0000u; - - // We will choose the preferred code region based on the address of clr.dll. The JIT helpers - // in clr.dll are the most heavily called functions. - s_CodeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; - s_CodeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; - - BYTE * pStart; - - if (s_CodeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && - (BYTE *)CODEHEAP_START_ADDRESS < s_CodeMaxAddr) - { - // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) - // Use the code head start address that does not cause collisions with NGen images. - // This logic is coupled with scripts that we use to assign base addresses. - pStart = (BYTE *)CODEHEAP_START_ADDRESS; - } - else - if (base > UINT32_MAX) - { - // clr.dll got address assigned by ASLR? - // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned - // addresses. Do not start at s_CodeMinAddr exactly so that we can also reach common native images - // that can be placed at higher addresses than clr.dll. - pStart = s_CodeMinAddr + (s_CodeMaxAddr - s_CodeMinAddr) / 8; - } - else - { - // clr.dll missed the base address? - // Try to occupy the space right after it. - pStart = (BYTE *)(base + size); - } - - // Randomize the address space - pStart += GetOsPageSize() * randomPageOffset; - - s_CodeAllocStart = pStart; - s_CodeAllocHint = pStart; -#endif -} - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint() -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - s_CodeAllocHint = s_CodeAllocStart; -#endif -} - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. -// -BOOL IsPreferredExecutableRange(void * p) -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - if (s_CodeMinAddr <= (BYTE *)p && (BYTE *)p < s_CodeMaxAddr) - return TRUE; -#endif - return FALSE; -} - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near clr.dll -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - -#if USE_UPPER_ADDRESS - // - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any heap that will contain executable code - // we will place it in the upper address space - // - // This enables us to avoid having to use JumpStubs - // to reach the code for our ngen-ed images on x64, - // since they are also placed in the UPPER_ADDRESS space. 
- // - BYTE * pHint = s_CodeAllocHint; - - if (dwSize <= (SIZE_T)(s_CodeMaxAddr - s_CodeMinAddr) && pHint != NULL) - { - // Try to allocate in the preferred region after the hint - BYTE * pResult = ClrVirtualAllocWithinRange(pHint, s_CodeMaxAddr, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - // Try to allocate in the preferred region before the hint - pResult = ClrVirtualAllocWithinRange(s_CodeMinAddr, pHint + dwSize, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - s_CodeAllocHint = NULL; - } - - // Fall through to -#endif // USE_UPPER_ADDRESS - -#ifdef HOST_UNIX - // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. - // This will allow us to place JIT'ed code close to the coreclr library - // and thus improve performance by avoiding jump stubs in managed code. - flAllocationType |= MEM_RESERVE_EXECUTABLE; -#endif // HOST_UNIX - - return (BYTE *) ClrVirtualAlloc (NULL, dwSize, flAllocationType, flProtect); - -} - // // Allocate free memory with specific alignment. // diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 1d682d2a428bb..9c2cb3df0b7e9 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -833,7 +833,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/exceparm.cpp ${ARCH_SOURCES_DIR}/stubs.cpp - ${ARCH_SOURCES_DIR}/armsinglestepper.cpp ) set(VM_HEADERS_DAC_AND_WKS_ARCH @@ -844,6 +843,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/armsinglestepper.cpp exceptionhandling.cpp gcinfodecoder.cpp ) @@ -868,7 +868,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) if(CLR_CMAKE_HOST_UNIX) - list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH + list(APPEND VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp ) endif(CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 82a301bb0cbd1..219597eb350c2 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -51,37 +51,6 @@ endif extern JIT_InternalThrow:proc -; There is an even more optimized version of these helpers possible which takes -; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -; that check (this is more significant in the JIT_WriteBarrier case). -; -; Additionally we can look into providing helpers which will take the src/dest from -; specific registers (like x86) which _could_ (??) make for easier register allocation -; for the JIT64, however it might lead to having to have some nasty code that treats -; these guys really special like... :(. -; -; Version that does the move, checks whether or not it's in the GC and whether or not -; it needs to have it's card updated -; -; void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - ; but if it isn't then it will just return. 
- ; - ; See if this is in GCHeap - cmp rcx, [g_lowest_address] - jb NotInHeap - cmp rcx, [g_highest_address] - jnb NotInHeap - - jmp JIT_WriteBarrier - - NotInHeap: - ; See comment above about possible AV - mov [rcx], rdx - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT @@ -99,7 +68,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT ifdef _DEBUG ; In debug builds, this just contains jump to the debug version of the write barrier by default - jmp JIT_WriteBarrier_Debug + mov rax, JIT_WriteBarrier_Debug + jmp rax endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -388,6 +358,51 @@ endif ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT +Section segment para 'DATA' + + align 16 + + public JIT_WriteBarrier_Loc +JIT_WriteBarrier_Loc: + dq 0 + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + ; JIT_WriteBarrier(Object** dst, Object* src) + jmp QWORD PTR [JIT_WriteBarrier_Loc] +LEAF_END JIT_WriteBarrier_Callable, _TEXT + +; There is an even more optimized version of these helpers possible which takes +; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +; that check (this is more significant in the JIT_WriteBarrier case). +; +; Additionally we can look into providing helpers which will take the src/dest from +; specific registers (like x86) which _could_ (??) make for easier register allocation +; for the JIT64, however it might lead to having to have some nasty code that treats +; these guys really special like... :(. +; +; Version that does the move, checks whether or not it's in the GC and whether or not +; it needs to have it's card updated +; +; void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + ; but if it isn't then it will just return. + ; + ; See if this is in GCHeap + cmp rcx, [g_lowest_address] + jb NotInHeap + cmp rcx, [g_highest_address] + jnb NotInHeap + + jmp QWORD PTR [JIT_WriteBarrier_Loc] + + NotInHeap: + ; See comment above about possible AV + mov [rcx], rdx + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. ; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index a13afb4878511..8109886d0c969 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -32,26 +32,14 @@ LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT // See if this is in GCHeap PREPARE_EXTERNAL_VAR g_lowest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jb NotInHeap .byte 0x72, 0x12 -#else - // jb NotInHeap - .byte 0x72, 0x0e -#endif PREPARE_EXTERNAL_VAR g_highest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jnb NotInHeap .byte 0x73, 0x06 jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] -#else - // jnb NotInHeap - .byte 0x73, 0x02 - // jmp C_FUNC(JIT_WriteBarrier) - .byte 0xeb, 0x05 -#endif NotInHeap: // See comment above about possible AV @@ -398,11 +386,17 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT -#ifdef FEATURE_WRITEBARRIER_COPY // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. 
This variable is set in InitThreadManager. - .global _JIT_WriteBarrier_Loc - .zerofill __DATA,__common,_JIT_WriteBarrier_Loc,8,3 + .global C_FUNC(JIT_WriteBarrier_Loc) +#ifdef TARGET_OSX + .zerofill __DATA,__common,C_FUNC(JIT_WriteBarrier_Loc),8,3 +#else + .data + C_FUNC(JIT_WriteBarrier_Loc): + .quad 0 + .text +#endif // ------------------------------------------------------------------ // __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) @@ -412,8 +406,6 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] LEAF_END JIT_WriteBarrier_Callable, _TEXT -#endif // FEATURE_WRITEBARRIER_COPY - // The following helper will access ("probe") a word on each page of the stack // starting with the page right beneath rsp down to the one pointed to by r11. diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 38bff78a54cb0..02b023777b8a9 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -293,7 +293,10 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, // the memcpy must come before the switch statment because the asserts inside the switch // are actually looking into the JIT_WriteBarrier buffer - memcpy(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + { + ExecutableWriterHolder writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize()); + memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + } switch (newWriteBarrier) { @@ -544,7 +547,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_high. if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) { - *(UINT64*)m_pUpperBoundImmediate = (size_t)g_ephemeral_high; + ExecutableWriterHolder upperBoundWriterHolder((UINT64*)m_pUpperBoundImmediate, sizeof(UINT64)); + *upperBoundWriterHolder.GetRW() = (size_t)g_ephemeral_high; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } } @@ -557,7 +561,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_low. 
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) { - *(UINT64*)m_pLowerBoundImmediate = (size_t)g_ephemeral_low; + ExecutableWriterHolder lowerBoundImmediateWriterHolder((UINT64*)m_pLowerBoundImmediate, sizeof(UINT64)); + *lowerBoundImmediateWriterHolder.GetRW() = (size_t)g_ephemeral_low; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -609,7 +614,8 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #endif // FEATURE_SVR_GC if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { - *(UINT64*)m_pWriteWatchTableImmediate = (size_t)g_sw_ww_table; + ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); + *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_sw_ww_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -621,14 +627,16 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { - *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; + ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, sizeof(UINT64)); + *cardTableImmediateWriterHolder.GetRW() = (size_t)g_card_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) { - *(UINT64*)m_pCardBundleTableImmediate = (size_t)g_card_bundle_table; + ExecutableWriterHolder cardBundleTableImmediateWriterHolder((UINT64*)m_pCardBundleTableImmediate, sizeof(UINT64)); + *cardBundleTableImmediateWriterHolder.GetRW() = (size_t)g_card_bundle_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #endif diff --git a/src/coreclr/vm/arm/armsinglestepper.cpp b/src/coreclr/vm/arm/armsinglestepper.cpp index 79317263b2223..f9e718ae5420e 100644 --- a/src/coreclr/vm/arm/armsinglestepper.cpp +++ b/src/coreclr/vm/arm/armsinglestepper.cpp @@ -97,11 +97,7 @@ ArmSingleStepper::ArmSingleStepper() ArmSingleStepper::~ArmSingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(WORD)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -110,11 +106,7 @@ void ArmSingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (WORD *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(WORD))); -#else - m_rgCode = new (executable) WORD[kMaxCodeBuffer]; -#endif } #endif } @@ -287,6 +279,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) DWORD idxNextInstruction = 0; + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); + if (m_originalITState.InITBlock() && !ConditionHolds(pCtx, m_originalITState.CurrentCondition())) { LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 1: ITState::Clear;\n")); @@ -295,7 +289,7 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // to execute. We'll put the correct value back during fixup. ITState::Clear(pCtx); m_fSkipIT = true; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else if (TryEmulate(pCtx, opcode1, opcode2, false)) { @@ -308,8 +302,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) m_fEmulate = true; // Set breakpoints to stop the execution. This will get us right back here. 
- m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else { @@ -323,24 +317,24 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // guarantee one of them will be hit (we don't care which one -- the fixup code will update // the PC and IT state to make it look as though the CPU just executed the current // instruction). - m_rgCode[idxNextInstruction++] = opcode1; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode1; if (Is32BitInstruction(opcode1)) - m_rgCode[idxNextInstruction++] = opcode2; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode2; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } // Always terminate the redirection buffer with a breakpoint. - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); // Set the thread up so it will redirect to our buffer when execution resumes. pCtx->Pc = ((DWORD)(DWORD_PTR)m_rgCode) | THUMB_CODE; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 930395b56dc7e..3faa8fe36846e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -978,6 +978,16 @@ g_rgWriteBarrierDescriptors: .global g_rgWriteBarrierDescriptors +// ------------------------------------------------------------------ +// __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + // Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc // or R3? See targetarm.h + ldr pc, [r2] + + LEAF_END JIT_WriteBarrier_Callable + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler diff --git a/src/coreclr/vm/arm/asmhelpers.asm b/src/coreclr/vm/arm/asmhelpers.asm index d20540e62090e..82596e66693dc 100644 --- a/src/coreclr/vm/arm/asmhelpers.asm +++ b/src/coreclr/vm/arm/asmhelpers.asm @@ -1724,6 +1724,18 @@ tempReg SETS "$tmpReg" END_WRITE_BARRIERS + IMPORT JIT_WriteBarrier_Loc + +; ------------------------------------------------------------------ +; __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + ; Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc ; or R3? 
See targetarm.h + ldr pc, [r2] + + LEAF_END + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 88d0c6802b69d..425c286558432 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -1069,6 +1069,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1095,6 +1096,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -1167,6 +1169,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -1175,6 +1184,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1201,6 +1211,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -1256,6 +1267,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -1268,6 +1280,7 @@ struct ThisPtrRetBufPrecode { ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); return FastInterlockCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index aac3e25b18146..6e62df2370338 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -329,16 +329,28 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength) { DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart; *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart; + if (IsWriteBarrierCopyEnabled()) + { + *ppbStart = GetWriteBarrierCodeLocation(*ppbStart); + } *pcbLength = size; } void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode) { - TADDR dst = PCODEToPINSTR(dstCode); + TADDR dst = (TADDR)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation((void*)dstCode)); TADDR src = PCODEToPINSTR(srcCode); TADDR end = PCODEToPINSTR(endCode); size_t size = (PBYTE)end - (PBYTE)src; + + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder((void*)dst, size); + dst = (TADDR)writeBarrierWriterHolder.GetRW(); + } + memcpy((PVOID)dst, (PVOID)src, size); } @@ -419,7 +431,7 @@ void UpdateGCWriteBarriers(bool postGrow = false) } #define GWB_PATCH_OFFSET(_global) \ if (pDesc->m_dw_##_global##_offset != 0xffff) \ - PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast(_global))); + PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset), (UINT32)(dac_cast(_global))); // Iterate through the write barrier patch table created in the .clrwb section // (see write barrier asm code) @@ -431,6 +443,13 @@ void UpdateGCWriteBarriers(bool postGrow = false) PBYTE to = FindWBMapping(pDesc->m_pFuncStart); if(to) { 
+ to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to)); + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(to, pDesc->m_pFuncEnd - pDesc->m_pFuncStart); + to = barrierWriterHolder.GetRW(); + } GWB_PATCH_OFFSET(g_lowest_address); GWB_PATCH_OFFSET(g_highest_address); GWB_PATCH_OFFSET(g_ephemeral_low); diff --git a/src/coreclr/vm/arm64/arm64singlestepper.cpp b/src/coreclr/vm/arm64/arm64singlestepper.cpp index d45925311a33e..6c1764647c9f2 100644 --- a/src/coreclr/vm/arm64/arm64singlestepper.cpp +++ b/src/coreclr/vm/arm64/arm64singlestepper.cpp @@ -46,11 +46,7 @@ Arm64SingleStepper::Arm64SingleStepper() Arm64SingleStepper::~Arm64SingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(uint32_t)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -59,11 +55,7 @@ void Arm64SingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (uint32_t *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(uint32_t))); -#else - m_rgCode = new (executable) uint32_t[kMaxCodeBuffer]; -#endif } #endif } @@ -207,7 +199,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) unsigned int idxNextInstruction = 0; - ExecutableWriterHolder codeWriterHolder(m_rgCode, sizeof(m_rgCode)); + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); if (TryEmulate(pCtx, opcode, false)) { @@ -230,7 +222,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) pCtx->Pc = (uint64_t)m_rgCode; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. 
m_state = Applied; diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index e6b47d07b2b0c..8ef66586cd22c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -270,13 +270,9 @@ LOCAL_LABEL(EphemeralCheckEnabled): ldr x7, [x12] // Update wbs state -#ifdef FEATURE_WRITEBARRIER_COPY PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Table_Loc, x12 ldr x12, [x12] add x12, x12, x9 -#else // FEATURE_WRITEBARRIER_COPY - adr x12, LOCAL_LABEL(wbs_begin) -#endif // FEATURE_WRITEBARRIER_COPY stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 @@ -295,16 +291,10 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT mov x14, x0 // x14 = dst mov x15, x1 // x15 = val -#ifdef FEATURE_WRITEBARRIER_COPY -LOCAL_LABEL(Branch_JIT_WriteBarrier_Copy): // Branch to the write barrier PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Loc, x17 ldr x17, [x17] br x17 -#else // FEATURE_WRITEBARRIER_COPY - // Branch to the write barrier - b C_FUNC(JIT_WriteBarrier) -#endif // FEATURE_WRITEBARRIER_COPY LEAF_END JIT_WriteBarrier_Callable, _TEXT .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index ffbeb9fd1acb3..17d3a676940bd 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -61,6 +61,10 @@ #ifdef FEATURE_COMINTEROP IMPORT CLRToCOMWorker #endif // FEATURE_COMINTEROP + + IMPORT JIT_WriteBarrier_Table_Loc + IMPORT JIT_WriteBarrier_Loc + TEXTAREA ;; LPVOID __stdcall GetCurrentIP(void); @@ -308,6 +312,7 @@ ThePreStubPatchLabel ; x12 will be used for pointers mov x8, x0 + mov x9, x1 adrp x12, g_card_table ldr x0, [x12, g_card_table] @@ -346,7 +351,9 @@ EphemeralCheckEnabled ldr x7, [x12, g_highest_address] ; Update wbs state - adr x12, wbs_begin + adrp x12, JIT_WriteBarrier_Table_Loc + ldr x12, [x12, JIT_WriteBarrier_Table_Loc] + add x12, x12, x9 stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 stp x4, x5, [x12], 16 @@ -355,9 +362,11 @@ EphemeralCheckEnabled EPILOG_RESTORE_REG_PAIR fp, lr, #16! 
EPILOG_RETURN + WRITE_BARRIER_END JIT_UpdateWriteBarrierState + ; Begin patchable literal pool ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table wbs_begin wbs_card_table DCQ 0 @@ -375,14 +384,7 @@ wbs_lowest_address DCQ 0 wbs_highest_address DCQ 0 - - WRITE_BARRIER_END JIT_UpdateWriteBarrierState - -; ------------------------------------------------------------------ -; End of the writeable code region - LEAF_ENTRY JIT_PatchedCodeLast - ret lr - LEAF_END + WRITE_BARRIER_END JIT_WriteBarrier_Table ; void JIT_ByRefWriteBarrier ; On entry: @@ -546,6 +548,12 @@ Exit ret lr WRITE_BARRIER_END JIT_WriteBarrier +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + #ifdef FEATURE_PREJIT ;------------------------------------------------ ; VirtualMethodFixupStub @@ -1417,9 +1425,10 @@ CallHelper2 mov x14, x0 ; x14 = dst mov x15, x1 ; x15 = val - ; Branch to the write barrier (which is already correctly overwritten with - ; single or multi-proc code based on the current CPU - b JIT_WriteBarrier + ; Branch to the write barrier + adrp x17, JIT_WriteBarrier_Loc + ldr x17, [x17, JIT_WriteBarrier_Loc] + br x17 LEAF_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 83e56cfb9f9b9..0641d89ff1a91 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -597,6 +597,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -623,6 +624,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -715,6 +717,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -723,6 +732,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -749,6 +759,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -797,6 +808,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -810,6 +822,7 @@ struct ThisPtrRetBufPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 54cf1c4927548..12d56ddb9867e 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1067,8 +1067,14 @@ extern "C" void STDCALL JIT_PatchedCodeLast(); static void UpdateWriteBarrierState(bool skipEphemeralCheck) { BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); - ExecutableWriterHolder writeBarrierWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - 
(BYTE*)JIT_PatchedCodeStart); - JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierWriterHolder.GetRW() - writeBarrierCodeStart); + BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); + } + JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); } void InitJITHelpers1() diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index cdc5925234af9..b60aac924d2e2 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -607,6 +607,11 @@ void EESocketCleanupHelper(bool isExecutingOnAltStack) #endif // TARGET_UNIX #endif // CROSSGEN_COMPILE +void FatalErrorHandler(UINT errorCode, LPCWSTR pszMessage) +{ + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(errorCode, pszMessage); +} + void EEStartupHelper() { CONTRACTL @@ -670,6 +675,8 @@ void EEStartupHelper() // This needs to be done before the EE has started InitializeStartupFlags(); + IfFailGo(ExecutableAllocator::StaticInitialize(FatalErrorHandler)); + ThreadpoolMgr::StaticInitialize(); MethodDescBackpatchInfoTracker::StaticInitialize(); @@ -824,7 +831,7 @@ void EEStartupHelper() g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); - InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); + ExecutableAllocator::InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); } #endif // !TARGET_UNIX diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 02feec829a76b..5c5004f56860a 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -153,7 +153,9 @@ void EEClass::Destruct(MethodTable * pOwningMT) if (pDelegateEEClass->m_pStaticCallStub) { - BOOL fStubDeleted = pDelegateEEClass->m_pStaticCallStub->DecRef(); + ExecutableWriterHolder stubWriterHolder(pDelegateEEClass->m_pStaticCallStub, sizeof(Stub)); + BOOL fStubDeleted = stubWriterHolder.GetRW()->DecRef(); + if (fStubDeleted) { DelegateInvokeStubManager::g_pManager->RemoveStub(pDelegateEEClass->m_pStaticCallStub); @@ -167,7 +169,6 @@ void EEClass::Destruct(MethodTable * pOwningMT) // it is owned by the m_pMulticastStubCache, not by the class // - it is shared across classes. 
So we don't decrement // its ref count here - delete pDelegateEEClass->m_pUMThunkMarshInfo; } #ifdef FEATURE_COMINTEROP diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 37220786fedda..78721292a3e9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2139,8 +2139,7 @@ VOID EEJitManager::EnsureJumpStubReserve(BYTE * pImageBase, SIZE_T imageSize, SI return; // Unable to allocate the reserve - give up } - pNewReserve->m_ptr = ClrVirtualAllocWithinRange(loAddrCurrent, hiAddrCurrent, - allocChunk, MEM_RESERVE, PAGE_NOACCESS); + pNewReserve->m_ptr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(allocChunk, loAddrCurrent, hiAddrCurrent); if (pNewReserve->m_ptr != NULL) break; @@ -2231,8 +2230,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap if (!pInfo->getThrowOnOutOfMemoryWithinRange() && PEDecoder::GetForceRelocs()) RETURN NULL; #endif - pBaseAddr = ClrVirtualAllocWithinRange(loAddr, hiAddr, - reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(reserveSize, loAddr, hiAddr); if (!pBaseAddr) { @@ -2251,7 +2249,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap } else { - pBaseAddr = ClrVirtualAllocExecutable(reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(reserveSize); if (!pBaseAddr) ThrowOutOfMemory(); } @@ -2686,15 +2684,14 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo *pAllocatedSize = sizeof(CodeHeader) + totalSize; -#if defined(HOST_OSX) && defined(HOST_ARM64) -#define FEATURE_WXORX -#endif - -#ifdef FEATURE_WXORX - pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; -#else - pCodeHdrRW = pCodeHdr; -#endif + if (ExecutableAllocator::IsWXORXEnabled()) + { + pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; + } + else + { + pCodeHdrRW = pCodeHdr; + } #ifdef USE_INDIRECT_CODEHEADER if (requestInfo.IsDynamicDomain()) @@ -3347,7 +3344,7 @@ void EEJitManager::Unload(LoaderAllocator *pAllocator) } } - ResetCodeAllocHint(); + ExecutableAllocator::ResetCodeAllocHint(); } EEJitManager::DomainCodeHeapList::DomainCodeHeapList() diff --git a/src/coreclr/vm/comcallablewrapper.cpp b/src/coreclr/vm/comcallablewrapper.cpp index 8b95dac8cdd77..499880dc16dde 100644 --- a/src/coreclr/vm/comcallablewrapper.cpp +++ b/src/coreclr/vm/comcallablewrapper.cpp @@ -3183,12 +3183,11 @@ void ComMethodTable::Cleanup() if (m_pDispatchInfo) delete m_pDispatchInfo; - if (m_pMDescr) - DeleteExecutable(m_pMDescr); if (m_pITypeInfo && !g_fProcessDetach) SafeRelease(m_pITypeInfo); - DeleteExecutable(this); + // The m_pMDescr and the current instance is allocated from the related LoaderAllocator + // so no cleanup is needed here. 
} @@ -3214,7 +3213,7 @@ void ComMethodTable::LayOutClassMethodTable() SLOT *pComVtable; unsigned cbPrevSlots = 0; unsigned cbAlloc = 0; - NewExecutableHolder pMDMemoryPtr = NULL; + AllocMemHolder pMDMemoryPtr; BYTE* pMethodDescMemory = NULL; size_t writeableOffset = 0; unsigned cbNumParentVirtualMethods = 0; @@ -3321,7 +3320,7 @@ void ComMethodTable::LayOutClassMethodTable() cbAlloc = cbMethodDescs; if (cbAlloc > 0) { - pMDMemoryPtr = (BYTE*) new (executable) BYTE[cbAlloc + sizeof(UINT_PTR)]; + pMDMemoryPtr = m_pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbAlloc + sizeof(UINT_PTR))); pMethodDescMemory = pMDMemoryPtr; methodDescMemoryWriteableHolder = ExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); @@ -3703,7 +3702,6 @@ BOOL ComMethodTable::LayOutInterfaceMethodTable(MethodTable* pClsMT) // Method descs are at the end of the vtable // m_cbSlots interfaces methods + IUnk methods pMethodDescMemory = (BYTE *)&pComVtable[m_cbSlots]; - for (i = 0; i < cbSlots; i++) { ComCallMethodDesc* pNewMD = (ComCallMethodDesc *) (pMethodDescMemory + COMMETHOD_PREPAD); @@ -4495,13 +4493,12 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForClass(MethodTable if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pClassMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbNewSlots.IsOverflow() && !cbTotalSlots.IsOverflow() && !cbVtable.IsOverflow()); ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc.Value()); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); - // set up the header pComMTRW->m_ptReserved = (SLOT)(size_t)0xDEADC0FF; // reserved pComMTRW->m_pMT = pClassMT; // pointer to the class method table @@ -4573,7 +4570,7 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForInterface(MethodT if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pInterfaceMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbVtable.IsOverflow() && !cbMethDescs.IsOverflow()); @@ -4639,7 +4636,8 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForBasic(MethodTable unsigned cbVtable = cbExtraSlots * sizeof(SLOT); unsigned cbToAlloc = sizeof(ComMethodTable) + cbVtable; - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc]; + AllocMemHolder pComMT(pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc))); + ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); diff --git a/src/coreclr/vm/comcallablewrapper.h b/src/coreclr/vm/comcallablewrapper.h index 2581ddf832fd5..0f1e4b878e4c9 100644 --- a/src/coreclr/vm/comcallablewrapper.h +++ b/src/coreclr/vm/comcallablewrapper.h @@ -499,6 +499,7 @@ struct ComMethodTable // Accessor for the IDispatch information. 
DispatchInfo* GetDispatchInfo(); +#ifndef DACCESS_COMPILE LONG AddRef() { LIMITED_METHOD_CONTRACT; @@ -527,6 +528,7 @@ struct ComMethodTable return cbRef; } +#endif // DACCESS_COMPILE CorIfaceAttr GetInterfaceType() { @@ -746,6 +748,7 @@ struct ComMethodTable } +#ifndef DACCESS_COMPILE inline REFIID GetIID() { // Cannot use a normal CONTRACT since the return type is ref type which @@ -768,6 +771,7 @@ struct ComMethodTable return m_IID; } +#endif // DACCESS_COMPILE void CheckParentComVisibility(BOOL fForIDispatch) { diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index b6c17260a1302..1b61e16dec5d3 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1253,7 +1253,7 @@ LPVOID COMDelegate::ConvertToCallback(OBJECTREF pDelegateObj) { GCX_PREEMP(); - pUMThunkMarshInfo = new UMThunkMarshInfo(); + pUMThunkMarshInfo = (UMThunkMarshInfo*)(void*)pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(sizeof(UMThunkMarshInfo))); ExecutableWriterHolder uMThunkMarshInfoWriterHolder(pUMThunkMarshInfo, sizeof(UMThunkMarshInfo)); uMThunkMarshInfoWriterHolder.GetRW()->LoadTimeInit(pInvokeMeth); diff --git a/src/coreclr/vm/dllimportcallback.cpp b/src/coreclr/vm/dllimportcallback.cpp index 4a88f81df5210..4f3cf879d10a4 100644 --- a/src/coreclr/vm/dllimportcallback.cpp +++ b/src/coreclr/vm/dllimportcallback.cpp @@ -41,7 +41,7 @@ class UMEntryThunkFreeList { WRAPPER_NO_CONTRACT; - m_crst.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE); + m_crst.Init(CrstUMEntryThunkFreeListLock, CRST_UNSAFE_ANYMODE); } UMEntryThunk *GetUMEntryThunk() diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 9dae86aca9377..541d88dc16885 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -403,8 +403,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) if (pInfo->m_loAddr != NULL || pInfo->m_hiAddr != NULL) { - m_pBaseAddr = ClrVirtualAllocWithinRange(pInfo->m_loAddr, pInfo->m_hiAddr, - ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(ReserveBlockSize, pInfo->m_loAddr, pInfo->m_hiAddr); if (!m_pBaseAddr) { if (pInfo->getThrowOnOutOfMemoryWithinRange()) @@ -417,7 +416,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) // top up the ReserveBlockSize to suggested minimum ReserveBlockSize = max(ReserveBlockSize, pInfo->getReserveSize()); - m_pBaseAddr = ClrVirtualAllocExecutable(ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(ReserveBlockSize); if (!m_pBaseAddr) ThrowOutOfMemory(); } @@ -749,7 +748,7 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocMemory_NoThrow(size_t header, if (m_pLastAvailableCommittedAddr + sizeToCommit <= m_pBaseAddr + m_TotalBytesAvailable) { - if (NULL == ClrVirtualAlloc(m_pLastAvailableCommittedAddr, sizeToCommit, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) + if (NULL == ExecutableAllocator::Instance()->Commit(m_pLastAvailableCommittedAddr, sizeToCommit, true /* isExecutable */)) { LOG((LF_BCL, LL_ERROR, "CodeHeap [0x%p] - VirtualAlloc failed\n", this)); return NULL; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index a1fdf255a5ce0..6bf5efcc8028c 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6699,14 +6699,12 @@ AdjustContextForJITHelpers( PCODE ip = GetIP(pContext); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(ip)) { // Pretend we were executing 
the barrier function at its original location so that the unwinder can unwind the frame ip = AdjustWriteBarrierIP(ip); SetIP(pContext, ip); } -#endif // FEATURE_WRITEBARRIER_COPY #ifdef FEATURE_DATABREAKPOINT diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 7fff234ca85ef..4af702fab1499 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -4694,14 +4694,12 @@ VOID DECLSPEC_NORETURN UnwindManagedExceptionPass1(PAL_SEHException& ex, CONTEXT break; } -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(frameContext, controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY UINT_PTR sp = GetSP(frameContext); @@ -5174,13 +5172,11 @@ BOOL IsSafeToHandleHardwareException(PCONTEXT contextRecord, PEXCEPTION_RECORD e { PCODE controlPc = GetIP(contextRecord); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location controlPc = AdjustWriteBarrierIP(controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY return g_fEEStarted && ( exceptionRecord->ExceptionCode == STATUS_BREAKPOINT || @@ -5259,14 +5255,12 @@ BOOL HandleHardwareException(PAL_SEHException* ex) { GCX_COOP(); // Must be cooperative to modify frame chain. -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(ex->GetContextRecord(), controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY if (IsIPInMarkedJitHelper(controlPc)) { diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index be856dbe1a63a..9ce0cc676f7a7 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -1258,9 +1258,9 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI { ExecutableWriterHolder instrPtrWriterHolder((void*)instrPtr, 4); #ifdef TARGET_ARM - if (GetARMInstructionLength(savedInstrPtr) == 2) + if (GetARMInstructionLength(savedInstrPtr) == 2) *(WORD *)instrPtrWriterHolder.GetRW() = *(WORD *)savedInstrPtr; - else + else *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #elif defined(TARGET_ARM64) *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index facce7cacd3ef..dc56da1d1779e 100644 --- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -377,10 +377,27 @@ LEAF_ENTRY JIT_WriteBarrierGroup, _TEXT ret LEAF_END JIT_WriteBarrierGroup, _TEXT -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS -// ******************************************************************************* -// Write barrier wrappers with fcall calling convention -// + .data + .align 4 + .global C_FUNC(JIT_WriteBarrierEAX_Loc) +C_FUNC(JIT_WriteBarrierEAX_Loc): + .word 0 + .text + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + mov eax, edx + mov edx, ecx + push eax + call 1f +1: + pop eax +2: + add eax, offset _GLOBAL_OFFSET_TABLE_+1 // (2b - 1b) + mov eax, dword ptr [eax + C_FUNC(JIT_WriteBarrierEAX_Loc)@GOT] + xchg eax, dword ptr [esp] + ret +LEAF_END JIT_WriteBarrier_Callable, _TEXT + .macro UniversalWriteBarrierHelper name .align 4 @@ -392,6 +409,11 @@ LEAF_END JIT_\name, 
_TEXT .endm +#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +// ******************************************************************************* +// Write barrier wrappers with fcall calling convention +// + // Only define these if we're using the ASM GC write barriers; if this flag is not defined, // we'll use C++ versions of these write barriers. UniversalWriteBarrierHelper CheckedWriteBarrier diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 3743ac3cbe02f..3650b3f2afd6d 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -411,15 +411,13 @@ ENDM ;******************************************************************************* ; Write barrier wrappers with fcall calling convention ; -UniversalWriteBarrierHelper MACRO name + + .data ALIGN 4 -PUBLIC @JIT_&name&@8 -@JIT_&name&@8 PROC - mov eax,edx - mov edx,ecx - jmp _JIT_&name&EAX@0 -@JIT_&name&@8 ENDP -ENDM + public _JIT_WriteBarrierEAX_Loc +_JIT_WriteBarrierEAX_Loc dd 0 + + .code ; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of ; WriteBarrier functions so can determine if got AV in them. @@ -429,6 +427,25 @@ _JIT_WriteBarrierGroup@0 PROC ret _JIT_WriteBarrierGroup@0 ENDP + ALIGN 4 +PUBLIC @JIT_WriteBarrier_Callable@8 +@JIT_WriteBarrier_Callable@8 PROC + mov eax,edx + mov edx,ecx + jmp DWORD PTR [_JIT_WriteBarrierEAX_Loc] + +@JIT_WriteBarrier_Callable@8 ENDP + +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS ; Only define these if we're using the ASM GC write barriers; if this flag is not defined, ; we'll use C++ versions of these write barriers. @@ -1233,6 +1250,8 @@ fremloopd: ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. 
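The x86 assembly changes above make JIT_WriteBarrier_Callable jump through the _JIT_WriteBarrierEAX_Loc data cell instead of branching straight to the barrier, so the same call site works whether the barrier runs in place or from its executable copy; the cell is filled in during startup, as the threads.cpp hunk further below shows. A minimal C++ sketch of that indirection (all names here are stand-ins):

#include <cstdio>

typedef void (*WriteBarrierFn)(void** dst, void* ref);

// Stand-ins for the original barrier and the copy placed in the RX mapping.
static void OriginalBarrier(void** dst, void* ref) { *dst = ref; }
static void CopiedBarrier(void** dst, void* ref)   { *dst = ref; }

// Plays the role of JIT_WriteBarrierEAX_Loc: a data cell holding the target.
static WriteBarrierFn g_writeBarrierLoc = nullptr;

static void InitBarrierLocation(bool copyEnabled)
{
    // With the write-barrier copy enabled the patched code lives at a
    // runtime-chosen address, so callers must load the target from the cell.
    g_writeBarrierLoc = copyEnabled ? CopiedBarrier : OriginalBarrier;
}

int main()
{
    void* slot = nullptr;
    int obj = 42;
    InitBarrierLocation(true);
    g_writeBarrierLoc(&slot, &obj);      // the callable wrapper's extra hop
    printf("slot now points at obj: %d\n", (int)(slot == &obj));
    return 0;
}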
; + ALIGN 4 + _JIT_PatchedCodeStart@0 proc public ret _JIT_PatchedCodeStart@0 endp diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 0e366bdbd1a8b..0467f347aaacb 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -1050,10 +1050,18 @@ void InitJITHelpers1() { BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow; - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; - memcpy(pBuf, pfunc, 34); + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 34); + pBufRW = barrierWriterHolder.GetRW(); + } + + memcpy(pBufRW, pfunc, 34); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[33] == 0xC3); @@ -1069,24 +1077,24 @@ void InitJITHelpers1() _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization just jump to the old one // Use the slow one from time to time in a debug build because // there are some good asserts in the unoptimized one if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) { - pfunc = &pBuf[0]; + pfunc = &pBufRW[0]; *pfunc++ = 0xE9; // JMP c_rgDebugWriteBarriers[iBarrier] - *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD)); + *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (&pBuf[1] + sizeof(DWORD)); } #endif // WRITE_BARRIER_CHECK } @@ -1132,7 +1140,7 @@ void ValidateWriteBarrierHelpers() #endif // WRITE_BARRIER_CHECK // first validate the PreGrow helper - BYTE* pWriteBarrierFunc = reinterpret_cast(JIT_WriteBarrierEAX); + BYTE* pWriteBarrierFunc = GetWriteBarrierCodeLocation(reinterpret_cast(JIT_WriteBarrierEAX)); // ephemeral region DWORD* pLocation = reinterpret_cast(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]); @@ -1170,7 +1178,7 @@ void ValidateWriteBarrierHelpers() #endif //CODECOVERAGE /*********************************************************************/ -#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1) +#define WriteBarrierIsPreGrow() ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[10] == 0xc1) /*********************************************************************/ @@ -1188,20 +1196,28 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK // Update the lower bound. 
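InitJITHelpers1 above specializes the copied barrier template for each register by rewriting ModR/M bytes through the writable alias. A standalone illustration of the two mask-and-or patches; the 0xc7 and 0xf8 masks match the hunk, while the instruction bytes and register are made-up examples:

#include <cstdint>
#include <cstdio>

int main()
{
    // First patched instruction: "mov [edx], eax" -> opcode 0x89, ModR/M 0x02.
    uint8_t movModRM = 0x02;
    // Second patched instruction: "cmp eax, imm32" -> opcode 0x81 /7, ModR/M 0xf8.
    uint8_t cmpModRM = 0xf8;

    int reg = 3;                          // EBX in the x86 register encoding

    movModRM &= 0xc7;                     // clear bits 3..5 (the reg field)
    movModRM |= (uint8_t)(reg << 3);      // select the source register

    cmpModRM &= 0xf8;                     // clear bits 0..2 (the r/m field)
    cmpModRM |= (uint8_t)reg;             // select the compared register

    printf("mov ModR/M: 0x%02x, cmp ModR/M: 0x%02x\n", movModRM, cmpModRM);
    return 0;
}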
for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); + + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } // assert there is in fact a cmp r/m32, imm32 there _ASSERTE(pBuf[2] == 0x81); // Update the immediate which is the lower bound of the ephemeral generation - size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound]; + size_t *pfunc = (size_t *) &pBufRW[AnyGrow_EphemeralLowerBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_low) { @@ -1214,7 +1230,7 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) _ASSERTE(pBuf[10] == 0x81); // Update the upper bound if we are using the PostGrow thunk. - pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound]; + pfunc = (size_t *) &pBufRW[PostGrow_EphemeralUpperBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_high) { @@ -1244,7 +1260,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK @@ -1253,12 +1269,20 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; size_t *pfunc; - // Check if we are still using the pre-grow version of the write barrier. + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } + + // Check if we are still using the pre-grow version of the write barrier. if (bWriteBarrierIsPreGrow) { // Check if we need to use the upper bounds checking barrier stub. 
@@ -1271,7 +1295,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) } pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; - memcpy(pBuf, pfunc, 42); + memcpy(pBufRW, pfunc, 42); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[41] == 0xC3); @@ -1287,35 +1311,35 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; // Third instruction to patch is another cmp reg, imm32 (high bound) _ASSERTE(pBuf[10] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[11] &= 0xf8; - pBuf[11] |= reg; + pBufRW[11] &= 0xf8; + pBufRW[11] |= reg; bStompWriteBarrierEphemeral = true; // What we're trying to update is the offset field of a // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } else @@ -1324,14 +1348,14 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[14] == 0x80); - pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[26] == 0xC6); - pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableSecondLocation]; } } else @@ -1340,13 +1364,13 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } // Stick in the adjustment value. 
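The same split runs through StompWriteBarrierEphemeral and StompWriteBarrierResize above: pBuf keeps the read-execute address used by the asserts, while every store goes through pBufRW, which is a distinct writable mapping only when the write-barrier copy is enabled. A simplified, self-contained sketch of that shape; the holder type here is a stand-in, not the runtime's ExecutableWriterHolder:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Stand-in for ExecutableWriterHolder<BYTE>: in the runtime GetRW() returns a
// separate writable mapping of the same memory; here it simply aliases it.
struct WriterHolderSketch
{
    uint8_t* rw = nullptr;
    WriterHolderSketch() = default;
    WriterHolderSketch(uint8_t* rx, size_t /*size*/) : rw(rx) {}
    uint8_t* GetRW() const { return rw; }
};

// Patch a 32-bit immediate at 'offset', reading via the RX view and writing
// via the RW view, mirroring the barrier-patching loops above.
void PatchImmediate(uint8_t* pBuf, size_t size, size_t offset, uint32_t value, bool copyEnabled)
{
    uint8_t* pBufRW = pBuf;
    WriterHolderSketch holder;
    if (copyEnabled)
    {
        holder = WriterHolderSketch(pBuf, size);
        pBufRW = holder.GetRW();
    }

    assert(pBuf[offset - 2] == 0x81);                 // validate the instruction via RX
    memcpy(&pBufRW[offset], &value, sizeof(value));   // mutate only via RW
}

int main()
{
    uint8_t code[16] = { 0x90, 0x90, 0x81, 0xf8 };    // cmp eax, imm32 at index 2
    PatchImmediate(code, sizeof(code), 4, 0x12345678, true);
    return (code[4] == 0x78) ? 0 : 1;                 // little-endian check
}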
diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 61c5dfd90cbfc..564363053fc6a 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -4829,7 +4829,7 @@ VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript) X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale); // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX) - X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0); + X86EmitCall(NewExternalCodeLabel((LPVOID) GetWriteBarrierCodeLocation(&JIT_WriteBarrierEAX)), 0); } else #else // TARGET_AMD64 diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index af5244d077193..564c999975e7c 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -536,7 +536,7 @@ struct StubPrecode { return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); } - +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -562,6 +562,7 @@ struct StubPrecode { ExecutableWriterHolder rel32Holder(&m_rel32, 4); return rel32SetInterlocked(&m_rel32, rel32Holder.GetRW(), target, expected, (MethodDesc*)GetMethodDesc()); } +#endif // !DACCESS_COMPILE }; IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) @@ -646,6 +647,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); #else // HAS_FIXUP_PRECODE_CHUNKS TADDR GetMethodDesc() diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a1e4d93d881de..882e2c29cef04 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11875,7 +11875,7 @@ WORD CEEJitInfo::getRelocTypeHint(void * target) if (m_fAllowRel32) { // The JIT calls this method for data addresses only. It always uses REL32s for direct code targets. - if (IsPreferredExecutableRange(target)) + if (ExecutableAllocator::IsPreferredExecutableRange(target)) return IMAGE_REL_BASED_REL32; } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index ca9d03c2141d3..e071d0717d179 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -238,15 +238,10 @@ extern "C" FCDECL2(Object*, ChkCastAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, extern "C" FCDECL2(Object*, IsInstanceOfAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, Object* obj); extern "C" FCDECL2(LPVOID, Unbox_Helper, CORINFO_CLASS_HANDLE type, Object* obj); -#if defined(TARGET_ARM64) || defined(FEATURE_WRITEBARRIER_COPY) // ARM64 JIT_WriteBarrier uses speciall ABI and thus is not callable directly // Copied write barriers must be called at a different location extern "C" FCDECL2(VOID, JIT_WriteBarrier_Callable, Object **dst, Object *ref); #define WriteBarrier_Helper JIT_WriteBarrier_Callable -#else -// in other cases the regular JIT helper is callable. 
-#define WriteBarrier_Helper JIT_WriteBarrier -#endif extern "C" FCDECL1(void, JIT_InternalThrow, unsigned exceptNum); extern "C" FCDECL1(void*, JIT_InternalThrowFromHelper, unsigned exceptNum); @@ -344,28 +339,25 @@ EXTERN_C FCDECL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift); #ifdef TARGET_X86 +#define ENUM_X86_WRITE_BARRIER_REGISTERS() \ + X86_WRITE_BARRIER_REGISTER(EAX) \ + X86_WRITE_BARRIER_REGISTER(ECX) \ + X86_WRITE_BARRIER_REGISTER(EBX) \ + X86_WRITE_BARRIER_REGISTER(ESI) \ + X86_WRITE_BARRIER_REGISTER(EDI) \ + X86_WRITE_BARRIER_REGISTER(EBP) + extern "C" { - void STDCALL JIT_CheckedWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_DebugWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_WriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBP(); // JIThelp.asm/JIThelp.s + +// JIThelp.asm/JIThelp.s +#define X86_WRITE_BARRIER_REGISTER(reg) \ + void STDCALL JIT_CheckedWriteBarrier##reg(); \ + void STDCALL JIT_DebugWriteBarrier##reg(); \ + void STDCALL JIT_WriteBarrier##reg(); + + ENUM_X86_WRITE_BARRIER_REGISTERS() +#undef X86_WRITE_BARRIER_REGISTER void STDCALL JIT_WriteBarrierGroup(); void STDCALL JIT_WriteBarrierGroup_End(); diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 4f222be4a2c03..0a77e4445f06f 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1137,7 +1137,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) _ASSERTE(dwTotalReserveMemSize <= VIRTUAL_ALLOC_RESERVE_GRANULARITY); #endif - BYTE * initReservedMem = ClrVirtualAllocExecutable(dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + BYTE * initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_InitialReservedMemForLoaderHeaps = initReservedMem; @@ -1672,18 +1672,25 @@ void AssemblyLoaderAllocator::SetCollectible() { CONTRACTL { - THROWS; + NOTHROW; } CONTRACTL_END; m_IsCollectible = true; -#ifndef DACCESS_COMPILE - m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); -#endif } #ifndef DACCESS_COMPILE +void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) +{ + m_Id.Init(); + LoaderAllocator::Init((BaseDomain *)pAppDomain); + if (IsCollectible()) + { + m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); + } +} + #ifndef CROSSGEN_COMPILE AssemblyLoaderAllocator::~AssemblyLoaderAllocator() diff --git a/src/coreclr/vm/loaderallocator.inl b/src/coreclr/vm/loaderallocator.inl index a826675ccc93c..993732d4010f8 100644 --- a/src/coreclr/vm/loaderallocator.inl +++ 
b/src/coreclr/vm/loaderallocator.inl @@ -21,12 +21,6 @@ inline void GlobalLoaderAllocator::Init(BaseDomain *pDomain) LoaderAllocator::Init(pDomain, m_ExecutableHeapInstance); } -inline void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) -{ - m_Id.Init(); - LoaderAllocator::Init((BaseDomain *)pAppDomain); -} - inline BOOL LoaderAllocatorID::Equals(LoaderAllocatorID *pId) { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index bd3984d8697cd..db308ab208a8e 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -4188,46 +4188,6 @@ c_CentralJumpCode = { }; #include -#elif defined(TARGET_AMD64) - -#include -static const struct CentralJumpCode { - BYTE m_movzxRAX[4]; - BYTE m_shlEAX[4]; - BYTE m_movRAX[2]; - MethodDesc* m_pBaseMD; - BYTE m_addR10RAX[3]; - BYTE m_jmp[1]; - INT32 m_rel32; - - inline void Setup(CentralJumpCode* pCodeRX, MethodDesc* pMD, PCODE target, LoaderAllocator *pLoaderAllocator) { - WRAPPER_NO_CONTRACT; - m_pBaseMD = pMD; - m_rel32 = rel32UsingJumpStub(&pCodeRX->m_rel32, target, pMD, pLoaderAllocator); - } - - inline BOOL CheckTarget(TADDR target) { - WRAPPER_NO_CONTRACT; - TADDR addr = rel32Decode(PTR_HOST_MEMBER_TADDR(CentralJumpCode, this, m_rel32)); - if (*PTR_BYTE(addr) == 0x48 && - *PTR_BYTE(addr+1) == 0xB8 && - *PTR_BYTE(addr+10) == 0xFF && - *PTR_BYTE(addr+11) == 0xE0) - { - addr = *PTR_TADDR(addr+2); - } - return (addr == target); - } -} -c_CentralJumpCode = { - { 0x48, 0x0F, 0xB6, 0xC0 }, // movzx rax,al - { 0x48, 0xC1, 0xE0, MethodDesc::ALIGNMENT_SHIFT }, // shl rax, MethodDesc::ALIGNMENT_SHIFT - { 0x49, 0xBA }, NULL, // mov r10, pBaseMD - { 0x4C, 0x03, 0xD0 }, // add r10,rax - { 0xE9 }, 0 // jmp PreStub -}; -#include - #elif defined(TARGET_ARM) #include diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index 80731c191e737..0bd2bd657f9ad 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -480,7 +480,9 @@ void Precode::Reset() #ifdef HAS_FIXUP_PRECODE_CHUNKS if (t == PRECODE_FIXUP) { - size = sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); + // The writeable size the Init method accesses is dynamic depending on + // the FixupPrecode members. + size = ((FixupPrecode*)this)->GetSizeRW(); } else #endif diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 0971334af4d31..e61802b984950 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -713,14 +713,12 @@ UINT_PTR Thread::VirtualUnwindToFirstManagedCallFrame(T_CONTEXT* pContext) // get our caller's PSP, or our caller's caller's SP. 
while (!ExecutionManager::IsManagedCode(uControlPc)) { -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(uControlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame uControlPc = AdjustWriteBarrierIP(uControlPc); SetIP(pContext, uControlPc); } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX uControlPc = VirtualUnwindCallFrame(pContext); diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 04a33e3982613..304cb4fb35b44 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -846,7 +846,7 @@ Stub *StubLinker::Link(LoaderHeap *pHeap, DWORD flags) ); ASSERT(pStub != NULL); - bool fSuccess = EmitStub(pStub, globalsize, pHeap); + bool fSuccess = EmitStub(pStub, globalsize, size, pHeap); #ifdef STUBLINKER_GENERATES_UNWIND_INFO if (fSuccess) @@ -1007,13 +1007,13 @@ int StubLinker::CalculateSize(int* pGlobalSize) return globalsize + datasize; } -bool StubLinker::EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap) +bool StubLinker::EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap) { STANDARD_VM_CONTRACT; BYTE *pCode = (BYTE*)(pStub->GetBlob()); - ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub)); + ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub) + totalSize); Stub *pStubRW = stubWriterHolder.GetRW(); BYTE *pCodeRW = (BYTE*)(pStubRW->GetBlob()); @@ -2013,11 +2013,7 @@ VOID Stub::DeleteStub() FillMemory(this+1, m_numCodeBytes, 0xcc); #endif -#ifndef TARGET_UNIX - DeleteExecutable((BYTE*)GetAllocationBase()); -#else delete [] (BYTE*)GetAllocationBase(); -#endif } } @@ -2124,11 +2120,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) BYTE *pBlock; if (pHeap == NULL) { -#ifndef TARGET_UNIX - pBlock = new (executable) BYTE[totalSize]; -#else pBlock = new BYTE[totalSize]; -#endif } else { diff --git a/src/coreclr/vm/stublink.h b/src/coreclr/vm/stublink.h index 94326f9962ea7..9613fd48f687d 100644 --- a/src/coreclr/vm/stublink.h +++ b/src/coreclr/vm/stublink.h @@ -395,7 +395,7 @@ class StubLinker // Writes out the code element into memory following the // stub object. 
- bool EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap); + bool EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap); CodeRun *GetLastCodeRunIfAny(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index fa93110399d39..2c55f8770b01b 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1078,18 +1078,30 @@ DWORD_PTR Thread::OBJREF_HASH = OBJREF_TABSIZE; extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -#ifdef FEATURE_WRITEBARRIER_COPY - static void* s_barrierCopy = NULL; BYTE* GetWriteBarrierCodeLocation(VOID* barrier) { - return (BYTE*)s_barrierCopy + ((BYTE*)barrier - (BYTE*)JIT_PatchedCodeStart); + if (IsWriteBarrierCopyEnabled()) + { + return (BYTE*)PINSTRToPCODE((TADDR)s_barrierCopy + ((TADDR)barrier - (TADDR)JIT_PatchedCodeStart)); + } + else + { + return (BYTE*)barrier; + } } BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc) { - return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + if (IsWriteBarrierCopyEnabled()) + { + return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + } + else + { + return FALSE; + } } PCODE AdjustWriteBarrierIP(PCODE controlPc) @@ -1100,14 +1112,21 @@ PCODE AdjustWriteBarrierIP(PCODE controlPc) return (PCODE)JIT_PatchedCodeStart + (controlPc - (PCODE)s_barrierCopy); } +#ifdef TARGET_X86 +extern "C" void *JIT_WriteBarrierEAX_Loc; +#else extern "C" void *JIT_WriteBarrier_Loc; +#endif + #ifdef TARGET_ARM64 extern "C" void (*JIT_WriteBarrier_Table)(); extern "C" void *JIT_WriteBarrier_Loc = 0; extern "C" void *JIT_WriteBarrier_Table_Loc = 0; #endif // TARGET_ARM64 -#endif // FEATURE_WRITEBARRIER_COPY +#ifdef TARGET_ARM +extern "C" void *JIT_WriteBarrier_Loc = 0; +#endif // TARGET_ARM #ifndef TARGET_UNIX // g_TlsIndex is only used by the DAC. Disable optimizations around it to prevent it from getting optimized out. 
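GetWriteBarrierCodeLocation, IsIPInWriteBarrierCodeCopy, and AdjustWriteBarrierIP above are simple offset translations between the original JIT_PatchedCodeStart..JIT_PatchedCodeLast range and the copy at s_barrierCopy; the exception-handling and stack-walking hunks earlier rely on them so the unwinder still sees the original addresses. A standalone numeric sketch of the round trip (all addresses are made up):

#include <cstdint>
#include <cstdio>

int main()
{
    uintptr_t patchedCodeStart = 0x10000;   // stand-in for JIT_PatchedCodeStart
    uintptr_t patchedCodeLast  = 0x10400;   // stand-in for JIT_PatchedCodeLast
    uintptr_t barrierCopy      = 0x80000;   // stand-in for s_barrierCopy

    // Original -> copy (the GetWriteBarrierCodeLocation direction):
    uintptr_t ipInCopy = barrierCopy + (0x10080 - patchedCodeStart);

    // Copy -> original (IsIPInWriteBarrierCodeCopy + AdjustWriteBarrierIP),
    // applied before unwinding so existing unwind info matches the IP:
    bool inCopy = ipInCopy >= barrierCopy &&
                  ipInCopy <  barrierCopy + (patchedCodeLast - patchedCodeStart);
    uintptr_t adjusted = inCopy ? patchedCodeStart + (ipInCopy - barrierCopy) : ipInCopy;

    printf("copy IP 0x%zx -> original IP 0x%zx\n", (size_t)ipInCopy, (size_t)adjusted);
    return 0;
}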
@@ -1138,50 +1157,80 @@ void InitThreadManager() _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart > (ptrdiff_t)0); _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart < (ptrdiff_t)GetOsPageSize()); -#ifdef FEATURE_WRITEBARRIER_COPY - s_barrierCopy = ClrVirtualAlloc(NULL, g_SystemInfo.dwAllocationGranularity, MEM_COMMIT, PAGE_EXECUTE_READWRITE); - if (s_barrierCopy == NULL) + if (IsWriteBarrierCopyEnabled()) { - _ASSERTE(!"ClrVirtualAlloc of GC barrier code page failed"); - COMPlusThrowWin32(); - } + s_barrierCopy = ExecutableAllocator::Instance()->Reserve(g_SystemInfo.dwAllocationGranularity); + ExecutableAllocator::Instance()->Commit(s_barrierCopy, g_SystemInfo.dwAllocationGranularity, true); + if (s_barrierCopy == NULL) + { + _ASSERTE(!"Allocation of GC barrier code page failed"); + COMPlusThrowWin32(); + } - { - size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; - ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); - memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); - } + { + size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; + ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); + memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); + } - // Store the JIT_WriteBarrier copy location to a global variable so that helpers - // can jump to it. - JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); + // Store the JIT_WriteBarrier copy location to a global variable so that helpers + // can jump to it. +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); +#define X86_WRITE_BARRIER_REGISTER(reg) \ + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_##reg, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg)); \ + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg), W("@WriteBarrier" #reg)); -#ifdef TARGET_ARM64 - // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. - JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); + ENUM_X86_WRITE_BARRIER_REGISTERS() - SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); -#endif // TARGET_ARM64 +#undef X86_WRITE_BARRIER_REGISTER -#else // FEATURE_WRITEBARRIER_COPY +#else // TARGET_X86 + JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); +#endif // TARGET_X86 + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), W("@WriteBarrier")); - // I am using virtual protect to cover the entire range that this code falls in. - // +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. 
+ JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); +#endif // TARGET_ARM64 - // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, - // so instead we'll leave it writable from here forward. +#if defined(TARGET_ARM64) || defined(TARGET_ARM) + SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier), W("@CheckedWriteBarrier")); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier), W("@ByRefWriteBarrier")); +#endif // TARGET_ARM64 || TARGET_ARM - DWORD oldProt; - if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, - PAGE_EXECUTE_READWRITE, &oldProt)) + } + else { - _ASSERTE(!"ClrVirtualProtect of code page failed"); - COMPlusThrowWin32(); + // I am using virtual protect to cover the entire range that this code falls in. + // + + // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, + // so instead we'll leave it writable from here forward. + + DWORD oldProt; + if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, + PAGE_EXECUTE_READWRITE, &oldProt)) + { + _ASSERTE(!"ClrVirtualProtect of code page failed"); + COMPlusThrowWin32(); + } + +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = (void*)JIT_WriteBarrierEAX; +#else + JIT_WriteBarrier_Loc = (void*)JIT_WriteBarrier; +#endif +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. 
+ JIT_WriteBarrier_Table_Loc = (void*)&JIT_WriteBarrier_Table; +#endif // TARGET_ARM64 } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX _ASSERTE(GetThreadNULLOk() == NULL); diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index d18b21d58f95a..7d600dab5edac 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -6271,18 +6271,23 @@ class ThreadStateNCStackHolder BOOL Debug_IsLockedViaThreadSuspension(); -#ifdef FEATURE_WRITEBARRIER_COPY +inline BOOL IsWriteBarrierCopyEnabled() +{ +#ifdef DACCESS_COMPILE + return FALSE; +#else // DACCESS_COMPILE +#ifdef HOST_OSX + return TRUE; +#else + return ExecutableAllocator::IsWXORXEnabled(); +#endif +#endif // DACCESS_COMPILE +} BYTE* GetWriteBarrierCodeLocation(VOID* barrier); BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc); PCODE AdjustWriteBarrierIP(PCODE controlPc); -#else // FEATURE_WRITEBARRIER_COPY - -#define GetWriteBarrierCodeLocation(barrier) ((BYTE*)(barrier)) - -#endif // FEATURE_WRITEBARRIER_COPY - #if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) extern thread_local Thread* t_pStackWalkerWalkingThread; #define SET_THREAD_TYPE_STACKWALKER(pThread) t_pStackWalkerWalkingThread = pThread diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 95d568d641c73..3af4c52afc9bb 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -641,7 +641,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA dwTotalReserveMemSize); } - initReservedMem = ClrVirtualAllocExecutable (dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_initialReservedMemForHeaps = (BYTE *) initReservedMem; @@ -2766,11 +2766,7 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStub(PCODE ad } #endif - ExecutableWriterHolder dispatchWriterHolder(holder, sizeof(DispatchHolder) -#ifdef TARGET_AMD64 - + sizeof(DispatchStubShort) -#endif - ); + ExecutableWriterHolder dispatchWriterHolder(holder, dispatchHolderSize); dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode, addrOfFail, (size_t)pMTExpected @@ -2833,9 +2829,9 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStubLong(PCODE } CONTRACT_END; //allocate from the requisite heap and copy the template over it. - DispatchHolder * holder = (DispatchHolder*) (void*) - dispatch_heap->AllocAlignedMem(DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG), CODE_SIZE_ALIGN); - ExecutableWriterHolder dispatchWriterHolder(holder, sizeof(DispatchHolder) + sizeof(DispatchStubLong)); + size_t dispatchHolderSize = DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG); + DispatchHolder * holder = (DispatchHolder*) (void*)dispatch_heap->AllocAlignedMem(dispatchHolderSize, CODE_SIZE_ALIGN); + ExecutableWriterHolder dispatchWriterHolder(holder, dispatchHolderSize); dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode, addrOfFail,