From b06b32520ed77d66a7394a09447eea14cd7fb2d8 Mon Sep 17 00:00:00 2001 From: Vladimir Sadov Date: Mon, 24 Jul 2023 06:14:23 -0700 Subject: [PATCH] [NativeAOT] Replace GVMLookupForSlot internal cache with generic cache similar to CastCache (#89331) * factored out CastCache to be able create several. * Cache impl * generic cache * some refactoring * separated GenericCache * comments * less refs * do not store hash * fix CoreCLR and some more refactoring * PR feedback * remove no longer needed CastCache wrapping constructor * remove auto-inserted unused usings. * remove unused ActivatorCreateInstanceAny --- .../Runtime/CompilerServices/CastHelpers.cs | 13 +- .../src/System/Runtime/TypeCast.cs | 15 +- .../CompilerHelpers/LibraryInitializer.cs | 1 - .../src/System/Runtime/TypeLoaderExports.cs | 361 +++++----------- .../Runtime/CompilerServices/CastCache.cs | 10 +- src/coreclr/vm/corelib.h | 2 +- .../System.Private.CoreLib.Shared.projitems | 5 +- .../Runtime/CompilerServices/CastCache.cs | 112 ++--- .../Runtime/CompilerServices/GenericCache.cs | 409 ++++++++++++++++++ 9 files changed, 614 insertions(+), 314 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/GenericCache.cs diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs index 435782b8b754d..0292c992a94f3 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs @@ -11,6 +11,9 @@ namespace System.Runtime.CompilerServices { internal static unsafe class CastHelpers { + // In coreclr the table is allocated and written to on the native side. + internal static int[]? 
s_table; + [MethodImpl(MethodImplOptions.InternalCall)] private static extern object IsInstanceOfAny_NoCacheLookup(void* toTypeHnd, object obj); @@ -36,7 +39,7 @@ internal static unsafe class CastHelpers void* mt = RuntimeHelpers.GetMethodTable(obj); if (mt != toTypeHnd) { - CastResult result = CastCache.TryGet((nuint)mt, (nuint)toTypeHnd); + CastResult result = CastCache.TryGet(s_table!, (nuint)mt, (nuint)toTypeHnd); if (result == CastResult.CanCast) { // do nothing @@ -186,7 +189,7 @@ internal static unsafe class CastHelpers [MethodImpl(MethodImplOptions.NoInlining)] private static object? IsInstance_Helper(void* toTypeHnd, object obj) { - CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd); + CastResult result = CastCache.TryGet(s_table!, (nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd); if (result == CastResult.CanCast) { return obj; @@ -215,7 +218,7 @@ internal static unsafe class CastHelpers void* mt = RuntimeHelpers.GetMethodTable(obj); if (mt != toTypeHnd) { - result = CastCache.TryGet((nuint)mt, (nuint)toTypeHnd); + result = CastCache.TryGet(s_table!, (nuint)mt, (nuint)toTypeHnd); if (result != CastResult.CanCast) { goto slowPath; @@ -239,7 +242,7 @@ internal static unsafe class CastHelpers [MethodImpl(MethodImplOptions.NoInlining)] private static object? ChkCast_Helper(void* toTypeHnd, object obj) { - CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd); + CastResult result = CastCache.TryGet(s_table!, (nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)toTypeHnd); if (result == CastResult.CanCast) { return obj; @@ -456,7 +459,7 @@ private static void StelemRef(Array array, nint index, object? obj) [MethodImpl(MethodImplOptions.NoInlining)] private static void StelemRef_Helper(ref object? 
element, void* elementType, object obj) { - CastResult result = CastCache.TryGet((nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)elementType); + CastResult result = CastCache.TryGet(s_table!, (nuint)RuntimeHelpers.GetMethodTable(obj), (nuint)elementType); if (result == CastResult.CanCast) { WriteBarrier(ref element, obj); diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs index 3cd06a108b2bb..364a248158e73 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs @@ -21,8 +21,19 @@ namespace System.Runtime // ///////////////////////////////////////////////////////////////////////////////////////////////////// + [EagerStaticClassConstruction] internal static class TypeCast { +#if DEBUG + private const int InitialCacheSize = 8; // MUST BE A POWER OF TWO + private const int MaximumCacheSize = 512; // make this lower than release to make it easier to reach this in tests. +#else + private const int InitialCacheSize = 128; // MUST BE A POWER OF TWO + private const int MaximumCacheSize = 4096; // 4096 * sizeof(CastCacheEntry) is 98304 bytes on 64bit. We will rarely need this much though. 
+#endif // DEBUG + + private static CastCache s_castCache = new CastCache(InitialCacheSize, MaximumCacheSize); + [Flags] internal enum AssignmentVariation { @@ -1159,7 +1170,7 @@ public static unsafe bool AreTypesAssignableInternal(MethodTable* pSourceType, M return true; nuint sourceAndVariation = (nuint)pSourceType + (uint)variation; - CastResult result = CastCache.TryGet(sourceAndVariation, (nuint)(pTargetType)); + CastResult result = s_castCache.TryGet(sourceAndVariation, (nuint)(pTargetType)); if (result != CastResult.MaybeCast) { return result == CastResult.CanCast; @@ -1187,7 +1198,7 @@ private static unsafe bool CacheMiss(MethodTable* pSourceType, MethodTable* pTar // Update the cache // nuint sourceAndVariation = (nuint)pSourceType + (uint)variation; - CastCache.TrySet(sourceAndVariation, (nuint)pTargetType, result); + s_castCache.TrySet(sourceAndVariation, (nuint)pTargetType, result); return result; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/LibraryInitializer.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/LibraryInitializer.cs index 720f0c3180b20..bf3c60daa8099 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/LibraryInitializer.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/LibraryInitializer.cs @@ -19,7 +19,6 @@ public static void InitializeLibrary() { PreallocatedOutOfMemoryException.Initialize(); ClassConstructorRunner.Initialize(); - TypeLoaderExports.Initialize(); } } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/TypeLoaderExports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/TypeLoaderExports.cs index 431c83b42d032..c49220b95672d 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/TypeLoaderExports.cs +++ 
b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/TypeLoaderExports.cs @@ -7,164 +7,173 @@ using System.Threading; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Numerics; namespace System.Runtime { + // Initialize the cache eagerly to avoid null checks. + [EagerStaticClassConstruction] public static class TypeLoaderExports { - public static unsafe void ActivatorCreateInstanceAny(ref object ptrToData, IntPtr pEETypePtr) + // + // Generic lookup cache + // + +#if DEBUG + // use smaller numbers to hit resizing/preempting logic in debug + private const int InitialCacheSize = 8; // MUST BE A POWER OF TWO + private const int MaximumCacheSize = 512; +#else + private const int InitialCacheSize = 128; // MUST BE A POWER OF TWO + private const int MaximumCacheSize = 128 * 1024; +#endif // DEBUG + + private static GenericCache s_cache = + new GenericCache(InitialCacheSize, MaximumCacheSize); + + private struct Key : IEquatable { - EETypePtr pEEType = new EETypePtr(pEETypePtr); + public IntPtr _context; + public IntPtr _signature; - if (pEEType.IsValueType) + public Key(nint context, nint signature) { - // Nothing else to do for value types. 
- return; + _context = context; + _signature = signature; } - // For reference types, we need to: - // 1- Allocate the new object - // 2- Call its default ctor - // 3- Update ptrToData to point to that newly allocated object - ptrToData = RuntimeImports.RhNewObject(pEEType); - - Entry entry = LookupInCache(s_cache, pEETypePtr, pEETypePtr); - entry ??= CacheMiss(pEETypePtr, pEETypePtr, - (IntPtr context, IntPtr signature, object contextObject, ref IntPtr auxResult) => - { - IntPtr result = RuntimeAugments.TypeLoaderCallbacks.TryGetDefaultConstructorForType(new RuntimeTypeHandle(new EETypePtr(context))); - if (result == IntPtr.Zero) - result = RuntimeAugments.GetFallbackDefaultConstructor(); - return result; - }); - RawCalliHelper.Call(entry.Result, ptrToData); - } + public bool Equals(Key other) + { + return _context == other._context && _signature == other._signature; + } - // - // Generic lookup cache - // + public override int GetHashCode() + { + // pointers will likely match and cancel out in the upper bits + // we will rotate context by 16 bit to keep more varying bits in the hash + IntPtr context = (IntPtr)BitOperations.RotateLeft((nuint)_context, 16); + return (context ^ _signature).GetHashCode(); + } - private class Entry - { - public IntPtr Context; - public IntPtr Signature; - public IntPtr Result; - public IntPtr AuxResult; - public Entry Next; + public override bool Equals(object obj) + { + return obj is Key && Equals((Key)obj); + } } - // Initialize the cache eagerly to avoid null checks. - // Use array with just single element to make this pay-for-play. The actual cache will be allocated only - // once the lazy lookups are actually needed. 
- private static Entry[] s_cache; + private struct Value + { + public IntPtr _result; + public IntPtr _auxResult; - private static Lock s_lock; - private static GCHandle s_previousCache; + public Value(IntPtr result, IntPtr auxResult) + { + _result = result; + _auxResult = auxResult; + } + } - internal static void Initialize() + private static Value LookupOrAdd(IntPtr context, IntPtr signature) { - s_cache = new Entry[1]; + if (!TryGetFromCache(context, signature, out var v)) + { + v = CacheMiss(context, signature); + } + + return v; } public static IntPtr GenericLookup(IntPtr context, IntPtr signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - return entry.Result; + if (!TryGetFromCache(context, signature, out var v)) + { + v = CacheMiss(context, signature); + } + + return v._result; } public static void GenericLookupAndCallCtor(object arg, IntPtr context, IntPtr signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - RawCalliHelper.Call(entry.Result, arg); + Value v = LookupOrAdd(context, signature); + RawCalliHelper.Call(v._result, arg); } public static object GenericLookupAndAllocObject(IntPtr context, IntPtr signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - return RawCalliHelper.Call(entry.Result, entry.AuxResult); + Value v = LookupOrAdd(context, signature); + return RawCalliHelper.Call(v._result, v._auxResult); } public static object GenericLookupAndAllocArray(IntPtr context, IntPtr arg, IntPtr signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - return RawCalliHelper.Call(entry.Result, entry.AuxResult, arg); + Value v = LookupOrAdd(context, signature); + return RawCalliHelper.Call(v._result, v._auxResult, arg); } public static void GenericLookupAndCheckArrayElemType(IntPtr context, object arg, IntPtr 
signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - RawCalliHelper.Call(entry.Result, entry.AuxResult, arg); + Value v = LookupOrAdd(context, signature); + RawCalliHelper.Call(v._result, v._auxResult, arg); } public static object GenericLookupAndCast(object arg, IntPtr context, IntPtr signature) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature); - return RawCalliHelper.Call(entry.Result, arg, entry.AuxResult); + Value v = LookupOrAdd(context, signature); + return RawCalliHelper.Call(v._result, arg, v._auxResult); } public static unsafe IntPtr GVMLookupForSlot(object obj, RuntimeMethodHandle slot) { - Entry entry = LookupInCache(s_cache, (IntPtr)obj.GetMethodTable(), RuntimeMethodHandle.ToIntPtr(slot)); - if (entry != null) - return entry.Result; + if (TryGetFromCache((IntPtr)obj.GetMethodTable(), RuntimeMethodHandle.ToIntPtr(slot), out var v)) + return v._result; return GVMLookupForSlotSlow(obj, slot); } private static unsafe IntPtr GVMLookupForSlotSlow(object obj, RuntimeMethodHandle slot) { - Entry entry = CacheMiss((IntPtr)obj.GetMethodTable(), RuntimeMethodHandle.ToIntPtr(slot), + Value v = CacheMiss((IntPtr)obj.GetMethodTable(), RuntimeMethodHandle.ToIntPtr(slot), (IntPtr context, IntPtr signature, object contextObject, ref IntPtr auxResult) => RuntimeAugments.TypeLoaderCallbacks.ResolveGenericVirtualMethodTarget(new RuntimeTypeHandle(new EETypePtr(context)), *(RuntimeMethodHandle*)&signature)); - return entry.Result; + return v._result; } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static unsafe IntPtr OpenInstanceMethodLookup(IntPtr openResolver, object obj) { - Entry entry = LookupInCache(s_cache, (IntPtr)obj.GetMethodTable(), openResolver); - entry ??= CacheMiss((IntPtr)obj.GetMethodTable(), openResolver, - (IntPtr context, IntPtr signature, object contextObject, ref IntPtr auxResult) - => 
Internal.Runtime.CompilerServices.OpenMethodResolver.ResolveMethodWorker(signature, contextObject), - obj); - return entry.Result; + if (!TryGetFromCache((IntPtr)obj.GetMethodTable(), openResolver, out var v)) + { + v = CacheMiss((IntPtr)obj.GetMethodTable(), openResolver, + (IntPtr context, IntPtr signature, object contextObject, ref IntPtr auxResult) + => Internal.Runtime.CompilerServices.OpenMethodResolver.ResolveMethodWorker(signature, contextObject), + obj); + } + + return v._result; } - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] - private static Entry LookupInCache(Entry[] cache, IntPtr context, IntPtr signature) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryGetFromCache(IntPtr context, IntPtr signature, out Value entry) { - int key = ((context.GetHashCode() >> 4) ^ signature.GetHashCode()) & (cache.Length - 1); -#if DEBUG - Entry entry = cache[key]; -#else - Entry entry = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(cache), key); -#endif - while (entry != null) - { - if (entry.Context == context && entry.Signature == signature) - break; - entry = entry.Next; - } - return entry; + Key k = new Key(context, signature); + return s_cache.TryGet(k, out entry); } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static IntPtr RuntimeCacheLookupInCache(IntPtr context, IntPtr signature, RuntimeObjectFactory factory, object contextObject, out IntPtr auxResult) { - Entry entry = LookupInCache(s_cache, context, signature); - entry ??= CacheMiss(context, signature, factory, contextObject); - auxResult = entry.AuxResult; - return entry.Result; + if (!TryGetFromCache(context, signature, out var v)) + { + v = CacheMiss(context, signature, factory, contextObject); + } + + auxResult = v._auxResult; + return v._result; } - private static Entry CacheMiss(IntPtr ctx, IntPtr sig) + private static Value CacheMiss(IntPtr ctx, IntPtr sig) { return CacheMiss(ctx, sig, (IntPtr context, IntPtr signature, object 
contextObject, ref IntPtr auxResult) => @@ -172,161 +181,19 @@ private static Entry CacheMiss(IntPtr ctx, IntPtr sig) ); } - private static unsafe Entry CacheMiss(IntPtr context, IntPtr signature, RuntimeObjectFactory factory, object contextObject = null) + private static unsafe Value CacheMiss(IntPtr context, IntPtr signature, RuntimeObjectFactory factory, object contextObject = null) { - IntPtr result = IntPtr.Zero, auxResult = IntPtr.Zero; - bool previouslyCached = false; - - // - // Try to find the entry in the previous version of the cache that is kept alive by weak reference - // - if (s_previousCache.IsAllocated) - { - Entry[]? previousCache = (Entry[]?)s_previousCache.Target; - if (previousCache != null) - { - Entry previousEntry = LookupInCache(previousCache, context, signature); - if (previousEntry != null) - { - result = previousEntry.Result; - auxResult = previousEntry.AuxResult; - previouslyCached = true; - } - } - } - // // Call into the type loader to compute the target // - if (!previouslyCached) - { - result = factory(context, signature, contextObject, ref auxResult); - } - - // - // Update the cache under the lock - // - if (s_lock == null) - Interlocked.CompareExchange(ref s_lock, new Lock(), null); + IntPtr auxResult = default; + IntPtr result = factory(context, signature, contextObject, ref auxResult); - s_lock.Acquire(); - try - { - // Avoid duplicate entries - Entry existingEntry = LookupInCache(s_cache, context, signature); - if (existingEntry != null) - return existingEntry; + Key k = new Key(context, signature); + Value v = new Value(result, auxResult); - // Resize cache as necessary - Entry[] cache = ResizeCacheForNewEntryAsNecessary(); - - int key = ((context.GetHashCode() >> 4) ^ signature.GetHashCode()) & (cache.Length - 1); - - Entry newEntry = new Entry() { Context = context, Signature = signature, Result = result, AuxResult = auxResult, Next = cache[key] }; - cache[key] = newEntry; - return newEntry; - } - finally - { - 
s_lock.Release(); - } - } - - // - // Parameters and state used by generic lookup cache resizing algorithm - // - - private const int InitialCacheSize = 128; // MUST BE A POWER OF TWO - private const int DefaultCacheSize = 1024; - private const int MaximumCacheSize = 128 * 1024; - - private static long s_tickCountOfLastOverflow; - private static int s_entries; - private static bool s_roundRobinFlushing; - - private static Entry[] ResizeCacheForNewEntryAsNecessary() - { - Entry[] cache = s_cache; - - if (cache.Length < InitialCacheSize) - { - // Start with small cache size so that the cache entries used by startup one-time only initialization will get flushed soon - return s_cache = new Entry[InitialCacheSize]; - } - - int entries = s_entries++; - - // If the cache has spare space, we are done - if (2 * entries < cache.Length) - { - if (s_roundRobinFlushing) - { - cache[2 * entries] = null; - cache[2 * entries + 1] = null; - } - return cache; - } - - // - // Now, we have cache that is overflowing with the stuff. 
We need to decide whether to resize it or start flushing the old entries instead - // - - // Start over counting the entries - s_entries = 0; - - // See how long it has been since the last time the cache was overflowing - long tickCount = Environment.TickCount64; - long tickCountSinceLastOverflow = tickCount - s_tickCountOfLastOverflow; - s_tickCountOfLastOverflow = tickCount; - - bool shrinkCache = false; - bool growCache = false; - - if (cache.Length < DefaultCacheSize) - { - // If the cache have not reached the default size, just grow it without thinking about it much - growCache = true; - } - else - { - if (tickCountSinceLastOverflow < cache.Length / 128) - { - // If the fill rate of the cache is faster than ~0.01ms per entry, grow it - if (cache.Length < MaximumCacheSize) - growCache = true; - } - else - if (tickCountSinceLastOverflow > cache.Length * 16) - { - // If the fill rate of the cache is slower than 16ms per entry, shrink it - if (cache.Length > DefaultCacheSize) - shrinkCache = true; - } - // Otherwise, keep the current size and just keep flushing the entries round robin - } - - if (growCache || shrinkCache) - { - s_roundRobinFlushing = false; - - // Keep the reference to the old cache in a weak handle. We will try to use to avoid - // hitting the type loader until GC collects it. - if (s_previousCache.IsAllocated) - { - s_previousCache.Target = cache; - } - else - { - s_previousCache = GCHandle.Alloc(cache, GCHandleType.Weak); - } - - return s_cache = new Entry[shrinkCache ? 
(cache.Length / 2) : (cache.Length * 2)]; - } - else - { - s_roundRobinFlushing = true; - return cache; - } + s_cache.TrySet(k, v); + return v; } } @@ -334,39 +201,39 @@ private static Entry[] ResizeCacheForNewEntryAsNecessary() internal static unsafe class RawCalliHelper { - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Call(System.IntPtr pfn, ref byte data) => ((delegate*)pfn)(ref data); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Call(System.IntPtr pfn, IntPtr arg) => ((delegate*)pfn)(arg); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Call(System.IntPtr pfn, object arg) => ((delegate*)pfn)(arg); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Call(System.IntPtr pfn, IntPtr arg1, IntPtr arg2) => ((delegate*)pfn)(arg1, arg2); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Call(System.IntPtr pfn, IntPtr arg1, IntPtr arg2, object arg3, out IntPtr arg4) => ((delegate*)pfn)(arg1, arg2, arg3, out arg4); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Call(System.IntPtr pfn, IntPtr arg1, object arg2) => ((delegate*)pfn)(arg1, arg2); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Call(System.IntPtr pfn, object arg1, IntPtr arg2) => ((delegate*)pfn)(arg1, arg2); - [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Call(IntPtr pfn, string[] arg0) => ((delegate*)pfn)(arg0); - 
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ref byte Call(IntPtr pfn, void* arg1, ref byte arg2, ref byte arg3, void* arg4) => ref ((delegate*)pfn)(arg1, ref arg2, ref arg3, arg4); } diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs index 58de4e50ea2bd..389a8a5d82e1d 100644 --- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs +++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs @@ -8,14 +8,18 @@ internal enum CastResult } // trivial implementation of the cast cache - internal static unsafe class CastCache + internal unsafe struct CastCache { - internal static CastResult TryGet(nuint source, nuint target) + public CastCache(int initialCacheSize, int maxCacheSize) + { + } + + internal CastResult TryGet(nuint source, nuint target) { return CastResult.MaybeCast; } - internal static void TrySet(nuint source, nuint target, bool result) + internal void TrySet(nuint source, nuint target, bool result) { } } diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index d085abd6361e2..96576a8581312 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -1168,7 +1168,7 @@ DEFINE_CLASS(NULLABLE_COMPARER, CollectionsGeneric, NullableComparer`1) DEFINE_CLASS(INATTRIBUTE, Interop, InAttribute) -DEFINE_CLASS(CASTCACHE, CompilerServices, CastCache) +DEFINE_CLASS(CASTCACHE, CompilerServices, CastHelpers) DEFINE_FIELD(CASTCACHE, TABLE, s_table) DEFINE_CLASS(CASTHELPERS, CompilerServices, CastHelpers) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index a7b6331227f9a..65d65ff797e22 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -775,7 +775,8 @@ - + + @@ -2676,4 +2677,4 @@ - \ No newline at end of file + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs index a333b710d012c..7653749d5d5f2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastCache.cs @@ -15,47 +15,45 @@ internal enum CastResult MaybeCast = 2 } -#if NATIVEAOT - [EagerStaticClassConstruction] -#endif - internal static unsafe class CastCache + internal unsafe struct CastCache { - -#if CORECLR - // In coreclr the table is written to only on the native side. T - // This is all we need to implement TryGet. - private static int[]? s_table; -#else - - #if DEBUG - private const int INITIAL_CACHE_SIZE = 8; // MUST BE A POWER OF TWO - private const int MAXIMUM_CACHE_SIZE = 512; // make this lower than release to make it easier to reach this in tests. - #else - private const int INITIAL_CACHE_SIZE = 128; // MUST BE A POWER OF TWO - private const int MAXIMUM_CACHE_SIZE = 4096; // 4096 * sizeof(CastCacheEntry) is 98304 bytes on 64bit. We will rarely need this much though. - #endif // DEBUG - private const int VERSION_NUM_SIZE = 29; private const uint VERSION_NUM_MASK = (1 << VERSION_NUM_SIZE) - 1; + private const int BUCKET_SIZE = 8; - // A trivial 2-elements table used for "flushing" the cache. Nothing is ever stored in this table. - // It is required that we are able to allocate this. - private static int[] s_sentinelTable = CreateCastCache(2, throwOnFail: true)!; + // nothing is ever stored into this, so we can use a static instance. + private static int[]? s_sentinelTable; + + // The actual storage. 
+ private int[] _table; // when flushing, remember the last size. - private static int s_lastFlushSize = INITIAL_CACHE_SIZE; + private int _lastFlushSize; - // The actual storage. - // Initialize to the sentinel in DEBUG as if just flushed, to ensure the sentinel can be handled in Set. - private static int[] s_table = - #if !DEBUG - CreateCastCache(INITIAL_CACHE_SIZE) ?? - #endif - s_sentinelTable; + private int _initialCacheSize; + private int _maxCacheSize; -#endif // CORECLR + public CastCache(int initialCacheSize, int maxCacheSize) + { + Debug.Assert(BitOperations.PopCount((uint)initialCacheSize) == 1 && initialCacheSize > 1); + Debug.Assert(BitOperations.PopCount((uint)maxCacheSize) == 1 && maxCacheSize >= initialCacheSize); - private const int BUCKET_SIZE = 8; + _initialCacheSize = initialCacheSize; + _maxCacheSize = maxCacheSize; + + // A trivial 2-elements table used for "flushing" the cache. + // Nothing is ever stored in such a small table and identity of the sentinel is not important. + // It is required that we are able to allocate this, we may need this in OOM cases. + s_sentinelTable ??= CreateCastCache(2, throwOnFail: true); + + _table = +#if !DEBUG + // Initialize to the sentinel in DEBUG as if just flushed, to ensure the sentinel can be handled in Set. + CreateCastCache(_initialCacheSize) ?? +#endif + s_sentinelTable!; + _lastFlushSize = _initialCacheSize; + } [StructLayout(LayoutKind.Sequential)] private struct CastCacheEntry @@ -139,10 +137,17 @@ private static ref CastCacheEntry Element(ref int tableData, int index) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static CastResult TryGet(nuint source, nuint target) + internal CastResult TryGet(nuint source, nuint target) + { + // table is always initialized and is not null. 
+ return TryGet(_table!, source, target); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static CastResult TryGet(int[] table, nuint source, nuint target) { // table is always initialized and is not null. - ref int tableData = ref TableData(s_table!); + ref int tableData = ref TableData(table); int index = KeyToBucket(ref tableData, source, target); for (int i = 0; i < BUCKET_SIZE;) @@ -159,7 +164,7 @@ internal static CastResult TryGet(nuint source, nuint target) if (entrySource == source) { - // in CoreCLR we do ordinary reads of the entry parts and + // we do ordinary reads of the entry parts and // Interlocked.ReadMemoryBarrier() before reading the version nuint entryTargetAndResult = pEntry._targetAndResult; // target never has its lower bit set. @@ -204,10 +209,9 @@ internal static CastResult TryGet(nuint source, nuint target) // in CoreClr the cache is only updated in the native code // // The following helpers must match native implementations in castcache.h and castcache.cpp -#if !CORECLR // we generally do not OOM in casts, just return null unless throwOnFail is specified. - private static int[]? CreateCastCache(int size, bool throwOnFail = false) + private int[]? CreateCastCache(int size, bool throwOnFail = false) { // size must be positive Debug.Assert(size > 1); @@ -225,7 +229,7 @@ internal static CastResult TryGet(nuint source, nuint target) if (table == null) { - size = INITIAL_CACHE_SIZE; + size = _initialCacheSize; try { table = new int[(size + 1) * sizeof(CastCacheEntry) / sizeof(int)]; @@ -252,20 +256,20 @@ internal static CastResult TryGet(nuint source, nuint target) return table; } - internal static void TrySet(nuint source, nuint target, bool result) + internal void TrySet(nuint source, nuint target, bool result) { int bucket; ref int tableData = ref *(int*)0; do { - tableData = ref TableData(s_table); + tableData = ref TableData(_table); if (TableMask(ref tableData) == 1) { // 2-element table is used as a sentinel. 
// we did not allocate a real table yet or have flushed it. // try replacing the table, but do not insert anything. - MaybeReplaceCacheWithLarger(s_lastFlushSize); + MaybeReplaceCacheWithLarger(_lastFlushSize); return; } @@ -333,7 +337,7 @@ internal static void TrySet(nuint source, nuint target, bool result) } while (TryGrow(ref tableData)); // reread tableData after TryGrow. - tableData = ref TableData(s_table); + tableData = ref TableData(_table); if (TableMask(ref tableData) == 1) { @@ -381,19 +385,22 @@ private static int CacheElementCount(ref int tableData) return TableMask(ref tableData) + 1; } - private static void FlushCurrentCache() + private void FlushCurrentCache() { - ref int tableData = ref TableData(s_table); + ref int tableData = ref TableData(_table); int lastSize = CacheElementCount(ref tableData); - if (lastSize < INITIAL_CACHE_SIZE) - lastSize = INITIAL_CACHE_SIZE; + if (lastSize < _initialCacheSize) + lastSize = _initialCacheSize; - s_lastFlushSize = lastSize; + // store the last size to use when creating a new table + // it is just a hint, not needed for correctness, so no synchronization + // with the writing of the table + _lastFlushSize = lastSize; // flushing is just replacing the table with a sentinel. - s_table = s_sentinelTable; + _table = s_sentinelTable!; } - private static bool MaybeReplaceCacheWithLarger(int size) + private bool MaybeReplaceCacheWithLarger(int size) { int[]? 
namespace System.Runtime.CompilerServices
{

    // EntryInfo is a union, so that we can put some extra info in element #0 of the table.
    // The struct is not nested in GenericCache because generic types cannot have explicit layout.
    [StructLayout(LayoutKind.Explicit)]
    internal struct EntryInfo
    {
        // version has the following structure:
        // [ distance:3bit |  versionNum:29bit ]
        //
        // distance is how many iterations the entry is from its ideal position.
        // we use that for preemption.
        //
        // versionNum is a monotonically increasing numerical tag.
        // Writer "claims" entry by atomically incrementing the tag. Thus odd number indicates an entry in progress.
        // Upon completion of adding an entry the tag is incremented again making it even. Even number indicates a complete entry.
        //
        // Readers will read the version twice before and after retrieving the entry.
        // To have a usable entry both reads must yield the same even version.
        //
        [FieldOffset(0)]
        internal uint _version;

        // AuxData (to store some data specific to the table in the element #0)
        // These overlap '_version' and are only meaningful in element #0, which never holds a cached entry.
        [FieldOffset(0)]
        internal byte hashShift;
        [FieldOffset(1)]
        internal byte victimCounter;
    }

    // A lossy, fixed-bucket cache mapping TKey to TValue.
    //
    // Readers never take locks: every entry carries a version that writers bump before and after
    // an update, and a reader accepts an entry only if it observes the same even version on both
    // sides of reading the entry. Writers claim an entry with a compare-exchange on the version.
    //
    // NOTE: It is ok if TKey contains references, but we want it to be a struct,
    //       so that equality is devirtualized.
    internal unsafe struct GenericCache<TKey, TValue>
        where TKey : struct, IEquatable<TKey>
    {
        private struct Entry
        {
            internal EntryInfo _info;
            internal TKey _key;
            internal TValue _value;

            [UnscopedRef]
            public ref uint Version => ref _info._version;
        }

        private const int VERSION_NUM_SIZE = 29;
        private const uint VERSION_NUM_MASK = (1 << VERSION_NUM_SIZE) - 1;
        private const int BUCKET_SIZE = 8;

        // nothing is ever stored into this, so we can use a static instance.
        private static Entry[]? s_sentinelTable;

        // The actual storage. Element #0 holds aux data, cached entries start at element #1.
        private Entry[] _table;

        // when flushing, remember the last size.
        private int _lastFlushSize;

        private readonly int _initialCacheSize;
        private readonly int _maxCacheSize;

        // Creates a new cache instance.
        //   initialCacheSize - number of entries in the first real table; a power of two greater than 1.
        //   maxCacheSize     - the table is never grown beyond this; a power of two >= initialCacheSize.
        //
        // NOTE: a table with exactly 2 entries is what IsSentinel recognizes as the sentinel,
        //       so an initialCacheSize of 2 would yield a cache that never stores anything.
        public GenericCache(int initialCacheSize, int maxCacheSize)
        {
            Debug.Assert(BitOperations.PopCount((uint)initialCacheSize) == 1 && initialCacheSize > 1);
            Debug.Assert(BitOperations.PopCount((uint)maxCacheSize) == 1 && maxCacheSize >= initialCacheSize);

            _initialCacheSize = initialCacheSize;
            _maxCacheSize = maxCacheSize;

            // A trivial 2-elements table used for "flushing" the cache.
            // Nothing is ever stored in such a small table and identity of the sentinel is not important.
            // It is required that we are able to allocate this, we may need this in OOM cases.
            s_sentinelTable ??= CreateCacheTable(2, throwOnFail: true);

            _table =
#if !DEBUG
            // Initialize to the sentinel in DEBUG as if just flushed, to ensure the sentinel can be handled in Set.
            CreateCacheTable(initialCacheSize) ??
#endif
            s_sentinelTable!;
            _lastFlushSize = initialCacheSize;
        }

        // Reduces a hash code to the index of the first entry to probe, using Fibonacci hashing
        // with the shift stored in the table's aux data.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int HashToBucket(Entry[] table, int hash)
        {
            byte hashShift = HashShift(table);
#if TARGET_64BIT
            return (int)(((ulong)hash * 11400714819323198485ul) >> hashShift);
#else
            return (int)(((uint)hash * 2654435769u) >> hashShift);
#endif
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ref Entry TableData(Entry[] table)
        {
            // points to element 0, which is used for embedded aux data
            return ref MemoryMarshal.GetArrayDataReference(table);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ref byte HashShift(Entry[] table)
        {
            return ref TableData(table)._info.hashShift;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ref byte VictimCounter(Entry[] table)
        {
            return ref TableData(table)._info.victimCounter;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int TableMask(Entry[] table)
        {
            // element 0 is used for embedded aux data
            return table.Length - 2;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsSentinel(Entry[] table)
        {
            // The sentinel is created with 2 entries (array length 3, because of the aux element),
            // which makes its mask 1. Real tables are expected to be larger than that.
            return TableMask(table) == 1;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static ref Entry Element(Entry[] table, int index)
        {
            // element 0 is used for embedded aux data, skip it
            return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(table), index + 1);
        }

        // Looks up 'key'. Returns true and the cached value on a hit.
        // Returns false and default on a miss, including when the matching entry
        // was being modified while we were reading it.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal bool TryGet(TKey key, out TValue? value)
        {
            // table is always initialized and is not null.
            Entry[] table = _table!;
            int hash = key.GetHashCode();
            int index = HashToBucket(table, hash);
            for (int i = 0; i < BUCKET_SIZE;)
            {
                ref Entry pEntry = ref Element(table, index);

                // we must read in this order: version -> [entry parts] -> version
                // if version is odd or changes, the entry is inconsistent and thus ignored
                uint version = Volatile.Read(ref pEntry.Version);

                if (version == 0)
                {
                    // the entry was never claimed, so the rest of the bucket is unclaimed too,
                    // no point to search further.
                    // This is checked before comparing keys, otherwise a lookup of default(TKey)
                    // would "match" the zero-initialized key of an unclaimed entry.
                    break;
                }

                // NOTE: We could store hash as a part of entry info and compare hash before comparing keys.
                //       Space-wise it would typically be free because of alignment.
                //       However, hash compare would be advantageous only if it is much cheaper than the key compare.
                //       That is not the case for current uses of this cache, so for now we do not store
                //       hash and just do direct comparing of keys. (hash compare can be easily added, if needed)
                if (key.Equals(pEntry._key))
                {
                    // we use ordinary reads to fetch the value
                    value = pEntry._value;

                    // make sure the second read of 'version' happens after reading '_value'
                    Interlocked.ReadMemoryBarrier();

                    // mask the lower version bit to make it even.
                    // This way we can check if version is odd or changing in just one compare.
                    version &= unchecked((uint)~1);
                    if (version != pEntry.Version)
                    {
                        // oh, so close, the entry is in inconsistent state.
                        // it is either changing or has changed while we were reading.
                        // treat it as a miss.
                        break;
                    }

                    return true;
                }

                // quadratic reprobe
                i++;
                index = (index + i) & TableMask(table);
            }

            value = default;
            return false;
        }

        // Allocates a table for 'size' entries (plus the aux element) and initializes its hash shift.
        // If the allocation fails, one more attempt is made with the initial cache size.
        // We generally do not want OOM in cache lookups, just return null unless throwOnFail is specified.
        private Entry[]? CreateCacheTable(int size, bool throwOnFail = false)
        {
            // size must be positive
            Debug.Assert(size > 1);
            // size must be a power of two
            Debug.Assert((size & (size - 1)) == 0);

            Entry[]? table = null;
            try
            {
                table = new Entry[size + 1];
            }
            catch (OutOfMemoryException) when (!throwOnFail)
            {
            }

            if (table == null)
            {
                size = _initialCacheSize;
                try
                {
                    table = new Entry[size + 1];
                }
                catch (OutOfMemoryException)
                {
                }
            }

            if (table == null)
            {
                return table;
            }

            // Fibonacci hash reduces the value into desired range by shifting right by the number of leading zeroes in 'size-1'
            byte shift = (byte)BitOperations.LeadingZeroCount(size - 1);
            HashShift(table) = shift;

            return table;
        }

        // Tries to add a key/value pair to the cache.
        // This is best-effort: the pair may not end up in the cache, for example when the current
        // table is the sentinel, when the cache gets flushed, or when another writer wins a race for the entry.
        internal void TrySet(TKey key, TValue value)
        {
            int bucket;
            int hash = key.GetHashCode();
            Entry[] table;

            do
            {
                table = _table;
                if (IsSentinel(table))
                {
                    // 2-element table is used as a sentinel.
                    // we did not allocate a real table yet or have flushed it.
                    // try replacing the table, but do not insert anything.
                    MaybeReplaceCacheWithLarger(_lastFlushSize);
                    return;
                }

                bucket = HashToBucket(table, hash);
                int index = bucket;
                ref Entry pEntry = ref Element(table, index);

                for (int i = 0; i < BUCKET_SIZE;)
                {
                    // claim the entry if unused or is more distant than us from its origin.
                    // Note - someone familiar with Robin Hood hashing will notice that
                    //        we do the opposite - we are "robbing the poor".
                    //        Robin Hood strategy improves average lookup in a lossless dictionary by reducing
                    //        outliers via giving preference to more distant entries.
                    //        What we have here is a lossy cache with outliers bounded by the bucket size.
                    //        We improve average lookup by giving preference to the "richer" entries.
                    //        If we used Robin Hood strategy we could eventually end up with all
                    //        entries in the table being maximally "poor".

                    uint version = pEntry.Version;

                    // mask the lower version bit to make it even.
                    // This way we will detect both if version is changing (odd) or has changed (even, but different).
                    version &= unchecked((uint)~1);

                    if ((version & VERSION_NUM_MASK) >= (VERSION_NUM_MASK - 2))
                    {
                        // If exactly VERSION_NUM_MASK updates happen between here and publishing, we may not recognize a race.
                        // It is extremely unlikely, but to not worry about the possibility, let's not allow version to go this high and just get a new cache.
                        // This will not happen often.
                        FlushCurrentCache();
                        return;
                    }

                    if (version == 0 || (version >> VERSION_NUM_SIZE) > i)
                    {
                        uint newVersion = ((uint)i << VERSION_NUM_SIZE) + (version & VERSION_NUM_MASK) + 1;
                        uint versionOrig = Interlocked.CompareExchange(ref pEntry.Version, newVersion, version);
                        if (versionOrig == version)
                        {
                            pEntry._key = key;
                            pEntry._value = value;

                            // entry is in inconsistent state and cannot be read or written to until we
                            // update the version, which is the last thing we do here
                            Volatile.Write(ref pEntry.Version, newVersion + 1);
                            return;
                        }
                        // someone snatched the entry. try the next one in the bucket.
                    }

                    if (key.Equals(pEntry._key))
                    {
                        // looks like we already have an entry for this.
                        // duplicate entries are harmless, but a bit of a waste.
                        return;
                    }

                    // quadratic reprobe
                    i++;
                    index += i;
                    pEntry = ref Element(table, index & TableMask(table));
                }

                // bucket is full.
            } while (TryGrow(table));

            // reread the table after TryGrow.
            table = _table;

            if (IsSentinel(table))
            {
                // do not insert into a sentinel.
                return;
            }

            // pick a victim somewhat randomly within a bucket
            // NB: ++ is not interlocked. We are ok if we lose counts here. It is just a number that changes.
            byte victimDistance = (byte)(VictimCounter(table)++ & (BUCKET_SIZE - 1));
            // position the victim in a quadratic reprobe bucket
            int victim = (victimDistance * victimDistance + victimDistance) / 2;

            {
                ref Entry pEntry = ref Element(table, (bucket + victim) & TableMask(table));

                uint version = pEntry.Version;

                // mask the lower version bit to make it even.
                // This way we will detect both if version is changing (odd) or has changed (even, but different).
                version &= unchecked((uint)~1);

                if ((version & VERSION_NUM_MASK) >= (VERSION_NUM_MASK - 2))
                {
                    // If exactly VERSION_NUM_MASK updates happen between here and publishing, we may not recognize a race.
                    // It is extremely unlikely, but to not worry about the possibility, let's not allow version to go this high and just get a new cache.
                    // This will not happen often.
                    FlushCurrentCache();
                    return;
                }

                // widen the distance to uint before shifting, same as in the probing loop above;
                // shifting it as an int would overflow into the sign bit for distances of 4 and more.
                uint newVersion = ((uint)victimDistance << VERSION_NUM_SIZE) + (version & VERSION_NUM_MASK) + 1;
                uint versionOrig = Interlocked.CompareExchange(ref pEntry.Version, newVersion, version);

                if (versionOrig == version)
                {
                    pEntry._key = key;
                    pEntry._value = value;

                    // publish the entry by making the version even again.
                    Volatile.Write(ref pEntry.Version, newVersion + 1);
                }
            }
        }

        // Number of entries the table can hold, not counting the aux element #0.
        private static int CacheElementCount(Entry[] table)
        {
            return table.Length - 1;
        }

        // Drops all cached entries by replacing the current table with the sentinel.
        private void FlushCurrentCache()
        {
            Entry[] table = _table;
            int lastSize = CacheElementCount(table);
            if (lastSize < _initialCacheSize)
                lastSize = _initialCacheSize;

            // store the last size to use when creating a new table
            // it is just a hint, not needed for correctness, so no synchronization
            // with the writing of the table
            _lastFlushSize = lastSize;
            // flushing is just replacing the table with a sentinel.
            _table = s_sentinelTable!;
        }

        // Replaces the current table with a newly allocated empty one of the requested size.
        // Existing entries are not copied over. Returns false if a new table could not be allocated.
        private bool MaybeReplaceCacheWithLarger(int size)
        {
            Entry[]? newTable = CreateCacheTable(size);
            if (newTable == null)
            {
                return false;
            }

            _table = newTable;
            return true;
        }

        // Tries to replace the cache with a table twice the size of 'table'.
        // Returns false if that would exceed the maximum size or if the allocation failed.
        private bool TryGrow(Entry[] table)
        {
            int newSize = CacheElementCount(table) * 2;
            if (newSize <= _maxCacheSize)
            {
                return MaybeReplaceCacheWithLarger(newSize);
            }

            return false;
        }
    }
}