From 057be76b66aed84bc4198a2884a59c11814115c6 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Tue, 10 Sep 2024 07:16:51 -0700 Subject: [PATCH] Add a SearchValues implementation for values with unique low nibbles (#106900) * Add SearchValues implementation for values with unique low nibbles * More generics * Tweak comment * Remove extra empty line * Update comment --- .../System.Memory/tests/Span/SearchValues.cs | 7 + .../Any2CharPackedIgnoreCaseSearchValues.cs | 4 +- .../SearchValues/AsciiByteSearchValues.cs | 34 +- .../SearchValues/AsciiCharSearchValues.cs | 36 ++- .../src/System/SearchValues/BitVector256.cs | 4 - .../SearchValues/IndexOfAnyAsciiSearcher.cs | 294 ++++++++++++------ .../ProbabilisticWithAsciiCharSearchValues.cs | 12 +- .../src/System/SearchValues/SearchValues.cs | 57 ++-- .../Strings/Helpers/AhoCorasick.cs | 6 +- 9 files changed, 310 insertions(+), 144 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/SearchValues.cs b/src/libraries/System.Memory/tests/Span/SearchValues.cs index 8f39a95bd3c0f..00e9c71e3dfb7 100644 --- a/src/libraries/System.Memory/tests/Span/SearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/SearchValues.cs @@ -35,6 +35,8 @@ public static IEnumerable Values_MemberData() "abcd", "aeio", "aeiou", + "Aabc", + "Aabcd", "abceiou", "123456789", "123456789123", @@ -82,6 +84,11 @@ public static IEnumerable Values_MemberData() { yield return Pair(value); yield return Pair('a' + value); + yield return Pair('\0' + value); + yield return Pair('\u0001' + value); + yield return Pair('\u00FE' + value); + yield return Pair('\u00FF' + value); + yield return Pair('\uFF00' + value); // Test some more duplicates if (value.Length > 0) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs index 1073fcf3c8185..7b789febead06 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs @@ -53,7 +53,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -61,7 +61,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs index 57b755b2ce8a4..a3562f8c98a7d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs @@ -1,20 +1,36 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +using System.Text; namespace System.Buffers { - internal sealed class AsciiByteSearchValues : SearchValues + internal sealed class AsciiByteSearchValues : SearchValues + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { private IndexOfAnyAsciiSearcher.AsciiState _state; - public AsciiByteSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + public AsciiByteSearchValues(ReadOnlySpan values) + { + // Despite the name being Ascii, this type may be used with non-ASCII values on ARM. + // See IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch. + Debug.Assert(Ascii.IsValid(values) || (AdvSimd.IsSupported && TUniqueLowNibble.Value)); + + if (TUniqueLowNibble.Value) + { + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + } + else + { + IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + } + } internal override byte[] GetValues() => _state.Lookup.GetByteValues(); @@ -28,7 +44,7 @@ internal override bool ContainsCore(byte value) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -36,7 +52,7 @@ internal override int IndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -44,7 
+60,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -52,7 +68,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -60,7 +76,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -68,7 +84,7 @@ internal override bool ContainsAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs index 111a3ad313b9d..175c2737b9a7e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs @@ -1,35 +1,51 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +using System.Text; namespace System.Buffers { - internal sealed class AsciiCharSearchValues : SearchValues + internal sealed class AsciiCharSearchValues : SearchValues where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { private IndexOfAnyAsciiSearcher.AsciiState _state; - public AsciiCharSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + public AsciiCharSearchValues(ReadOnlySpan values) + { + // Despite the name being Ascii, this type may be used with non-ASCII values on ARM. + // See IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch. 
+ Debug.Assert(Ascii.IsValid(values) || (AdvSimd.IsSupported && TUniqueLowNibble.Value)); + + if (TUniqueLowNibble.Value) + { + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + } + else + { + IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + } + } internal override char[] GetValues() => _state.Lookup.GetCharValues(); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsCore(char value) => - _state.Lookup.Contains128(value); + _state.Lookup.Contains256(value); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -37,7 +53,7 @@ internal override int IndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -45,7 +61,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -53,7 +69,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override 
int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -61,7 +77,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -69,7 +85,7 @@ internal override bool ContainsAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs index 56e68907c86d7..74d0836960bdd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs @@ -31,10 +31,6 @@ public void Set(int c) _values[offset] |= significantBit; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly bool Contains128(char c) => - c < 128 && ContainsUnchecked(c); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool Contains256(char c) => c < 256 && ContainsUnchecked(c); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 
36965c24da7e5..c722387c4f4e1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -13,24 +13,35 @@ namespace System.Buffers { internal static class IndexOfAnyAsciiSearcher { + // Reused for both ASCII and UniqueLowNibble searches since the state looks the same (a Vector128). public struct AsciiState(Vector128 bitmap, BitVector256 lookup) { public Vector256 Bitmap = Vector256.Create(bitmap); - public BitVector256 Lookup = lookup; + public readonly BitVector256 Lookup = lookup; public readonly AsciiState CreateInverse() => new AsciiState(~Bitmap._lower, Lookup.CreateInverse()); } - public struct AnyByteState(Vector128 bitmap0, Vector128 bitmap1, BitVector256 lookup) + public readonly struct AnyByteState(Vector128 bitmap0, Vector128 bitmap1, BitVector256 lookup) { - public Vector256 Bitmap0 = Vector256.Create(bitmap0); - public Vector256 Bitmap1 = Vector256.Create(bitmap1); - public BitVector256 Lookup = lookup; + public readonly Vector256 Bitmap0 = Vector256.Create(bitmap0); + public readonly Vector256 Bitmap1 = Vector256.Create(bitmap1); + public readonly BitVector256 Lookup = lookup; } internal static bool IsVectorizationSupported => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void SetBitmapBit(byte* bitmap, int value) + { + Debug.Assert((uint)value <= 127); + + int highNibble = value >> 4; + int lowNibble = value & 0xF; + bitmap[(uint)lowNibble] |= (byte)(1 << highNibble); + } + internal static unsafe void ComputeAnyByteState(ReadOnlySpan values, out AnyByteState state) { // The exact format of these bitmaps differs from the other ComputeBitmap overloads as it's meant for the full [0, 255] range algorithm. 
@@ -46,16 +57,13 @@ internal static unsafe void ComputeAnyByteState(ReadOnlySpan values, out A { lookupLocal.Set(b); - int highNibble = b >> 4; - int lowNibble = b & 0xF; - - if (highNibble < 8) + if (b < 128) { - bitmapLocal0[(uint)lowNibble] |= (byte)(1 << highNibble); + SetBitmapBit(bitmapLocal0, b); } else { - bitmapLocal1[(uint)lowNibble] |= (byte)(1 << (highNibble - 8)); + SetBitmapBit(bitmapLocal1, b - 128); } } @@ -81,14 +89,83 @@ internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out Asc } lookupLocal.Set(value); + SetBitmapBit(bitmapLocal, value); + } + + state = new AsciiState(bitmapSpace, lookupLocal); + } - int highNibble = value >> 4; - int lowNibble = value & 0xF; + public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int maxInclusive) + where T : struct, IUnsignedNumber + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); - bitmapLocal[(uint)lowNibble] |= (byte)(1 << highNibble); + if (!IsVectorizationSupported || values.Length > 16) + { + return false; } - state = new AsciiState(bitmapSpace, lookupLocal); + if (Ssse3.IsSupported && maxInclusive > 127) + { + // We could support values higher than 127 if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupCore. + // We currently optimize for the common case of ASCII characters instead, saving an instruction there. + return false; + } + + if (typeof(T) == typeof(char) && maxInclusive >= byte.MaxValue) + { + // When packing UTF-16 characters into bytes, values may saturate to 255 (false positives), hence ">=" instead of ">". + return false; + } + + // We assume there are no duplicates to simplify the logic (if there are any, they just won't use this searching approach). + int seenNibbles = 0; + + foreach (T tValue in values) + { + int bit = 1 << (int.CreateChecked(tValue) & 0xF); + + if ((seenNibbles & bit) != 0) + { + // We already saw a value with the same low nibble. 
+ return false; + } + + seenNibbles |= bit; + } + + return true; + } + + public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out AsciiState state) + where T : struct, IUnsignedNumber + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); + + Vector128 valuesByLowNibble = default; + BitVector256 lookup = default; + + foreach (T tValue in values) + { + byte value = byte.CreateTruncating(tValue); + lookup.Set(value); + valuesByLowNibble.SetElementUnsafe(value & 0xF, value); + } + + // Elements of 'valuesByLowNibble' where no value had that low nibble will be left uninitialized at 0. + // For most, that is okay, as only the zero character in the input could ever match against them, + // but where such input characters will always be mapped to the 0th element of 'valuesByLowNibble'. + // + // That does mean we could still see false positives if none of the values had a low nibble of zero. + // To avoid that, we can replace the 0th element with any other byte that has a non-zero low nibble. + // The zero character will no longer match, and the new value we pick won't match either as + // it will be mapped to a different element in 'valuesByLowNibble' given its non-zero low nibble. + if (valuesByLowNibble.GetElement(0) == 0 && !lookup.Contains(0)) + { + valuesByLowNibble.SetElementUnsafe(0, (byte)1); + } + + state = new AsciiState(valuesByLowNibble, lookup); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -104,10 +181,7 @@ private static unsafe bool TryComputeBitmap(ReadOnlySpan values, byte* bit return false; } - int highNibble = c >> 4; - int lowNibble = c & 0xF; - - bitmapLocal[(uint)lowNibble] |= (byte)(1 << highNibble); + SetBitmapBit(bitmapLocal, c); } needleContainsZero = (bitmap[0] & 1) != 0; @@ -147,8 +221,8 @@ private static unsafe bool TryIndexOfAny(ref short searchSpace, int se state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero - ? 
IndexOfAny(ref searchSpace, searchSpaceLength, ref state) - : IndexOfAny(ref searchSpace, searchSpaceLength, ref state); + ? IndexOfAny(ref searchSpace, searchSpaceLength, ref state) + : IndexOfAny(ref searchSpace, searchSpaceLength, ref state); return true; } } @@ -174,8 +248,8 @@ private static unsafe bool TryLastIndexOfAny(ref short searchSpace, in state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero - ? LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state) - : LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state); + ? LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state) + : LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state); return true; } } @@ -188,27 +262,30 @@ private static unsafe bool TryLastIndexOfAny(ref short searchSpace, in [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations => - IndexOfAnyCore>(ref 
searchSpace, searchSpaceLength, ref state); + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TResult : struct where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst where TResultMapper : struct, IResultMapper { ref short currentSearchSpace = ref searchSpace; @@ -220,7 +297,7 @@ private static TResult IndexOfAnyCore source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -276,7 +353,7 @@ private static TResult IndexOfAnyCore source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); @@ -305,7 +382,7 @@ private static TResult IndexOfAnyCore source0 = Vector128.LoadUnsafe(ref currentSearchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + 
Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -329,7 +406,7 @@ private static TResult IndexOfAnyCore source0 = Vector128.LoadUnsafe(ref firstVector); Vector128 source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); @@ -342,16 +419,17 @@ private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static int LastIndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { if (searchSpaceLength < Vector128.Count) { for (int i = searchSpaceLength - 1; i >= 0; i--) { char c = (char)Unsafe.Add(ref searchSpace, i); - if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) + if (TNegator.NegateIfNeeded(state.Lookup.Contains256(c))) { return i; } @@ -385,7 +463,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -407,7 +485,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); - Vector256 result = IndexOfAnyLookup(source0, source1, 
bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -436,7 +514,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -458,7 +536,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -472,24 +550,27 @@ public static int LastIndexOfAny(ref short searchSpace [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref byte searchSpace, int 
searchSpaceLength, ref AsciiState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) where TResult : struct where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst where TResultMapper : struct, IResultMapper { ref byte currentSearchSpace = ref searchSpace; @@ -530,7 +611,7 @@ private static TResult IndexOfAnyCore(ref byte { Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -555,7 +636,7 @@ private static TResult IndexOfAnyCore(ref byte Vector128 source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd); Vector256 source = Vector256.Create(source0, source1); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); @@ -579,7 +660,7 @@ private static TResult IndexOfAnyCore(ref byte { Vector128 source = 
Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -604,7 +685,7 @@ private static TResult IndexOfAnyCore(ref byte ulong source1 = Unsafe.ReadUnaligned(ref halfVectorAwayFromEnd); Vector128 source = Vector128.Create(source0, source1).AsByte(); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); @@ -617,8 +698,9 @@ private static TResult IndexOfAnyCore(ref byte [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { if (searchSpaceLength < sizeof(ulong)) { @@ -656,7 +738,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -679,7 +761,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); Vector256 source = Vector256.Create(source0, 
source1); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -705,7 +787,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -728,7 +810,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace ulong source1 = Unsafe.ReadUnaligned(ref secondVector); Vector128 source = Vector128.Create(source0, source1).AsByte(); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -1020,13 +1102,14 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace [CompExactlyDependsOn(typeof(Sse2))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) + private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { Vector128 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - Vector128 result = IndexOfAnyLookupCore(source, bitmapLookup); + Vector128 result = IndexOfAnyLookupCore(source, 
bitmapLookup); return TNegator.NegateIfNeeded(result); } @@ -1035,55 +1118,86 @@ private static Vector128 IndexOfAnyLookup(Vector [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) + private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { - // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. - // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. - Vector128 lowNibbles = Ssse3.IsSupported - ? source - : source & Vector128.Create((byte)0xF); - - // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. - // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. - // On X86 and WASM, use a logical right shift instead. - Vector128 highNibbles = AdvSimd.IsSupported - ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() - : source >>> 4; - - // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. - // Lookup the rows via the lower nibble and the column via the higher nibble. - Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); - - // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. 
- Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); - - Vector128 result = bitMask & bitPositions; - return result; + if (TUniqueLowNibble.Value) + { + // Based on http://0x80.pl/articles/simd-byte-lookup.html#special-case-3-unique-lower-and-higher-nibbles + + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we only use this approach if + // all values are <= 127 when Ssse3 is supported (see CanUseUniqueLowNibbleSearch). + // False positives from values mapped to 0 will be ruled out by the Vector128.Equals comparison below. + Vector128 lowNibbles = Ssse3.IsSupported + ? source + : source & Vector128.Create((byte)0xF); + + // We use a shuffle to look up potential matches for each byte based on its low nibble. + // Since all values have a unique low nibble, there's at most one potential match per nibble. + Vector128 values = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); + + // Compare potential matches with the source to rule out false positives that have a different high nibble. + return Vector128.Equals(source, values); + } + else + { + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. + Vector128 lowNibbles = Ssse3.IsSupported + ? source + : source & Vector128.Create((byte)0xF); + + // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. + // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. + // On X86 and WASM, use a logical right shift instead. + Vector128 highNibbles = AdvSimd.IsSupported + ? 
AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() + : source >>> 4; + + // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. + // Lookup the rows via the lower nibble and the column via the higher nibble. + Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); + + // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. + Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); + + return bitMask & bitPositions; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) + private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { Vector256 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); + Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); return TNegator.NegateIfNeeded(result); } [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 bitmapLookup) + private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 bitmapLookup) + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { // See comments in IndexOfAnyLookupCore(Vector128) above for more details. 
- Vector256 highNibbles = source >>> 4; - Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); - Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); - Vector256 result = bitMask & bitPositions; - return result; + if (TUniqueLowNibble.Value) + { + Vector256 values = Avx2.Shuffle(bitmapLookup, source); + return Vector256.Equals(source, values); + } + else + { + Vector256 highNibbles = source >>> 4; + Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); + Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); + return bitMask & bitPositions; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs index 7b05c3f8a3b77..eb07969130a83 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs @@ -56,7 +56,7 @@ internal override int IndexOfAny(ReadOnlySpan span) { Debug.Assert(_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap did not contain a 0."); - offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -66,7 +66,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(span)), Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || !_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); - offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -105,7 +105,7 
@@ internal override int IndexOfAnyExcept(ReadOnlySpan span) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. - offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _asciiState); @@ -155,7 +155,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) { Debug.Assert(_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap did not contain a 0."); - offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -165,7 +165,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(span)), Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || !_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); - offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -194,7 +194,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. 
- int offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + int offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _asciiState); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs index e02192a70ebbe..1bd81b6b4ff58 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs @@ -41,6 +41,13 @@ public static SearchValues Create(params ReadOnlySpan values) return new RangeByteSearchValues(minInclusive, maxInclusive); } + // Depending on the hardware, UniqueLowNibble can be faster than even range or 2 values. + // It's currently consistently faster than 4/5 values on all tested platforms (Arm, Avx2, Avx512). + if (values.Length >= 4 && IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, maxInclusive)) + { + return new AsciiByteSearchValues(values); + } + if (values.Length <= 5) { Debug.Assert(values.Length is 2 or 3 or 4 or 5); @@ -55,7 +62,7 @@ public static SearchValues Create(params ReadOnlySpan values) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) { - return new AsciiByteSearchValues(values); + return new AsciiByteSearchValues(values); } return new AnyByteSearchValues(values); @@ -122,29 +129,39 @@ public static SearchValues Create(params ReadOnlySpan values) : new Any3SearchValues(shortValues); } - // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) + // If the values are sets of 2 ASCII letters with both cases, we can use an approach that + // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20). 
+ // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}"). + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && PackedSpanHelpers.PackedIndexOfIsSupported && + maxInclusive < 128 && values.Length == 4 && minInclusive > 0) { - // If the values are sets of 2 ASCII letters with both cases, we can use an approach that - // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20). - // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}"). - if (PackedSpanHelpers.PackedIndexOfIsSupported && values.Length == 4 && minInclusive > 0) + Span copy = stackalloc char[4]; + values.CopyTo(copy); + copy.Sort(); + + if ((copy[0] ^ copy[2]) == 0x20 && + (copy[1] ^ copy[3]) == 0x20) { - Span copy = stackalloc char[4]; - values.CopyTo(copy); - copy.Sort(); - - if ((copy[0] ^ copy[2]) == 0x20 && - (copy[1] ^ copy[3]) == 0x20) - { - // We pick the higher two values (with the 0x20 bit set). "AaBb" => 'a', 'b' - return new Any2CharPackedIgnoreCaseSearchValues(copy[2], copy[3]); - } + // We pick the higher two values (with the 0x20 bit set). "AaBb" => 'a', 'b' + return new Any2CharPackedIgnoreCaseSearchValues(copy[2], copy[3]); } + } + // Depending on the hardware, UniqueLowNibble can be faster than most implementations we currently prefer above. + // It's currently consistently faster than 4/5 values or Ascii on all tested platforms (Arm, Avx2, Avx512). + if (IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, maxInclusive)) + { return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 - ? new AsciiCharSearchValues(values) - : new AsciiCharSearchValues(values); + ? 
new AsciiCharSearchValues(values) + : new AsciiCharSearchValues(values); + } + + // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) + { + return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 + ? new AsciiCharSearchValues(values) + : new AsciiCharSearchValues(values); } if (values.Length == 4) @@ -162,7 +179,7 @@ public static SearchValues Create(params ReadOnlySpan values) // If we have both ASCII and non-ASCII characters, use an implementation that // does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap. - return (Ssse3.IsSupported || PackedSimd.IsSupported) && values.Contains('\0') + return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 ? new ProbabilisticWithAsciiCharSearchValues(values, maxInclusive) : new ProbabilisticWithAsciiCharSearchValues(values, maxInclusive); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs index ebc94616ae642..ad3cdfe2e2837 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs @@ -49,7 +49,7 @@ public readonly bool ShouldUseAsciiFastScan for (int i = 0; i < 128; i++) { - if (_startingAsciiChars.Lookup.Contains128((char)i)) + if (_startingAsciiChars.Lookup.Contains256((char)i)) { frequency += CharacterFrequencyHelper.AsciiFrequency[i]; } @@ -96,7 +96,7 @@ private readonly int IndexOfAnyCore(ReadOnly // If '\0' is one of the starting chars and we're running on Ssse3 hardware, this may return false-positives. // False-positives here are okay, we'll just rule them out below. 
While we could flow the Ssse3AndWasmHandleZeroInNeedle // generic through, we expect such values to be rare enough that introducing more code is not worth it. - int offset = IndexOfAnyAsciiSearcher.IndexOfAny( + int offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetReference(span), i)), remainingLength, ref Unsafe.AsRef(in _startingAsciiChars)); @@ -205,7 +205,7 @@ private readonly int IndexOfAnyCaseInsensitiveUnicode(ReadOnly if (remainingLength >= Vector128.Count) { - int offset = IndexOfAnyAsciiSearcher.IndexOfAny( + int offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetReference(span), i)), remainingLength, ref Unsafe.AsRef(in _startingAsciiChars));