Slightly improve string unrolling for length 5 and 6 (#77398)

Co-authored-by: Jan Kotas <jkotas@microsoft.com>
dotnet · Oct 27, 2022 · 771eca7 · 771eca7
1 parent da72713
commit 771eca7
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 19 deletions.
diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp
@@ -432,6 +432,29 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
     //   [          value1          ]
     //                 [          value2          ]
     //
+
+    // For 5..6 the overlapping part is 4 bytes
+    if (len <= 6)
+    {
+        UINT32   value2     = MAKEINT32(cns[len - 2], cns[len - 1]);
+        GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);
+
+        ssize_t  offset      = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32);
+        GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVar(), TYP_INT, offset, value2, cmpMode, Xor);
+
+        if ((firstIndir == nullptr) || (secondIndir == nullptr))
+        {
+            return nullptr;
+        }
+
+        secondIndir = gtNewCastNode(TYP_LONG, secondIndir, true, TYP_LONG);
+        return gtNewOperNode(GT_EQ, TYP_INT, gtNewOperNode(GT_OR, TYP_LONG, firstIndir, secondIndir),
+                             gtNewIconNode(0, TYP_LONG));
+    }
+
+    // For 7..8 the overlapping part is 8 bytes
+    assert((len == 7) || (len == 8));
+
     UINT64   value2     = MAKEINT64(cns[len - 4], cns[len - 3], cns[len - 2], cns[len - 1]);
     GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);
 

diff --git a/src/libraries/System.Private.CoreLib/src/System/Boolean.cs b/src/libraries/System.Private.CoreLib/src/System/Boolean.cs
@@ -189,6 +189,11 @@ public int CompareTo(bool value)
 
         // Custom string compares for early application use by config switches, etc
         //
+#if MONO
+        // We have to keep these implementations for Mono here because MemoryExtensions.Equals("True", OrdinalIgnoreCase)
+        // triggers CompareInfo static initialization which is not desired when we parse configs on start.
+        // TODO: Remove once Mono aligns its behavior with CoreCLR around .beforefieldinit
+        // https://github.com/dotnet/runtime/issues/77513
         internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value)
         {
             // "true" as a ulong, each char |'d with 0x0020 for case-insensitivity
@@ -205,6 +210,18 @@ internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value)
                    (((MemoryMarshal.Read<ulong>(MemoryMarshal.AsBytes(value)) | 0x0020002000200020) == fals_val) &
                     ((value[4] | 0x20) == 'e'));
         }
+#else
+        internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value)
+        {
+            // Ordinal, case-insensitive match against TrueLiteral; the JIT inlines and unrolls this call, see https://github.com/dotnet/runtime/pull/77398
+            return value.Equals(TrueLiteral, StringComparison.OrdinalIgnoreCase);
+        }
+
+        internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value)
+        {
+            return value.Equals(FalseLiteral, StringComparison.OrdinalIgnoreCase); // ordinal, case-insensitive match against FalseLiteral
+        }
+#endif
 
         // Determines whether a String represents true or false.
         //

diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs
@@ -80,17 +80,19 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
             IntPtr byteOffset = IntPtr.Zero;
 
 #if TARGET_64BIT
+            ulong valueAu64 = 0;
+            ulong valueBu64 = 0;
             // Read 4 chars (64 bits) at a time from each string
             while ((uint)length >= 4)
             {
-                ulong valueA = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
-                ulong valueB = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
+                valueAu64 = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
+                valueBu64 = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
 
                 // A 32-bit test - even with the bit-twiddling here - is more efficient than a 64-bit test.
-                ulong temp = valueA | valueB;
+                ulong temp = valueAu64 | valueBu64;
                 if (!Utf16Utility.AllCharsInUInt32AreAscii((uint)temp | (uint)(temp >> 32)))
                 {
-                    goto NonAscii; // one of the inputs contains non-ASCII data
+                    goto NonAscii64; // one of the inputs contains non-ASCII data
                 }
 
                 // Generally, the caller has likely performed a first-pass check that the input strings
@@ -100,7 +102,7 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
                 // branching within this loop unless we're about to exit the loop, either due to failure or
                 // due to us running out of input data.
 
-                if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueA, valueB))
+                if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueAu64, valueBu64))
                 {
                     return false;
                 }
@@ -109,20 +111,21 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
                 length -= 4;
             }
 #endif
-
+            uint valueAu32 = 0;
+            uint valueBu32 = 0;
             // Read 2 chars (32 bits) at a time from each string
 #if TARGET_64BIT
             if ((uint)length >= 2)
 #else
             while ((uint)length >= 2)
 #endif
             {
-                uint valueA = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
-                uint valueB = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
+                valueAu32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
+                valueBu32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
 
-                if (!Utf16Utility.AllCharsInUInt32AreAscii(valueA | valueB))
+                if (!Utf16Utility.AllCharsInUInt32AreAscii(valueAu32 | valueBu32))
                 {
-                    goto NonAscii; // one of the inputs contains non-ASCII data
+                    goto NonAscii32; // one of the inputs contains non-ASCII data
                 }
 
                 // Generally, the caller has likely performed a first-pass check that the input strings
@@ -132,7 +135,7 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
                 // branching within this loop unless we're about to exit the loop, either due to failure or
                 // due to us running out of input data.
 
-                if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueA, valueB))
+                if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueAu32, valueBu32))
                 {
                     return false;
                 }
@@ -145,31 +148,47 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
             {
                 Debug.Assert(length == 1);
 
-                uint valueA = Unsafe.AddByteOffset(ref charA, byteOffset);
-                uint valueB = Unsafe.AddByteOffset(ref charB, byteOffset);
+                valueAu32 = Unsafe.AddByteOffset(ref charA, byteOffset);
+                valueBu32 = Unsafe.AddByteOffset(ref charB, byteOffset);
 
-                if ((valueA | valueB) > 0x7Fu)
+                if ((valueAu32 | valueBu32) > 0x7Fu)
                 {
-                    goto NonAscii; // one of the inputs contains non-ASCII data
+                    goto NonAscii32; // one of the inputs contains non-ASCII data
                 }
 
-                if (valueA == valueB)
+                if (valueAu32 == valueBu32)
                 {
                     return true; // exact match
                 }
 
-                valueA |= 0x20u;
-                if ((uint)(valueA - 'a') > (uint)('z' - 'a'))
+                valueAu32 |= 0x20u;
+                if ((uint)(valueAu32 - 'a') > (uint)('z' - 'a'))
                 {
                     return false; // not exact match, and first input isn't in [A-Za-z]
                 }
 
-                return valueA == (valueB | 0x20u);
+                return valueAu32 == (valueBu32 | 0x20u);
             }
 
             Debug.Assert(length == 0);
             return true;
 
+        NonAscii32:
+            // Both values must contain non-ASCII data to use the slow fallback; if either one is entirely ASCII, return false
+            if (Utf16Utility.AllCharsInUInt32AreAscii(valueAu32) || Utf16Utility.AllCharsInUInt32AreAscii(valueBu32))
+            {
+                return false;
+            }
+            goto NonAscii;
+
+#if TARGET_64BIT
+        NonAscii64:
+            // Both values must contain non-ASCII data to use the slow fallback; if either one is entirely ASCII, return false
+            if (Utf16Utility.AllCharsInUInt64AreAscii(valueAu64) || Utf16Utility.AllCharsInUInt64AreAscii(valueBu64))
+            {
+                return false;
+            }
+#endif
         NonAscii:
             // The non-ASCII case is factored out into its own helper method so that the JIT
             // doesn't need to emit a complex prolog for its caller (this method).