Skip to content

Commit

Permalink
Slightly improve string unrolling for length 5 and 6 (#77398)
Browse files Browse the repository at this point in the history
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
EgorBo and jkotas authored Oct 27, 2022
1 parent da72713 commit 771eca7
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 19 deletions.
23 changes: 23 additions & 0 deletions src/coreclr/jit/importervectorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,29 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
// [ value1 ]
// [ value2 ]
//

// For 5..6 the overlapping part is 4 bytes
if (len <= 6)
{
UINT32 value2 = MAKEINT32(cns[len - 2], cns[len - 1]);
GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);

ssize_t offset = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32);
GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVar(), TYP_INT, offset, value2, cmpMode, Xor);

if ((firstIndir == nullptr) || (secondIndir == nullptr))
{
return nullptr;
}

secondIndir = gtNewCastNode(TYP_LONG, secondIndir, true, TYP_LONG);
return gtNewOperNode(GT_EQ, TYP_INT, gtNewOperNode(GT_OR, TYP_LONG, firstIndir, secondIndir),
gtNewIconNode(0, TYP_LONG));
}

// For 7..8 the overlapping part is 8 bytes
assert((len == 7) || (len == 8));

UINT64 value2 = MAKEINT64(cns[len - 4], cns[len - 3], cns[len - 2], cns[len - 1]);
GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);

Expand Down
17 changes: 17 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Boolean.cs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ public int CompareTo(bool value)

// Custom string compares for early application use by config switches, etc
//
#if MONO
// We have to keep these implementations for Mono here because MemoryExtensions.Equals("True", OrdinalIgnoreCase)
// triggers CompareInfo static initialization which is not desired when we parse configs on start.
// TODO: Remove once Mono aligns its behavior with CoreCLR around .beforefieldinit
// https://github.com/dotnet/runtime/issues/77513
internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value)
{
// "true" as a ulong, each char |'d with 0x0020 for case-insensitivity
Expand All @@ -205,6 +210,18 @@ internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value)
(((MemoryMarshal.Read<ulong>(MemoryMarshal.AsBytes(value)) | 0x0020002000200020) == fals_val) &
((value[4] | 0x20) == 'e'));
}
#else
/// <summary>
/// Checks whether <paramref name="value"/> matches <see cref="TrueLiteral"/> using an
/// ordinal, case-insensitive comparison.
/// </summary>
/// <param name="value">The characters to test.</param>
/// <returns>
/// <see langword="true"/> when <paramref name="value"/> equals <see cref="TrueLiteral"/>
/// under <see cref="StringComparison.OrdinalIgnoreCase"/>; otherwise <see langword="false"/>.
/// </returns>
/// <remarks>
/// JIT inlines and unrolls this, see https://github.com/dotnet/runtime/pull/77398
/// </remarks>
internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value) =>
    MemoryExtensions.Equals(value, TrueLiteral, StringComparison.OrdinalIgnoreCase);

/// <summary>
/// Checks whether <paramref name="value"/> matches <see cref="FalseLiteral"/> using an
/// ordinal, case-insensitive comparison.
/// </summary>
/// <param name="value">The characters to test.</param>
/// <returns>
/// <see langword="true"/> when <paramref name="value"/> equals <see cref="FalseLiteral"/>
/// under <see cref="StringComparison.OrdinalIgnoreCase"/>; otherwise <see langword="false"/>.
/// </returns>
internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value) =>
    MemoryExtensions.Equals(value, FalseLiteral, StringComparison.OrdinalIgnoreCase);
#endif

// Determines whether a String represents true or false.
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,19 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
IntPtr byteOffset = IntPtr.Zero;

#if TARGET_64BIT
ulong valueAu64 = 0;
ulong valueBu64 = 0;
// Read 4 chars (64 bits) at a time from each string
while ((uint)length >= 4)
{
ulong valueA = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
ulong valueB = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
valueAu64 = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
valueBu64 = Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));

// A 32-bit test - even with the bit-twiddling here - is more efficient than a 64-bit test.
ulong temp = valueA | valueB;
ulong temp = valueAu64 | valueBu64;
if (!Utf16Utility.AllCharsInUInt32AreAscii((uint)temp | (uint)(temp >> 32)))
{
goto NonAscii; // one of the inputs contains non-ASCII data
goto NonAscii64; // one of the inputs contains non-ASCII data
}

// Generally, the caller has likely performed a first-pass check that the input strings
Expand All @@ -100,7 +102,7 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
// branching within this loop unless we're about to exit the loop, either due to failure or
// due to us running out of input data.

if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueA, valueB))
if (!Utf16Utility.UInt64OrdinalIgnoreCaseAscii(valueAu64, valueBu64))
{
return false;
}
Expand All @@ -109,20 +111,21 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
length -= 4;
}
#endif

uint valueAu32 = 0;
uint valueBu32 = 0;
// Read 2 chars (32 bits) at a time from each string
#if TARGET_64BIT
if ((uint)length >= 2)
#else
while ((uint)length >= 2)
#endif
{
uint valueA = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
uint valueB = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));
valueAu32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charA, byteOffset)));
valueBu32 = Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref Unsafe.AddByteOffset(ref charB, byteOffset)));

if (!Utf16Utility.AllCharsInUInt32AreAscii(valueA | valueB))
if (!Utf16Utility.AllCharsInUInt32AreAscii(valueAu32 | valueBu32))
{
goto NonAscii; // one of the inputs contains non-ASCII data
goto NonAscii32; // one of the inputs contains non-ASCII data
}

// Generally, the caller has likely performed a first-pass check that the input strings
Expand All @@ -132,7 +135,7 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
// branching within this loop unless we're about to exit the loop, either due to failure or
// due to us running out of input data.

if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueA, valueB))
if (!Utf16Utility.UInt32OrdinalIgnoreCaseAscii(valueAu32, valueBu32))
{
return false;
}
Expand All @@ -145,31 +148,47 @@ internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length
{
Debug.Assert(length == 1);

uint valueA = Unsafe.AddByteOffset(ref charA, byteOffset);
uint valueB = Unsafe.AddByteOffset(ref charB, byteOffset);
valueAu32 = Unsafe.AddByteOffset(ref charA, byteOffset);
valueBu32 = Unsafe.AddByteOffset(ref charB, byteOffset);

if ((valueA | valueB) > 0x7Fu)
if ((valueAu32 | valueBu32) > 0x7Fu)
{
goto NonAscii; // one of the inputs contains non-ASCII data
goto NonAscii32; // one of the inputs contains non-ASCII data
}

if (valueA == valueB)
if (valueAu32 == valueBu32)
{
return true; // exact match
}

valueA |= 0x20u;
if ((uint)(valueA - 'a') > (uint)('z' - 'a'))
valueAu32 |= 0x20u;
if ((uint)(valueAu32 - 'a') > (uint)('z' - 'a'))
{
return false; // not exact match, and first input isn't in [A-Za-z]
}

return valueA == (valueB | 0x20u);
return valueAu32 == (valueBu32 | 0x20u);
}

Debug.Assert(length == 0);
return true;

NonAscii32:
// The slow fallback is only needed when both values contain non-ASCII data; if either value is all-ASCII, return false.
if (Utf16Utility.AllCharsInUInt32AreAscii(valueAu32) || Utf16Utility.AllCharsInUInt32AreAscii(valueBu32))
{
return false;
}
goto NonAscii;

#if TARGET_64BIT
NonAscii64:
// The slow fallback is only needed when both values contain non-ASCII data; if either value is all-ASCII, return false.
if (Utf16Utility.AllCharsInUInt64AreAscii(valueAu64) || Utf16Utility.AllCharsInUInt64AreAscii(valueBu64))
{
return false;
}
#endif
NonAscii:
// The non-ASCII case is factored out into its own helper method so that the JIT
// doesn't need to emit a complex prolog for its caller (this method).
Expand Down

0 comments on commit 771eca7

Please sign in to comment.