From 7b7cfba7447b2c0bb13f933fa4b22972a67ea817 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Feb 2024 03:59:36 +0100 Subject: [PATCH 1/5] Expand String.EndsWith/MemoryExtensions.EndsWith in JIT --- src/coreclr/jit/compiler.h | 12 +- src/coreclr/jit/importercalls.cpp | 28 +++- src/coreclr/jit/importervectorization.cpp | 128 +++++++++--------- src/coreclr/jit/namedintrinsiclist.h | 2 + .../System/MemoryExtensions.Globalization.cs | 1 + .../src/System/MemoryExtensions.cs | 2 + .../src/System/String.Comparison.cs | 1 + 7 files changed, 106 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a18fee959436b..95b59ebdd8f2b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4423,12 +4423,18 @@ class Compiler Eq, // (d1 == cns1) && (s2 == cns2) Xor, // (d1 ^ cns1) | (s2 ^ cns2) }; - GenTree* impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags); - GenTree* impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags); + enum StringComparisonKind + { + Equals, + StartsWith, + EndsWith + }; + GenTree* impUtf16StringComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags); + GenTree* impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags); GenTree* impExpandHalfConstEquals(GenTreeLclVarCommon* data, GenTree* lengthFld, bool checkForNull, - bool startsWith, + StringComparisonKind kind, WCHAR* cnsData, int len, int dataOffset, diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b263285c42b54..ac50d0142b1d6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2827,26 +2827,38 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_String_Equals: { - retNode = impStringEqualsOrStartsWith(/*startsWith:*/ false, sig, methodFlags); + retNode = impUtf16StringComparison(Equals, sig, methodFlags); break; } case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: { - retNode = impSpanEqualsOrStartsWith(/*startsWith:*/ false, sig, methodFlags); + retNode = impUtf16SpanComparison(Equals, sig, methodFlags); break; } case NI_System_String_StartsWith: { - retNode = impStringEqualsOrStartsWith(/*startsWith:*/ true, sig, methodFlags); + retNode = impUtf16StringComparison(StartsWith, sig, methodFlags); + break; + } + + case NI_System_String_EndsWith: + { + retNode = impUtf16StringComparison(EndsWith, sig, methodFlags); break; } case NI_System_MemoryExtensions_StartsWith: { - retNode = impSpanEqualsOrStartsWith(/*startsWith:*/ true, sig, methodFlags); + retNode = impUtf16SpanComparison(StartsWith, sig, methodFlags); + break; + } + + case NI_System_MemoryExtensions_EndsWith: + { + retNode = impUtf16SpanComparison(EndsWith, sig, methodFlags); break; } @@ -8932,6 +8944,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_MemoryExtensions_StartsWith; } + else if (strcmp(methodName, "EndsWith") == 0) + { + result = NI_System_MemoryExtensions_EndsWith; + } } break; } @@ -9032,6 +9048,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_String_StartsWith; } + else if (strcmp(methodName, "EndsWith") == 0) + { + result = NI_System_String_EndsWith; + } } break; } diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 01ee4916d4eef..ce211448b07aa 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -25,6 +25,10 @@ // 8) MemoryExtensions.StartsWith(ROS, ROS) // 9) MemoryExtensions.StartsWith(ROS, ROS, Ordinal or OrdinalIgnoreCase) // +// 10) str.EndsWith(string, Ordinal or OrdinalIgnoreCase) +// 11) MemoryExtensions.EndsWith(ROS, ROS) +// 12) MemoryExtensions.EndsWith(ROS, ROS, Ordinal or OrdinalIgnoreCase) +// // When one of the arguments is a constant string of a [0..32] size so we can inline // a vectorized comparison against it using SWAR or SIMD techniques (e.g. via two V256 vectors) // @@ -148,7 +152,6 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( GenTree* vec2 = gtNewIndir(simdType, gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data), offset2)); GenTree* xor1; - GenTree* orr; if (cmpMode == OrdinalIgnoreCase) { @@ -156,21 +159,8 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( GenTreeVecCon* toLowerVec1 = gtNewVconNode(simdType, toLowerMask); GenTreeVecCon* toLowerVec2 = gtNewVconNode(simdType, (BYTE*)toLowerMask + byteLen - simdSize); -#if defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - GenTree* control; - - control = gtNewIconNode(static_cast((0xF0 | 0xCC) ^ 0xAA)); // (A | B)) ^ C - xor1 = gtNewSimdTernaryLogicNode(simdType, vec1, toLowerVec1, cnsVec1, control, baseType, simdSize); - } - else -#endif // TARGET_XARCH - { - vec1 = gtNewSimdBinOpNode(GT_OR, simdType, vec1, toLowerVec1, baseType, simdSize); - xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); - } - + vec1 = gtNewSimdBinOpNode(GT_OR, simdType, vec1, toLowerVec1, baseType, simdSize); + xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); vec2 = gtNewSimdBinOpNode(GT_OR, simdType, vec2, toLowerVec2, baseType, simdSize); } else @@ -178,24 +168,9 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); } -// ((v1 ^ cns1) | (v2 ^ cns2)) == zero - -#if defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - GenTree* control; - - control = gtNewIconNode(static_cast(0xF0 | (0xCC ^ 0xAA))); // A | (B ^ C) - orr = gtNewSimdTernaryLogicNode(simdType, xor1, vec2, cnsVec2, control, baseType, simdSize); - } - else -#endif // TARGET_XARCH - { - GenTree* xor2; - - xor2 = gtNewSimdBinOpNode(GT_XOR, simdType, vec2, cnsVec2, baseType, simdSize); - orr = gtNewSimdBinOpNode(GT_OR, simdType, xor1, xor2, baseType, simdSize); - } + // ((v1 ^ cns1) | (v2 ^ cns2)) == zero + GenTree* xor2 = gtNewSimdBinOpNode(GT_XOR, simdType, vec2, cnsVec2, baseType, simdSize); + GenTree* orr = gtNewSimdBinOpNode(GT_OR, simdType, xor1, xor2, baseType, simdSize); // Optimization: use a single load when byteLen equals simdSize. // For code simplicity we always create nodes for two vectors case. @@ -426,7 +401,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR( // data - Pointer (LCL_VAR) to a data to vectorize // lengthFld - Pointer (LCL_VAR or GT_IND) to Length field // checkForNull - Check data for null -// startsWith - Is it StartsWith or Equals? +// kind - Is it StartsWith, Equals or EndsWith? // cns - Constant data (array of 2-byte chars) // len - Number of 2-byte chars in the cns // dataOffset - Offset for data @@ -439,7 +414,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR( GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, GenTree* lengthFld, bool checkForNull, - bool startsWith, + StringComparisonKind kind, WCHAR* cnsData, int len, int dataOffset, @@ -454,14 +429,14 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, return nullptr; } - const genTreeOps cmpOp = startsWith ? GT_GE : GT_EQ; + const genTreeOps cmpOp = kind == Equals ? GT_EQ : GT_GE; GenTree* elementsCount = gtNewIconNode(len); GenTree* lenCheckNode; if (len == 0) { // For zero length we don't need to compare content, the following expression is enough: // - // varData != null && lengthFld == 0 + // varData != null && lengthFld cmpOp 0 // lenCheckNode = gtNewOperNode(cmpOp, TYP_INT, lengthFld, elementsCount); } @@ -469,15 +444,25 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, { assert(cnsData != nullptr); + GenTreeLclVarCommon* dataAddr = gtClone(data)->AsLclVarCommon(); + + if (kind == EndsWith) + { + // For EndsWith we need to adjust dataAddr to point to the end of the string minus value's length + // We spawn a local that we're going to set below + unsigned dataTmp = lvaGrabTemp(true DEBUGARG("clonning data ptr")); + dataAddr = gtNewLclvNode(dataTmp, TYP_BYREF); + } + GenTree* indirCmp = nullptr; if (len < 8) // SWAR impl supports len == 8 but we'd better give it to SIMD { - indirCmp = impExpandHalfConstEqualsSWAR(gtClone(data)->AsLclVarCommon(), cnsData, len, dataOffset, cmpMode); + indirCmp = impExpandHalfConstEqualsSWAR(dataAddr, cnsData, len, dataOffset, cmpMode); } #if defined(FEATURE_HW_INTRINSICS) else if (IsBaselineSimdIsaSupported()) { - indirCmp = impExpandHalfConstEqualsSIMD(gtClone(data)->AsLclVarCommon(), cnsData, len, dataOffset, cmpMode); + indirCmp = impExpandHalfConstEqualsSIMD(dataAddr, cnsData, len, dataOffset, cmpMode); } #endif @@ -488,9 +473,21 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, } assert(indirCmp->TypeIs(TYP_INT, TYP_UBYTE)); + if (kind == EndsWith) + { + // dataAddr = dataAddr + (length * 2 - len * 2) + GenTree* castedLen = gtNewCastNode(TYP_I_IMPL, gtCloneExpr(lengthFld), false, TYP_I_IMPL); + GenTree* byteLen = gtNewOperNode(GT_MUL, TYP_I_IMPL, castedLen, gtNewIconNode(2, TYP_I_IMPL)); + GenTreeOp* cmpStart = + gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data), + gtNewOperNode(GT_SUB, TYP_I_IMPL, byteLen, gtNewIconNode(len * 2, TYP_I_IMPL))); + GenTree* storeTmp = gtNewTempStore(dataAddr->GetLclNum(), cmpStart); + indirCmp = gtNewOperNode(GT_COMMA, indirCmp->TypeGet(), storeTmp, indirCmp); + } + GenTreeColon* lenCheckColon = gtNewColonNode(TYP_INT, indirCmp, gtNewFalse()); - // For StartsWith we use GT_GE, e.g.: `x.Length >= 10` + // For StartsWith/EndsWith we use GT_GE, e.g.: `x.Length >= 10` lenCheckNode = gtNewQmarkNode(TYP_INT, gtNewOperNode(cmpOp, TYP_INT, lengthFld, elementsCount), lenCheckColon); } @@ -556,7 +553,7 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span) } //------------------------------------------------------------------------ -// impStringEqualsOrStartsWith: The main entry-point for String methods +// impUtf16StringComparison: The main entry-point for String methods // We're going to unroll & vectorize the following cases: // 1) String.Equals(obj, "cns") // 2) String.Equals(obj, "cns", Ordinal or OrdinalIgnoreCase) @@ -570,18 +567,21 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span) // 9) obj.StartsWith("cns", Ordinal or OrdinalIgnoreCase) // 10) "cns".StartsWith(obj, Ordinal or OrdinalIgnoreCase) // +// 11) obj.EndsWith("cns", Ordinal or OrdinalIgnoreCase) +// 12) "cns".EndsWith(obj, Ordinal or OrdinalIgnoreCase) +// // For cases 5, 6 and 9 we don't emit "obj != null" // NOTE: String.Equals(object) is not supported currently // // Arguments: -// startsWith - Is it StartsWith or Equals? -// sig - signature of StartsWith or Equals method +// kind - Is it StartsWith, EndsWith or Equals? +// sig - signature of StartsWith, EndsWith or Equals method // methodFlags - its flags // // Returns: // GenTree representing vectorized comparison or nullptr // -GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags) +GenTree* Compiler::impUtf16StringComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags) { const bool isStatic = methodFlags & CORINFO_FLG_STATIC; const int argsCount = sig->numArgs + (isStatic ? 0 : 1); @@ -589,7 +589,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO // This optimization spawns several temps so make sure we have a room if (lvaHaveManyLocals(0.75)) { - JITDUMP("impSpanEqualsOrStartsWith: Method has too many locals - bail out.\n") + JITDUMP("impStringComparison: Method has too many locals - bail out.\n") return nullptr; } @@ -630,9 +630,9 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO } else { - if (startsWith) + if (kind != Equals) { - // StartsWith is not commutative + // StartsWith and EndsWith are not commutative return nullptr; } cnsStr = op1->AsStrCon(); @@ -647,6 +647,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO // obj.Equals("cns") // obj.Equals("cns", Ordinal or OrdinalIgnoreCase) // obj.StartsWith("cns", Ordinal or OrdinalIgnoreCase) + // obj.EndsWith("cns", Ordinal or OrdinalIgnoreCase) // // instead, it should throw NRE if it's null needsNullcheck = false; @@ -658,7 +659,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO { // check for fake "" first cnsLength = 0; - JITDUMP("Trying to unroll String.Equals|StartsWith(op1, \"\")...\n", str) + JITDUMP("Trying to unroll String.Equals|StartsWith|EndsWith(op1, \"\")...\n", str) } else { @@ -668,7 +669,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO // We were unable to get the literal (e.g. dynamic context) return nullptr; } - JITDUMP("Trying to unroll String.Equals|StartsWith(op1, \"cns\")...\n") + JITDUMP("Trying to unroll String.Equals|StartsWith|EndsWith(op1, \"cns\")...\n") } // Create a temp which is safe to gtClone for varStr @@ -682,7 +683,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO GenTree* lenNode = gtNewArrLen(TYP_INT, varStrLcl, strLenOffset, compCurBB); varStrLcl = gtClone(varStrLcl)->AsLclVar(); - GenTree* unrolled = impExpandHalfConstEquals(varStrLcl, lenNode, needsNullcheck, startsWith, (WCHAR*)str, cnsLength, + GenTree* unrolled = impExpandHalfConstEquals(varStrLcl, lenNode, needsNullcheck, kind, (WCHAR*)str, cnsLength, strLenOffset + sizeof(int), cmpMode); if (unrolled != nullptr) { @@ -706,7 +707,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO } //------------------------------------------------------------------------ -// impSpanEqualsOrStartsWith: The main entry-point for [ReadOnly]Span methods +// impUtf16SpanComparison: The main entry-point for [ReadOnly]Span methods // We're going to unroll & vectorize the following cases: // 1) MemoryExtensions.SequenceEqual(var, "cns") // 2) MemoryExtensions.SequenceEqual("cns", var) @@ -717,15 +718,20 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO // 7) MemoryExtensions.StartsWith("cns", var, Ordinal or OrdinalIgnoreCase) // 8) MemoryExtensions.StartsWith(var, "cns", Ordinal or OrdinalIgnoreCase) // +// 9) MemoryExtensions.EndsWith("cns", var) +// 10) MemoryExtensions.EndsWith(var, "cns") +// 11) MemoryExtensions.EndsWith("cns", var, Ordinal or OrdinalIgnoreCase) +// 12) MemoryExtensions.EndsWith(var, "cns", Ordinal or OrdinalIgnoreCase) +// // Arguments: -// startsWith - Is it StartsWith or Equals? -// sig - signature of StartsWith or Equals method +// kind - Is it StartsWith, EndsWith or Equals? +// sig - signature of StartsWith, EndsWith or Equals method // methodFlags - its flags // // Returns: // GenTree representing vectorized comparison or nullptr // -GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags) +GenTree* Compiler::impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags) { const bool isStatic = methodFlags & CORINFO_FLG_STATIC; const int argsCount = sig->numArgs + (isStatic ? 0 : 1); @@ -733,7 +739,7 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* // This optimization spawns several temps so make sure we have a room if (lvaHaveManyLocals(0.75)) { - JITDUMP("impSpanEqualsOrStartsWith: Method has too many locals - bail out.\n") + JITDUMP("impUtf16SpanComparison: Method has too many locals - bail out.\n") return nullptr; } @@ -760,7 +766,7 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* op2 = impStackTop(0).val; } - // For generic StartsWith and Equals we need to make sure T is char + // For generic StartsWith, EndsWith and Equals we need to make sure T is char if (sig->sigInst.methInstCount != 0) { assert(sig->sigInst.methInstCount == 1); @@ -790,9 +796,9 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* } else { - if (startsWith) + if (kind != Equals) { - // StartsWith is not commutative + // StartsWith and EndsWith are not commutative return nullptr; } cnsStr = op1Str; @@ -835,8 +841,8 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* GenTreeLclFld* spanReferenceFld = gtNewLclFldNode(spanLclNum, TYP_BYREF, OFFSETOF__CORINFO_Span__reference); GenTreeLclFld* spanLengthFld = gtNewLclFldNode(spanLclNum, TYP_INT, OFFSETOF__CORINFO_Span__length); - GenTree* unrolled = impExpandHalfConstEquals(spanReferenceFld, spanLengthFld, false, startsWith, (WCHAR*)str, - cnsLength, 0, cmpMode); + GenTree* unrolled = + impExpandHalfConstEquals(spanReferenceFld, spanLengthFld, false, kind, (WCHAR*)str, cnsLength, 0, cmpMode); if (unrolled != nullptr) { diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 9c4f44e8e2f0b..09d33ba76a0d6 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -115,6 +115,7 @@ enum NamedIntrinsic : unsigned short NI_System_String_get_Length, NI_System_String_op_Implicit, NI_System_String_StartsWith, + NI_System_String_EndsWith, NI_System_Span_get_Item, NI_System_Span_get_Length, NI_System_SpanHelpers_SequenceEqual, @@ -125,6 +126,7 @@ enum NamedIntrinsic : unsigned short NI_System_MemoryExtensions_Equals, NI_System_MemoryExtensions_SequenceEqual, NI_System_MemoryExtensions_StartsWith, + NI_System_MemoryExtensions_EndsWith, NI_System_Threading_Interlocked_And, NI_System_Threading_Interlocked_Or, diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs index 4039f244d77bd..fac474c1b3691 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs @@ -294,6 +294,7 @@ public static int ToUpperInvariant(this ReadOnlySpan source, Span de /// The source span. /// The sequence to compare to the end of the source span. /// One of the enumeration values that determines how the and are compared. + [Intrinsic] // Unrolled and vectorized for half-constant input (Ordinal) public static bool EndsWith(this ReadOnlySpan span, ReadOnlySpan value, StringComparison comparisonType) { string.CheckStringComparison(comparisonType); diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index bffab304afb0c..83ea307fc8570 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -2573,6 +2573,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(value)), /// Determines whether the specified sequence appears at the end of the span. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] // Unrolled and vectorized for half-constant input public static unsafe bool EndsWith(this Span span, ReadOnlySpan value) where T : IEquatable? { int spanLength = span.Length; @@ -2597,6 +2598,7 @@ ref MemoryMarshal.GetReference(value), /// Determines whether the specified sequence appears at the end of the span. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] // Unrolled and vectorized for half-constant input public static unsafe bool EndsWith(this ReadOnlySpan span, ReadOnlySpan value) where T : IEquatable? { int spanLength = span.Length; diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 66b9432dfe24f..356903063d156 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -533,6 +533,7 @@ public bool EndsWith(string value) return EndsWith(value, StringComparison.CurrentCulture); } + [Intrinsic] // Unrolled and vectorized for half-constant input (Ordinal) public bool EndsWith(string value, StringComparison comparisonType) { ArgumentNullException.ThrowIfNull(value); From e21d63455b37c8cf53396dd40581ff09576cdf55 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Feb 2024 12:42:18 +0100 Subject: [PATCH 2/5] Fix build --- src/coreclr/jit/importervectorization.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index ce211448b07aa..eb90ebcccb659 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -450,8 +450,9 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, { // For EndsWith we need to adjust dataAddr to point to the end of the string minus value's length // We spawn a local that we're going to set below - unsigned dataTmp = lvaGrabTemp(true DEBUGARG("clonning data ptr")); - dataAddr = gtNewLclvNode(dataTmp, TYP_BYREF); + unsigned dataTmp = lvaGrabTemp(true DEBUGARG("clonning data ptr")); + lvaTable[dataTmp].lvType = TYP_BYREF; + dataAddr = gtNewLclvNode(dataTmp, TYP_BYREF); } GenTree* indirCmp = nullptr; @@ -475,12 +476,15 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, if (kind == EndsWith) { + // len is expected to be small, so no overflow is possible + assert((len * 2) > len); + // dataAddr = dataAddr + (length * 2 - len * 2) GenTree* castedLen = gtNewCastNode(TYP_I_IMPL, gtCloneExpr(lengthFld), false, TYP_I_IMPL); GenTree* byteLen = gtNewOperNode(GT_MUL, TYP_I_IMPL, castedLen, gtNewIconNode(2, TYP_I_IMPL)); - GenTreeOp* cmpStart = - gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data), - gtNewOperNode(GT_SUB, TYP_I_IMPL, byteLen, gtNewIconNode(len * 2, TYP_I_IMPL))); + GenTreeOp* cmpStart = gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data), + gtNewOperNode(GT_SUB, TYP_I_IMPL, byteLen, + gtNewIconNode((ssize_t)(len * 2), TYP_I_IMPL))); GenTree* storeTmp = gtNewTempStore(dataAddr->GetLclNum(), cmpStart); indirCmp = gtNewOperNode(GT_COMMA, indirCmp->TypeGet(), storeTmp, indirCmp); } From 1778e6198b1c60861cefb3b1778d3b8bf1398a17 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 17 Feb 2024 14:15:59 +0100 Subject: [PATCH 3/5] revert unrelated changes --- src/coreclr/jit/importervectorization.cpp | 39 ++++++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index eb90ebcccb659..dc7d61c13c7f0 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -152,6 +152,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( GenTree* vec2 = gtNewIndir(simdType, gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data), offset2)); GenTree* xor1; + GenTree* orr; if (cmpMode == OrdinalIgnoreCase) { @@ -159,8 +160,21 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( GenTreeVecCon* toLowerVec1 = gtNewVconNode(simdType, toLowerMask); GenTreeVecCon* toLowerVec2 = gtNewVconNode(simdType, (BYTE*)toLowerMask + byteLen - simdSize); - vec1 = gtNewSimdBinOpNode(GT_OR, simdType, vec1, toLowerVec1, baseType, simdSize); - xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); +#if defined(TARGET_XARCH) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + { + GenTree* control; + + control = gtNewIconNode(static_cast((0xF0 | 0xCC) ^ 0xAA)); // (A | B)) ^ C + xor1 = gtNewSimdTernaryLogicNode(simdType, vec1, toLowerVec1, cnsVec1, control, baseType, simdSize); + } + else +#endif // TARGET_XARCH + { + vec1 = gtNewSimdBinOpNode(GT_OR, simdType, vec1, toLowerVec1, baseType, simdSize); + xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); + } + vec2 = gtNewSimdBinOpNode(GT_OR, simdType, vec2, toLowerVec2, baseType, simdSize); } else @@ -168,9 +182,24 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize); } - // ((v1 ^ cns1) | (v2 ^ cns2)) == zero - GenTree* xor2 = gtNewSimdBinOpNode(GT_XOR, simdType, vec2, cnsVec2, baseType, simdSize); - GenTree* orr = gtNewSimdBinOpNode(GT_OR, simdType, xor1, xor2, baseType, simdSize); +// ((v1 ^ cns1) | (v2 ^ cns2)) == zero + +#if defined(TARGET_XARCH) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + { + GenTree* control; + + control = gtNewIconNode(static_cast(0xF0 | (0xCC ^ 0xAA))); // A | (B ^ C) + orr = gtNewSimdTernaryLogicNode(simdType, xor1, vec2, cnsVec2, control, baseType, simdSize); + } + else +#endif // TARGET_XARCH + { + GenTree* xor2; + + xor2 = gtNewSimdBinOpNode(GT_XOR, simdType, vec2, cnsVec2, baseType, simdSize); + orr = gtNewSimdBinOpNode(GT_OR, simdType, xor1, xor2, baseType, simdSize); + } // Optimization: use a single load when byteLen equals simdSize. // For code simplicity we always create nodes for two vectors case. From 8e294b2376075f6b0252cd68e2dd6fafff0a8a3b Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 20 Feb 2024 15:41:57 +0100 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/importervectorization.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index dc7d61c13c7f0..68a811478f87e 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -506,7 +506,7 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, if (kind == EndsWith) { // len is expected to be small, so no overflow is possible - assert((len * 2) > len); + assert(!CheckedOps::MulOverflows(len, 2, /* unsignedMul */ false)); // dataAddr = dataAddr + (length * 2 - len * 2) GenTree* castedLen = gtNewCastNode(TYP_I_IMPL, gtCloneExpr(lengthFld), false, TYP_I_IMPL); @@ -622,7 +622,7 @@ GenTree* Compiler::impUtf16StringComparison(StringComparisonKind kind, CORINFO_S // This optimization spawns several temps so make sure we have a room if (lvaHaveManyLocals(0.75)) { - JITDUMP("impStringComparison: Method has too many locals - bail out.\n") + JITDUMP("impUtf16StringComparison: Method has too many locals - bail out.\n") return nullptr; } From c9c0c242f4f096c857e9fe6a5f00567ceb65a70f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 20 Feb 2024 15:48:38 +0100 Subject: [PATCH 5/5] enum class --- src/coreclr/jit/compiler.h | 6 ++-- src/coreclr/jit/importercalls.cpp | 12 +++---- src/coreclr/jit/importervectorization.cpp | 44 ++++++++++++----------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 95b59ebdd8f2b..54c19845033d6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4418,12 +4418,12 @@ class Compiler Ordinal = 4, OrdinalIgnoreCase = 5 }; - enum StringComparisonJoint + enum class StringComparisonJoint { Eq, // (d1 == cns1) && (s2 == cns2) Xor, // (d1 ^ cns1) | (s2 ^ cns2) }; - enum StringComparisonKind + enum class StringComparisonKind { Equals, StartsWith, @@ -4444,7 +4444,7 @@ class Compiler ssize_t offset, ssize_t value, StringComparison ignoreCase, - StringComparisonJoint joint = Eq); + StringComparisonJoint joint = StringComparisonJoint::Eq); GenTree* impExpandHalfConstEqualsSWAR( GenTreeLclVarCommon* data, WCHAR* cns, int len, int dataOffset, StringComparison cmpMode); GenTree* impExpandHalfConstEqualsSIMD( diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index ac50d0142b1d6..5bb61097b60e6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2827,38 +2827,38 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_String_Equals: { - retNode = impUtf16StringComparison(Equals, sig, methodFlags); + retNode = impUtf16StringComparison(StringComparisonKind::Equals, sig, methodFlags); break; } case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: { - retNode = impUtf16SpanComparison(Equals, sig, methodFlags); + retNode = impUtf16SpanComparison(StringComparisonKind::Equals, sig, methodFlags); break; } case NI_System_String_StartsWith: { - retNode = impUtf16StringComparison(StartsWith, sig, methodFlags); + retNode = impUtf16StringComparison(StringComparisonKind::StartsWith, sig, methodFlags); break; } case NI_System_String_EndsWith: { - retNode = impUtf16StringComparison(EndsWith, sig, methodFlags); + retNode = impUtf16StringComparison(StringComparisonKind::EndsWith, sig, methodFlags); break; } case NI_System_MemoryExtensions_StartsWith: { - retNode = impUtf16SpanComparison(StartsWith, sig, methodFlags); + retNode = impUtf16SpanComparison(StringComparisonKind::StartsWith, sig, methodFlags); break; } case NI_System_MemoryExtensions_EndsWith: { - retNode = impUtf16SpanComparison(EndsWith, sig, methodFlags); + retNode = impUtf16SpanComparison(StringComparisonKind::EndsWith, sig, methodFlags); break; } diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 68a811478f87e..af7a2f2791d16 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -282,12 +282,12 @@ GenTree* Compiler::impCreateCompareInd(GenTreeLclVarCommon* obj, } GenTree* valueTree = gtNewIconNode(value, actualType); - if (joint == Xor) + if (joint == StringComparisonJoint::Xor) { // XOR is better than CMP if we want to join multiple comparisons return gtNewOperNode(GT_XOR, actualType, indirTree, valueTree); } - assert(joint == Eq); + assert(joint == StringComparisonJoint::Eq); return gtNewOperNode(GT_EQ, TYP_INT, indirTree, valueTree); } @@ -346,11 +346,13 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR( // // where offset for value2 is 2 bytes (1 char) // - UINT32 value1 = MAKEINT32(cns[0], cns[1]); - UINT32 value2 = MAKEINT32(cns[1], cns[2]); - GenTree* firstIndir = impCreateCompareInd(data, TYP_INT, dataOffset, value1, cmpMode, Xor); - GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, - dataOffset + sizeof(USHORT), value2, cmpMode, Xor); + UINT32 value1 = MAKEINT32(cns[0], cns[1]); + UINT32 value2 = MAKEINT32(cns[1], cns[2]); + GenTree* firstIndir = + impCreateCompareInd(data, TYP_INT, dataOffset, value1, cmpMode, StringComparisonJoint::Xor); + GenTree* secondIndir = + impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, dataOffset + sizeof(USHORT), value2, cmpMode, + StringComparisonJoint::Xor); if ((firstIndir == nullptr) || (secondIndir == nullptr)) { @@ -381,12 +383,13 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR( // For 5..6 the overlapping part is 4 bytes if (len <= 6) { - UINT32 value2 = MAKEINT32(cns[len - 2], cns[len - 1]); - GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor); + UINT32 value2 = MAKEINT32(cns[len - 2], cns[len - 1]); + GenTree* firstIndir = + impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, StringComparisonJoint::Xor); - ssize_t offset = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32); - GenTree* secondIndir = - impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, offset, value2, cmpMode, Xor); + ssize_t offset = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32); + GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, offset, value2, cmpMode, + StringComparisonJoint::Xor); if ((firstIndir == nullptr) || (secondIndir == nullptr)) { @@ -402,10 +405,11 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR( assert((len == 7) || (len == 8)); UINT64 value2 = MAKEINT64(cns[len - 4], cns[len - 3], cns[len - 2], cns[len - 1]); - GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor); + GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, StringComparisonJoint::Xor); ssize_t offset = dataOffset + len * sizeof(WCHAR) - sizeof(UINT64); - GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_LONG, offset, value2, cmpMode, Xor); + GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_LONG, offset, value2, cmpMode, + StringComparisonJoint::Xor); if ((firstIndir == nullptr) || (secondIndir == nullptr)) { @@ -458,7 +462,7 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, return nullptr; } - const genTreeOps cmpOp = kind == Equals ? GT_EQ : GT_GE; + const genTreeOps cmpOp = kind == StringComparisonKind::Equals ? GT_EQ : GT_GE; GenTree* elementsCount = gtNewIconNode(len); GenTree* lenCheckNode; if (len == 0) @@ -475,7 +479,7 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, GenTreeLclVarCommon* dataAddr = gtClone(data)->AsLclVarCommon(); - if (kind == EndsWith) + if (kind == StringComparisonKind::EndsWith) { // For EndsWith we need to adjust dataAddr to point to the end of the string minus value's length // We spawn a local that we're going to set below @@ -503,10 +507,10 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data, } assert(indirCmp->TypeIs(TYP_INT, TYP_UBYTE)); - if (kind == EndsWith) + if (kind == StringComparisonKind::EndsWith) { // len is expected to be small, so no overflow is possible - assert(!CheckedOps::MulOverflows(len, 2, /* unsignedMul */ false)); + assert(!CheckedOps::MulOverflows(len, 2, CheckedOps::Signed)); // dataAddr = dataAddr + (length * 2 - len * 2) GenTree* castedLen = gtNewCastNode(TYP_I_IMPL, gtCloneExpr(lengthFld), false, TYP_I_IMPL); @@ -663,7 +667,7 @@ GenTree* Compiler::impUtf16StringComparison(StringComparisonKind kind, CORINFO_S } else { - if (kind != Equals) + if (kind != StringComparisonKind::Equals) { // StartsWith and EndsWith are not commutative return nullptr; @@ -829,7 +833,7 @@ GenTree* Compiler::impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG } else { - if (kind != Equals) + if (kind != StringComparisonKind::Equals) { // StartsWith and EndsWith are not commutative return nullptr;