From 0c79a411945af11499281ed10010d175d3d77287 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 23:26:18 +0100 Subject: [PATCH 01/12] Intrinsify ClearWithoutReferences and Fill --- src/coreclr/jit/fgbasic.cpp | 2 + src/coreclr/jit/fgprofile.cpp | 3 + src/coreclr/jit/importercalls.cpp | 19 ++ src/coreclr/jit/lower.cpp | 178 +++++++++++++++++- src/coreclr/jit/lower.h | 1 + src/coreclr/jit/namedintrinsiclist.h | 2 + .../src/System/SpanHelpers.T.cs | 1 + .../src/System/SpanHelpers.cs | 1 + 8 files changed, 202 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 9ba1d20cd4bd6..f4046baaa47e3 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1285,6 +1285,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed break; } + case NI_System_SpanHelpers_ClearWithoutReferences: + case NI_System_SpanHelpers_Fill: case NI_System_SpanHelpers_SequenceEqual: case NI_System_Buffer_Memmove: { diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 9e29cc2579273..6af874bcb43f0 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2544,6 +2544,9 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod() case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: case NI_System_MemoryExtensions_StartsWith: + case NI_System_SpanHelpers_Fill: + case NI_System_SpanHelpers_SequenceEqual: + case NI_System_SpanHelpers_ClearWithoutReferences: // Same here, these are only folded when JIT knows the exact types case NI_System_Type_IsAssignableFrom: diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b263285c42b54..094edc5dd4908 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3974,6 +3974,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: + case NI_System_SpanHelpers_ClearWithoutReferences: case NI_System_Buffer_Memmove: { if (sig->sigInst.methInstCount == 0) @@ -3985,6 +3986,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + case NI_System_SpanHelpers_Fill: + { + if (sig->sigInst.methInstCount == 1) + { + // We'll try to unroll this in lower for constant input. + isSpecial = true; + } + break; + } + case NI_System_BitConverter_DoubleToInt64Bits: { GenTree* op1 = impStackTop().val; @@ -9009,6 +9020,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_SpanHelpers_SequenceEqual; } + else if (strcmp(methodName, "Fill") == 0) + { + result = NI_System_SpanHelpers_Fill; + } + else if (strcmp(methodName, "ClearWithoutReferences") == 0) + { + result = NI_System_SpanHelpers_ClearWithoutReferences; + } } else if (strcmp(className, "String") == 0) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 591db3a78a22c..23aabd45f97fc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1841,6 +1841,153 @@ GenTree* Lowering::AddrGen(void* addr) return AddrGen((ssize_t)addr); } +// LowerCallMemset: Replaces the following memset-like special intrinsics: +// +// SpanHelpers.Fill(ref dstRef, CNS_SIZE, 0) +// SpanHelpers.ClearWithoutReferences(ref dstRef, CNS_SIZE) +// +// with a GT_STORE_BLK node: +// +// * STORE_BLK struct (init) (Unroll) +// +--* LCL_VAR byref dstRef +// \--* CNS_INT int 0 +// +// Arguments: +// tree - GenTreeCall node to replace with STORE_BLK +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) +{ + JITDUMP("Considering Memset-like call [%06d] for unrolling.. ", comp->dspTreeID(call)) + + if (comp->info.compHasNextCallRetAddr) + { + JITDUMP("compHasNextCallRetAddr=true so we won't be able to remove the call - bail out.\n"); + return false; + } + + // void SpanHelpers::Fill(ref T dstRef, nuint numElements, T value) + // void SpanHelpers::ClearWithoutReferences(ref byte dstRef, nuint byteLength) + + GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + GenTree* lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + GenTree* valueArg = nullptr; + + if (!lengthArg->IsIntegralConst()) + { + JITDUMP("Length is not a constant - bail out.\n") + return false; + } + + // Fill's length is not in bytes, so we need to scale it depending on the signature + unsigned lengthScale; + + const NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); + if (ni == NI_System_SpanHelpers_Fill) + { + // void SpanHelpers::Fill(ref T refData, nuint numElements, T value) + // + assert(call->gtArgs.CountUserArgs() == 3); + valueArg = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + + // Get that from the signature + CORINFO_SIG_INFO sig; + comp->info.compCompHnd->getMethodSig(call->gtCallMethHnd, &sig, nullptr); + assert(sig.sigInst.methInstCount == 1); + lengthScale = genTypeSize( + JITtype2varType(comp->info.compCompHnd->getTypeForPrimitiveValueClass(sig.sigInst.methInst[0]))); + + // If value is not zero, we can only unroll for single-byte values + if (!valueArg->IsIntegralConst(0) && (lengthScale != 1)) + { + JITDUMP("SpanHelpers.Fill's value is not unroll-friendly - bail out.\n") + return false; + } + } + else + { + // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength) + // + assert(call->gtArgs.CountUserArgs() == 2); + assert(ni == NI_System_SpanHelpers_ClearWithoutReferences); + + lengthScale = 1; // it's always in bytes + } + + // Convert lenCns to bytes + ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); + if (lenCns > (lenCns * lengthScale)) + { + // lenCns overflows + JITDUMP("lenCns * lengthScale overflows - bail out.\n") + return false; + } + lenCns *= lengthScale; + + // TODO-CQ: drop the whole thing in case of lenCns = 0 + if ((lenCns <= 0) || (lenCns > (ssize_t)comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) + { + JITDUMP("Size is either 0 or too big to unroll - bail out.\n") + return false; + } + + JITDUMP("Accepted for unrolling!\nOld tree:\n"); + DISPTREE(call); + + GenTree* blkInnerValue = nullptr; + GenTree* blkValue; + if (valueArg == nullptr || valueArg->IsIntegralConst(0)) + { + // Simple zeroing + blkValue = comp->gtNewZeroConNode(TYP_INT); + blkValue->SetContained(); + } + else + { + // Non-zero (byte) value + blkInnerValue = comp->gtCloneExpr(valueArg); + blkInnerValue->SetContained(); + blkValue = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, blkInnerValue); + blkValue->SetContained(); + } + + GenTreeBlk* storeBlk = new (comp, GT_STORE_BLK) + GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, dstRefArg, blkValue, comp->typGetBlkLayout((unsigned)lenCns)); + storeBlk->gtFlags |= (GTF_IND_UNALIGNED | GTF_ASG | GTF_EXCEPT | GTF_GLOB_REF); + storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // Insert/Remove trees into LIR + BlockRange().InsertBefore(call, blkValue); + BlockRange().InsertBefore(call, storeBlk); + BlockRange().Remove(lengthArg); + BlockRange().Remove(call); + if (valueArg != nullptr) + { + BlockRange().Remove(valueArg); + } + if (blkInnerValue != nullptr) + { + BlockRange().InsertBefore(blkValue, blkInnerValue); + } + + // Remove all non-user args (e.g. r2r cell) + for (CallArg& arg : call->gtArgs.Args()) + { + if (arg.IsArgAddedLate()) + { + arg.GetNode()->SetUnusedValue(); + } + } + + JITDUMP("\nNew tree:\n"); + DISPTREE(storeBlk); + *next = storeBlk->gtNext; + return true; +} + //------------------------------------------------------------------------ // LowerCallMemmove: Replace Buffer.Memmove(DST, SRC, CNS_SIZE) with a GT_STORE_BLK: // @@ -2216,12 +2363,33 @@ GenTree* Lowering::LowerCall(GenTree* node) #if defined(TARGET_AMD64) || defined(TARGET_ARM64) if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { - GenTree* nextNode = nullptr; - NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); - if (((ni == NI_System_Buffer_Memmove) && LowerCallMemmove(call, &nextNode)) || - ((ni == NI_System_SpanHelpers_SequenceEqual) && LowerCallMemcmp(call, &nextNode))) + GenTree* nextNode = nullptr; + switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) { - return nextNode; + case NI_System_Buffer_Memmove: + if (LowerCallMemmove(call, &nextNode)) + { + return nextNode; + } + break; + + case NI_System_SpanHelpers_SequenceEqual: + if (LowerCallMemcmp(call, &nextNode)) + { + return nextNode; + } + break; + + case NI_System_SpanHelpers_Fill: + case NI_System_SpanHelpers_ClearWithoutReferences: + if (LowerCallMemset(call, &nextNode)) + { + return nextNode; + } + break; + + default: + break; } } #endif diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index d35738c944dcf..e61eba4520c47 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -140,6 +140,7 @@ class Lowering final : public Phase GenTree* LowerCall(GenTree* call); bool LowerCallMemmove(GenTreeCall* call, GenTree** next); bool LowerCallMemcmp(GenTreeCall* call, GenTree** next); + bool LowerCallMemset(GenTreeCall* call, GenTree** next); void LowerCFGCall(GenTreeCall* call); void MoveCFGCallArg(GenTreeCall* call, GenTree* node); #ifndef TARGET_64BIT diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 9c4f44e8e2f0b..b039dd6bbdf9a 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -117,6 +117,8 @@ enum NamedIntrinsic : unsigned short NI_System_String_StartsWith, NI_System_Span_get_Item, NI_System_Span_get_Length, + NI_System_SpanHelpers_ClearWithoutReferences, + NI_System_SpanHelpers_Fill, NI_System_SpanHelpers_SequenceEqual, NI_System_ReadOnlySpan_get_Item, NI_System_ReadOnlySpan_get_Length, diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index da77d42320a98..ed66759fd0bda 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -16,6 +16,7 @@ namespace System { internal static partial class SpanHelpers // .T { + [Intrinsic] // Unrolled for small sizes public static unsafe void Fill(ref T refData, nuint numElements, T value) { // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts. diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index a7e5f48d63180..ecc1a5f3f3718 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -12,6 +12,7 @@ namespace System { internal static partial class SpanHelpers { + [Intrinsic] // Unrolled for small sizes public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) { if (byteLength == 0) From 2b47db3ed870a66d98f0729c0f6fc6714eaa81f7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 20 Feb 2024 16:59:49 +0100 Subject: [PATCH 02/12] use a better overflow check --- src/coreclr/jit/lower.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 23aabd45f97fc..a486189e99f19 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1919,13 +1919,13 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // Convert lenCns to bytes ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); - if (lenCns > (lenCns * lengthScale)) + if (CheckedOps::MulOverflows(lenCns, (ssize_t)lengthScale, CheckedOps::Signed)) { // lenCns overflows JITDUMP("lenCns * lengthScale overflows - bail out.\n") return false; } - lenCns *= lengthScale; + lenCns *= (ssize_t)lengthScale; // TODO-CQ: drop the whole thing in case of lenCns = 0 if ((lenCns <= 0) || (lenCns > (ssize_t)comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) From a7bc5d292af1455873fe57928e93545bd3461b0f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 20 Feb 2024 17:01:54 +0100 Subject: [PATCH 03/12] Add a comment --- src/coreclr/jit/lower.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a486189e99f19..f38e41faa8f7f 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1966,6 +1966,9 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) BlockRange().Remove(call); if (valueArg != nullptr) { + // valueArg is just a constant in case of Fill + // and doesn't exist in case of ClearWithoutReferences + assert(valueArg->IsIntegralConst()); BlockRange().Remove(valueArg); } if (blkInnerValue != nullptr) From 818a72a7d4a321e0e5b82938b8a1d5ca1b589459 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 20 Feb 2024 17:29:08 +0100 Subject: [PATCH 04/12] fix build --- src/coreclr/jit/lower.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f38e41faa8f7f..8e41b448a92e7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1919,7 +1919,8 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // Convert lenCns to bytes ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); - if (CheckedOps::MulOverflows(lenCns, (ssize_t)lengthScale, CheckedOps::Signed)) + if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, + CheckedOps::Signed)) { // lenCns overflows JITDUMP("lenCns * lengthScale overflows - bail out.\n") From 5f9673ccbc2377d6efb45219a04bc6cd89a8409a Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 20 Feb 2024 18:16:27 +0100 Subject: [PATCH 05/12] Update lower.cpp --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8e41b448a92e7..0d44de47e1bae 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1988,7 +1988,7 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) JITDUMP("\nNew tree:\n"); DISPTREE(storeBlk); - *next = storeBlk->gtNext; + *next = storeBlk; return true; } From d8dac9bcc2837aa8465a2d00b30e30c886689294 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 21 Feb 2024 12:18:42 +0100 Subject: [PATCH 06/12] Update src/coreclr/jit/lower.cpp Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0d44de47e1bae..a8be54a6c613d 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1919,7 +1919,7 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // Convert lenCns to bytes ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); - if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, + if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, CheckedOps::Signed)) { // lenCns overflows From caca20bf73ef10a70a0eff057307f3b6f4ca9814 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 12:25:17 +0100 Subject: [PATCH 07/12] Address feedback --- src/coreclr/jit/lower.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2e09a12122995..be26ac3314995 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1920,8 +1920,7 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // Convert lenCns to bytes ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); - if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, - CheckedOps::Signed)) + if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, CheckedOps::Signed)) { // lenCns overflows JITDUMP("lenCns * lengthScale overflows - bail out.\n") @@ -1956,9 +1955,8 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) blkValue->SetContained(); } - GenTreeBlk* storeBlk = new (comp, GT_STORE_BLK) - GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, dstRefArg, blkValue, comp->typGetBlkLayout((unsigned)lenCns)); - storeBlk->gtFlags |= (GTF_IND_UNALIGNED | GTF_ASG | GTF_EXCEPT | GTF_GLOB_REF); + GenTreeBlk* storeBlk = + comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, blkValue, GTF_IND_UNALIGNED); storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; // Insert/Remove trees into LIR From 9f74b760048b6bbeac31e9188cd644ada422d1cc Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 21 Feb 2024 22:35:34 +0100 Subject: [PATCH 08/12] Update lower.cpp --- src/coreclr/jit/lower.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 31abe82cce017..60acb153881d7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2370,7 +2370,6 @@ GenTree* Lowering::LowerCall(GenTree* node) GenTree* nextNode = nullptr; if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { - GenTree* nextNode = nullptr; switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) { case NI_System_Buffer_Memmove: From 1d020ff9de49747395ec90fb5ba7ec66ddd1469b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 22 Feb 2024 18:55:48 +0100 Subject: [PATCH 09/12] Address feedback --- src/coreclr/jit/lower.cpp | 83 ++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 60acb153881d7..0eddf14b61494 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1875,11 +1875,11 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - GenTree* valueArg = nullptr; + GenTree* valueArg; if (!lengthArg->IsIntegralConst()) { - JITDUMP("Length is not a constant - bail out.\n") + JITDUMP("Length is not a constant - bail out.\n"); return false; } @@ -1892,19 +1892,26 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // void SpanHelpers::Fill(ref T refData, nuint numElements, T value) // assert(call->gtArgs.CountUserArgs() == 3); - valueArg = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + CallArg* valueCallArg = call->gtArgs.GetUserArgByIndex(2); + valueArg = valueCallArg->GetNode(); + + if (!valueArg->IsCnsIntOrI() || !valueArg->TypeIs(TYP_INT)) + { + // Can be enabled if needed. Currently, only XARCH can expand it + JITDUMP("Value is not a constant in Fill - bail out.\n"); + return false; + } // Get that from the signature - CORINFO_SIG_INFO sig; - comp->info.compCompHnd->getMethodSig(call->gtCallMethHnd, &sig, nullptr); - assert(sig.sigInst.methInstCount == 1); - lengthScale = genTypeSize( - JITtype2varType(comp->info.compCompHnd->getTypeForPrimitiveValueClass(sig.sigInst.methInst[0]))); + var_types valueType = valueCallArg->GetSignatureType(); + lengthScale = genTypeSize(valueType); + + assert(varTypeIsIntegralOrI(valueType)); // If value is not zero, we can only unroll for single-byte values if (!valueArg->IsIntegralConst(0) && (lengthScale != 1)) { - JITDUMP("SpanHelpers.Fill's value is not unroll-friendly - bail out.\n") + JITDUMP("SpanHelpers.Fill's value is not unroll-friendly - bail out.\n"); return false; } } @@ -1914,8 +1921,11 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // assert(call->gtArgs.CountUserArgs() == 2); assert(ni == NI_System_SpanHelpers_ClearWithoutReferences); - lengthScale = 1; // it's always in bytes + + // Simple zeroing + valueArg = comp->gtNewZeroConNode(TYP_INT); + BlockRange().InsertBefore(call, valueArg); } // Convert lenCns to bytes @@ -1936,57 +1946,32 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) } JITDUMP("Accepted for unrolling!\nOld tree:\n"); - DISPTREE(call); + DISPTREERANGE(BlockRange(), call); - GenTree* blkInnerValue = nullptr; - GenTree* blkValue; - if (valueArg == nullptr || valueArg->IsIntegralConst(0)) - { - // Simple zeroing - blkValue = comp->gtNewZeroConNode(TYP_INT); - blkValue->SetContained(); - } - else + if (!valueArg->IsIntegralConst(0)) { - // Non-zero (byte) value - blkInnerValue = comp->gtCloneExpr(valueArg); - blkInnerValue->SetContained(); - blkValue = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, blkInnerValue); - blkValue->SetContained(); + // Non-zero (byte) value, wrap value with GT_INIT_VAL + GenTree* initVal = valueArg; + valueArg = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, initVal); + BlockRange().InsertAfter(initVal, valueArg); } + valueArg->SetContained(); GenTreeBlk* storeBlk = - comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, blkValue, GTF_IND_UNALIGNED); + comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, valueArg, GTF_IND_UNALIGNED); storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; // Insert/Remove trees into LIR - BlockRange().InsertBefore(call, blkValue); BlockRange().InsertBefore(call, storeBlk); - BlockRange().Remove(lengthArg); - BlockRange().Remove(call); - if (valueArg != nullptr) - { - // valueArg is just a constant in case of Fill - // and doesn't exist in case of ClearWithoutReferences - assert(valueArg->IsIntegralConst()); - BlockRange().Remove(valueArg); - } - if (blkInnerValue != nullptr) - { - BlockRange().InsertBefore(blkValue, blkInnerValue); - } - // Remove all non-user args (e.g. r2r cell) - for (CallArg& arg : call->gtArgs.Args()) - { - if (arg.IsArgAddedLate()) - { - arg.GetNode()->SetUnusedValue(); - } - } + // Remove the call and mark everything as unused ... + BlockRange().Remove(call, true); + // ... except the args we're going to re-use + dstRefArg->ClearUnusedValue(); + valueArg->ClearUnusedValue(); JITDUMP("\nNew tree:\n"); - DISPTREE(storeBlk); + DISPTREERANGE(BlockRange(), storeBlk); *next = storeBlk; return true; } From d3cb8833f59c3295519819b348ea380203c414e2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 22 Feb 2024 20:44:38 +0100 Subject: [PATCH 10/12] add memset --- src/coreclr/jit/lower.cpp | 83 +++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0eddf14b61494..6c716dac59541 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1844,7 +1844,8 @@ GenTree* Lowering::AddrGen(void* addr) // LowerCallMemset: Replaces the following memset-like special intrinsics: // -// SpanHelpers.Fill(ref dstRef, CNS_SIZE, 0) +// SpanHelpers.Fill(ref dstRef, CNS_SIZE, CNS_VALUE) +// CORINFO_HELP_MEMSET(ref dstRef, CNS_VALUE, CNS_SIZE) // SpanHelpers.ClearWithoutReferences(ref dstRef, CNS_SIZE) // // with a GT_STORE_BLK node: @@ -1862,6 +1863,10 @@ GenTree* Lowering::AddrGen(void* addr) // bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) { + assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill) || + call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences) || + call->IsHelperCall(comp, CORINFO_HELP_MEMSET)); + JITDUMP("Considering Memset-like call [%06d] for unrolling.. ", comp->dspTreeID(call)) if (comp->info.compHasNextCallRetAddr) @@ -1870,62 +1875,66 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) return false; } - // void SpanHelpers::Fill(ref T dstRef, nuint numElements, T value) - // void SpanHelpers::ClearWithoutReferences(ref byte dstRef, nuint byteLength) - GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); - GenTree* lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + GenTree* lengthArg; GenTree* valueArg; - if (!lengthArg->IsIntegralConst()) - { - JITDUMP("Length is not a constant - bail out.\n"); - return false; - } - // Fill's length is not in bytes, so we need to scale it depending on the signature unsigned lengthScale; - const NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); - if (ni == NI_System_SpanHelpers_Fill) + if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill)) { // void SpanHelpers::Fill(ref T refData, nuint numElements, T value) // assert(call->gtArgs.CountUserArgs() == 3); + lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); CallArg* valueCallArg = call->gtArgs.GetUserArgByIndex(2); valueArg = valueCallArg->GetNode(); - if (!valueArg->IsCnsIntOrI() || !valueArg->TypeIs(TYP_INT)) - { - // Can be enabled if needed. Currently, only XARCH can expand it - JITDUMP("Value is not a constant in Fill - bail out.\n"); - return false; - } - // Get that from the signature - var_types valueType = valueCallArg->GetSignatureType(); - lengthScale = genTypeSize(valueType); - - assert(varTypeIsIntegralOrI(valueType)); - - // If value is not zero, we can only unroll for single-byte values - if (!valueArg->IsIntegralConst(0) && (lengthScale != 1)) - { - JITDUMP("SpanHelpers.Fill's value is not unroll-friendly - bail out.\n"); - return false; - } + lengthScale = genTypeSize(valueCallArg->GetSignatureType()); + // NOTE: structs and TYP_REF will be ignored by the "Value is not a constant" check + // Some of those cases can be enabled in future, e.g. s + } + else if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET)) + { + // void CORINFO_HELP_MEMSET(ref T refData, byte value, nuint numElements) + // + assert(call->gtArgs.CountUserArgs() == 3); + lengthArg = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + valueArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + lengthScale = 1; // it's always in bytes } else { // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength) // assert(call->gtArgs.CountUserArgs() == 2); - assert(ni == NI_System_SpanHelpers_ClearWithoutReferences); - lengthScale = 1; // it's always in bytes // Simple zeroing - valueArg = comp->gtNewZeroConNode(TYP_INT); + lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + valueArg = comp->gtNewZeroConNode(TYP_INT); BlockRange().InsertBefore(call, valueArg); + lengthScale = 1; // it's always in bytes + } + + if (!lengthArg->IsIntegralConst()) + { + JITDUMP("Length is not a constant - bail out.\n"); + return false; + } + + if (!valueArg->IsCnsIntOrI() || !valueArg->TypeIs(TYP_INT)) + { + JITDUMP("Value is not a constant - bail out.\n"); + return false; + } + + // If value is not zero, we can only unroll for single-byte values + if (!valueArg->IsIntegralConst(0) && (lengthScale != 1)) + { + JITDUMP("Value is not unroll-friendly - bail out.\n"); + return false; } // Convert lenCns to bytes @@ -2389,6 +2398,12 @@ GenTree* Lowering::LowerCall(GenTree* node) { return nextNode; } + + // Try to lower CORINFO_HELP_MEMSET to unrollable STORE_BLK + if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET) && LowerCallMemset(call, &nextNode)) + { + return nextNode; + } #endif call->ClearOtherRegs(); From 4d95c0dec5f446fd277321532712b10a26d4aeaf Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 22 Feb 2024 21:09:40 +0100 Subject: [PATCH 11/12] Fix CI failures --- src/coreclr/jit/lower.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 6c716dac59541..10dd8d6e7c9ef 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1909,12 +1909,12 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) { // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength) // + assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences)); assert(call->gtArgs.CountUserArgs() == 2); // Simple zeroing - lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - valueArg = comp->gtNewZeroConNode(TYP_INT); - BlockRange().InsertBefore(call, valueArg); + lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + valueArg = comp->gtNewZeroConNode(TYP_INT); lengthScale = 1; // it's always in bytes } @@ -1972,12 +1972,21 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) // Insert/Remove trees into LIR BlockRange().InsertBefore(call, storeBlk); + if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences)) + { + // Value didn't exist in LIR previously + BlockRange().InsertBefore(storeBlk, valueArg); + } // Remove the call and mark everything as unused ... BlockRange().Remove(call, true); // ... except the args we're going to re-use dstRefArg->ClearUnusedValue(); valueArg->ClearUnusedValue(); + if (valueArg->OperIs(GT_INIT_VAL)) + { + valueArg->gtGetOp1()->ClearUnusedValue(); + } JITDUMP("\nNew tree:\n"); DISPTREERANGE(BlockRange(), storeBlk); From ac69bf21b297d327ddd032340aeceb57518643c7 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Fri, 23 Feb 2024 02:34:19 +0100 Subject: [PATCH 12/12] Update lower.cpp --- src/coreclr/jit/lower.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 10dd8d6e7c9ef..f9cd17f651004 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1965,7 +1965,6 @@ bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) BlockRange().InsertAfter(initVal, valueArg); } - valueArg->SetContained(); GenTreeBlk* storeBlk = comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, valueArg, GTF_IND_UNALIGNED); storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;