Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Intrinsify ClearWithoutReferences and Fill #98700

Merged
merged 18 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
break;
}

case NI_System_SpanHelpers_ClearWithoutReferences:
case NI_System_SpanHelpers_Fill:
case NI_System_SpanHelpers_SequenceEqual:
case NI_System_Buffer_Memmove:
{
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2544,6 +2544,9 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
case NI_System_MemoryExtensions_Equals:
case NI_System_MemoryExtensions_SequenceEqual:
case NI_System_MemoryExtensions_StartsWith:
case NI_System_SpanHelpers_Fill:
case NI_System_SpanHelpers_SequenceEqual:
case NI_System_SpanHelpers_ClearWithoutReferences:

// Same here, these are only folded when JIT knows the exact types
case NI_System_Type_IsAssignableFrom:
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3974,6 +3974,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,

case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8:
case NI_System_SpanHelpers_SequenceEqual:
case NI_System_SpanHelpers_ClearWithoutReferences:
case NI_System_Buffer_Memmove:
{
if (sig->sigInst.methInstCount == 0)
Expand All @@ -3985,6 +3986,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;
}

case NI_System_SpanHelpers_Fill:
{
if (sig->sigInst.methInstCount == 1)
{
// We'll try to unroll this in lower for constant input.
isSpecial = true;
}
break;
}

case NI_System_BitConverter_DoubleToInt64Bits:
{
GenTree* op1 = impStackTop().val;
Expand Down Expand Up @@ -9009,6 +9020,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
{
result = NI_System_SpanHelpers_SequenceEqual;
}
else if (strcmp(methodName, "Fill") == 0)
{
result = NI_System_SpanHelpers_Fill;
}
else if (strcmp(methodName, "ClearWithoutReferences") == 0)
{
result = NI_System_SpanHelpers_ClearWithoutReferences;
}
}
else if (strcmp(className, "String") == 0)
{
Expand Down
182 changes: 177 additions & 5 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,157 @@ GenTree* Lowering::AddrGen(void* addr)
return AddrGen((ssize_t)addr);
}

//------------------------------------------------------------------------
// LowerCallMemset: Replaces the following memset-like special intrinsics:
//
//    SpanHelpers.Fill<T>(ref dstRef, CNS_SIZE, CNS_VALUE)
//    SpanHelpers.ClearWithoutReferences(ref dstRef, CNS_SIZE)
//
//  with a GT_STORE_BLK node:
//
//    *  STORE_BLK struct<CNS_SIZE> (init) (Unroll)
//    +--*  LCL_VAR   byref  dstRef
//    \--*  CNS_INT   int    0
//
// Arguments:
//    call - GenTreeCall node to replace with STORE_BLK
//    next - [out] Next node to lower if this function returns true
//
// Return Value:
//    true if the call was replaced with a STORE_BLK (in which case *next is set
//    to the new STORE_BLK node), false if no changes were made.
//
// Notes:
//    The transformation is only performed when:
//      - the length argument is an integral constant,
//      - for Fill<T>, the value argument is an integral constant that is either
//        zero (any element size) or non-zero with a single-byte element type,
//      - the resulting byte count is positive and does not exceed the Memset
//        unroll threshold.
//    All bail-out checks are done before the IR is touched, so a "false" return
//    always leaves the call intact.
//
bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next)
{
    JITDUMP("Considering Memset-like call [%06d] for unrolling.. ", comp->dspTreeID(call))

    if (comp->info.compHasNextCallRetAddr)
    {
        JITDUMP("compHasNextCallRetAddr=true so we won't be able to remove the call - bail out.\n");
        return false;
    }

    // void SpanHelpers::Fill<T>(ref T dstRef, nuint numElements, T value)
    // void SpanHelpers::ClearWithoutReferences(ref byte dstRef, nuint byteLength)

    GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
    GenTree* lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
    GenTree* valueArg  = nullptr;

    if (!lengthArg->IsIntegralConst())
    {
        JITDUMP("Length is not a constant - bail out.\n")
        return false;
    }

    // Fill<T>'s length is not in bytes, so we need to scale it depending on the signature
    unsigned lengthScale;

    const NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd);
    if (ni == NI_System_SpanHelpers_Fill)
    {
        // void SpanHelpers::Fill<T>(ref T refData, nuint numElements, T value)
        //
        assert(call->gtArgs.CountUserArgs() == 3);
        valueArg = call->gtArgs.GetUserArgByIndex(2)->GetNode();

        // The rewrite below clones the value and removes only its root node from LIR,
        // which is valid only for a constant leaf. Reject anything else up front
        // instead of relying on the late assert after the IR has been modified.
        if (!valueArg->IsIntegralConst())
        {
            JITDUMP("SpanHelpers.Fill's value is not a constant - bail out.\n")
            return false;
        }

        // Get that <T> from the signature
        CORINFO_SIG_INFO sig;
        comp->info.compCompHnd->getMethodSig(call->gtCallMethHnd, &sig, nullptr);
        assert(sig.sigInst.methInstCount == 1);
        lengthScale = genTypeSize(
            JITtype2varType(comp->info.compCompHnd->getTypeForPrimitiveValueClass(sig.sigInst.methInst[0])));

        // If value is not zero, we can only unroll for single-byte values
        if (!valueArg->IsIntegralConst(0) && (lengthScale != 1))
        {
            JITDUMP("SpanHelpers.Fill's value is not unroll-friendly - bail out.\n")
            return false;
        }
    }
    else
    {
        // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength)
        //
        assert(call->gtArgs.CountUserArgs() == 2);
        assert(ni == NI_System_SpanHelpers_ClearWithoutReferences);

        lengthScale = 1; // it's always in bytes
    }

    // Convert lenCns to bytes
    ssize_t lenCns = lengthArg->AsIntCon()->IconValue();
    if (CheckedOps::MulOverflows<target_ssize_t>((target_ssize_t)lenCns, (target_ssize_t)lengthScale,
                                                 CheckedOps::Signed))
    {
        // lenCns overflows
        JITDUMP("lenCns * lengthScale overflows - bail out.\n")
        return false;
    }
    lenCns *= (ssize_t)lengthScale;

    // TODO-CQ: drop the whole thing in case of lenCns = 0
    if ((lenCns <= 0) || (lenCns > (ssize_t)comp->getUnrollThreshold(Compiler::UnrollKind::Memset)))
    {
        JITDUMP("Size is either 0 or too big to unroll - bail out.\n")
        return false;
    }

    // From this point on we are committed to the transformation.
    JITDUMP("Accepted for unrolling!\nOld tree:\n");
    DISPTREE(call);

    GenTree* blkInnerValue = nullptr;
    GenTree* blkValue;
    if (valueArg == nullptr || valueArg->IsIntegralConst(0))
    {
        // Simple zeroing
        blkValue = comp->gtNewZeroConNode(TYP_INT);
        blkValue->SetContained();
    }
    else
    {
        // Non-zero (byte) value: wrap a copy of the constant in GT_INIT_VAL
        blkInnerValue = comp->gtCloneExpr(valueArg);
        blkInnerValue->SetContained();
        blkValue = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, blkInnerValue);
        blkValue->SetContained();
    }

    GenTreeBlk* storeBlk = new (comp, GT_STORE_BLK)
        GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, dstRefArg, blkValue, comp->typGetBlkLayout((unsigned)lenCns));
    storeBlk->gtFlags |= (GTF_IND_UNALIGNED | GTF_ASG | GTF_EXCEPT | GTF_GLOB_REF);
    storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;

    // Insert/Remove trees into LIR
    BlockRange().InsertBefore(call, blkValue);
    BlockRange().InsertBefore(call, storeBlk);
    BlockRange().Remove(lengthArg);
    BlockRange().Remove(call);
    if (valueArg != nullptr)
    {
        // valueArg is just a constant in case of Fill<T> (guaranteed by the early check above)
        // and doesn't exist in case of ClearWithoutReferences
        assert(valueArg->IsIntegralConst());
        BlockRange().Remove(valueArg);
    }
    if (blkInnerValue != nullptr)
    {
        // GT_INIT_VAL's operand has to precede it in execution order
        BlockRange().InsertBefore(blkValue, blkInnerValue);
    }

    // Mark all non-user args (e.g. r2r cell) as unused since the call that consumed them is gone
    for (CallArg& arg : call->gtArgs.Args())
    {
        if (arg.IsArgAddedLate())
        {
            arg.GetNode()->SetUnusedValue();
        }
    }

    JITDUMP("\nNew tree:\n");
    DISPTREE(storeBlk);
    *next = storeBlk;
    return true;
}

//------------------------------------------------------------------------
// LowerCallMemmove: Replace Buffer.Memmove(DST, SRC, CNS_SIZE) with a GT_STORE_BLK:
//
Expand Down Expand Up @@ -2216,12 +2367,33 @@ GenTree* Lowering::LowerCall(GenTree* node)
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
{
GenTree* nextNode = nullptr;
NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd);
if (((ni == NI_System_Buffer_Memmove) && LowerCallMemmove(call, &nextNode)) ||
((ni == NI_System_SpanHelpers_SequenceEqual) && LowerCallMemcmp(call, &nextNode)))
GenTree* nextNode = nullptr;
switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd))
{
return nextNode;
case NI_System_Buffer_Memmove:
if (LowerCallMemmove(call, &nextNode))
{
return nextNode;
}
break;

case NI_System_SpanHelpers_SequenceEqual:
if (LowerCallMemcmp(call, &nextNode))
{
return nextNode;
}
break;

case NI_System_SpanHelpers_Fill:
case NI_System_SpanHelpers_ClearWithoutReferences:
if (LowerCallMemset(call, &nextNode))
{
return nextNode;
}
break;

default:
break;
}
}
#endif
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ class Lowering final : public Phase
GenTree* LowerCall(GenTree* call);
bool LowerCallMemmove(GenTreeCall* call, GenTree** next);
bool LowerCallMemcmp(GenTreeCall* call, GenTree** next);
bool LowerCallMemset(GenTreeCall* call, GenTree** next);
void LowerCFGCall(GenTreeCall* call);
void MoveCFGCallArg(GenTreeCall* call, GenTree* node);
#ifndef TARGET_64BIT
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/namedintrinsiclist.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ enum NamedIntrinsic : unsigned short
NI_System_String_StartsWith,
NI_System_Span_get_Item,
NI_System_Span_get_Length,
NI_System_SpanHelpers_ClearWithoutReferences,
NI_System_SpanHelpers_Fill,
NI_System_SpanHelpers_SequenceEqual,
NI_System_ReadOnlySpan_get_Item,
NI_System_ReadOnlySpan_get_Length,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace System
{
internal static partial class SpanHelpers // .T
{
[Intrinsic] // Unrolled for small sizes
public static unsafe void Fill<T>(ref T refData, nuint numElements, T value)
{
// Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ namespace System
{
internal static partial class SpanHelpers
{
[Intrinsic] // Unrolled for small sizes
public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
{
if (byteLength == 0)
Expand Down
Loading