Update handling of limited register during consecutive registers allocation #84588

Merged 12 commits on Apr 11, 2023
188 changes: 133 additions & 55 deletions src/coreclr/jit/lsra.cpp
@@ -496,13 +496,6 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
mask |= refPosition->registerAssignment;
}

#ifdef TARGET_ARM64
if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters())
{
mask |= LsraLimitFPSetForConsecutive;
}
#endif
}

return mask;
@@ -662,7 +655,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
firstColdLoc = MaxLocation;

#ifdef DEBUG
maxNodeLocation = 0;
maxNodeLocation = 0;
consecutiveRegistersLocation = 0;

activeRefPosition = nullptr;
currBuildNode = nullptr;

@@ -4901,6 +4896,24 @@ void LinearScan::allocateRegisters()
}
}
prevLocation = currentLocation;
#ifdef TARGET_ARM64

#ifdef DEBUG
if (hasConsecutiveRegister)
{
if (currentRefPosition.needsConsecutive)
{
// Track all the refPositions around the location that is also
// allocating consecutive registers.
consecutiveRegistersLocation = currentLocation;
}
else if (consecutiveRegistersLocation < currentLocation)
{
consecutiveRegistersLocation = MinLocation;
}
}
#endif // DEBUG
#endif // TARGET_ARM64

// get previous refposition, then current refpos is the new previous
if (currentReferent != nullptr)
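The hunk above adds a DEBUG-only field that remembers the node location currently allocating consecutive registers and clears it once allocation moves past that location. Below is a minimal sketch of that bookkeeping, using simplified stand-in types rather than the JIT's actual `RefPosition`/`LsraLocation` definitions:

```cpp
#include <cstdint>

using LsraLocation = uint32_t;
const LsraLocation MinLocation = 0;

struct RefPositionSketch
{
    bool         needsConsecutive;
    LsraLocation nodeLocation;
};

struct ConsecutiveTracker
{
    LsraLocation consecutiveRegistersLocation = MinLocation;

    void update(const RefPositionSketch& ref, LsraLocation currentLocation)
    {
        if (ref.needsConsecutive)
        {
            // Track all the refPositions around the location that is also
            // allocating consecutive registers.
            consecutiveRegistersLocation = currentLocation;
        }
        else if (consecutiveRegistersLocation < currentLocation)
        {
            // Allocation has moved past that location; clear the marker.
            consecutiveRegistersLocation = MinLocation;
        }
    }

    // True when `ref` does not itself need consecutive registers but is live at the
    // same location as a refPosition that does.
    bool sharesLocationWithConsecutive(const RefPositionSketch& ref) const
    {
        return !ref.needsConsecutive && (consecutiveRegistersLocation == ref.nodeLocation);
    }
};
```

The `sharesLocationWithConsecutive` query corresponds to the check used further down in `select()` to bypass the register-limit stress for refPositions that happen to be live at that location.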
@@ -11683,49 +11696,53 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
Interval* assignedInterval = spillCandidateRegRecord->assignedInterval;
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr;

// Can and should the interval in this register be spilled for this one,
// if we don't find a better alternative?
// Can and should the interval in this register be spilled for this one,
// if we don't find a better alternative?

weight_t currentSpillWeight = 0;
#ifdef TARGET_ARM64
if (assignedInterval == nullptr)
{
// Ideally we should not be seeing this candidate because it is not assigned to
// any interval. Without an assigned interval, we cannot determine whether it is
// a good spill candidate, so skip processing it.
continue;
}

if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) &&
(recentRefPosition->needsConsecutive))
{
continue;
}
#endif // TARGET_ARM64

if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
!assignedInterval->getNextRefPosition()->RegOptional())
{
continue;
}
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
else if (assignedInterval != nullptr)
#endif
{
continue;
}
if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
!assignedInterval->getNextRefPosition()->RegOptional())
{
continue;
}
if (!linearScan->isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord))
{
continue;
}

weight_t currentSpillWeight = 0;
if ((recentRefPosition != nullptr) &&
(recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
{
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
// For such cases, consider the spill cost of next refposition.
// See notes in "spillInterval()".
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
if (reloadRefPosition != nullptr)
if ((recentRefPosition != nullptr) && (recentRefPosition->RegOptional() &&
!(assignedInterval->isLocalVar && recentRefPosition->IsActualRef())))
{
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
// We do not "spillAfter" if previous (recent) refPosition was regOptional or if it
// is not an actual ref. In those cases, we will reload in future (next) refPosition.
// For such cases, consider the spill cost of next refposition.
// See notes in "spillInterval()".
RefPosition* reloadRefPosition = assignedInterval->getNextRefPosition();
if (reloadRefPosition != nullptr)
{
currentSpillWeight = linearScan->getWeight(reloadRefPosition);
}
}
}
#ifdef TARGET_ARM64
else
{
// Ideally we should not be seeing this candidate because it is not assigned to
// any interval. But it is possible in certain scenarios. One of them is that
// `refPosition` needs consecutive registers and we decided to pick a mix of free and busy
// registers. This candidate is part of that set, is free, and hence is not assigned
// to any interval.
}
#endif // TARGET_ARM64

// Only consider spillCost if we were not able to calculate weight of reloadRefPosition.
if (currentSpillWeight == 0)
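The reworked loop above skips, on ARM64, any spill candidate that either has no assigned interval (its register may simply have been grabbed as part of a free-plus-busy consecutive set) or whose recent refPosition is active at this location and itself needs consecutive registers. A sketch of that filter under assumed, simplified types; `isActiveAt` is a hypothetical stand-in for `isRefPositionActive`:

```cpp
#include <cstdint>

using LsraLocation = uint32_t;

struct RefPositionSketch
{
    bool         needsConsecutive;
    LsraLocation nodeLocation;
};

struct IntervalSketch
{
    RefPositionSketch* recentRefPosition;
};

// Hypothetical stand-in for isRefPositionActive: treat a refPosition as active when it
// refers to the location currently being allocated.
static bool isActiveAt(const RefPositionSketch* ref, LsraLocation thisLocation)
{
    return (ref != nullptr) && (ref->nodeLocation == thisLocation);
}

// Returns true when the register should not be considered as a spill candidate.
static bool skipSpillCandidate(const IntervalSketch* assignedInterval, LsraLocation thisLocation)
{
    if (assignedInterval == nullptr)
    {
        // No assigned interval: there is nothing to compute a spill weight from,
        // so the register is not a useful spill candidate.
        return true;
    }

    const RefPositionSketch* recent = assignedInterval->recentRefPosition;
    if (isActiveAt(recent, thisLocation) && recent->needsConsecutive)
    {
        // Spilling would break a consecutive-register series that is live at this
        // location, so skip this candidate as well.
        return true;
    }

    return false;
}
```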
@@ -11875,7 +11892,16 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
#ifdef DEBUG
// The assigned interval should be non-null and should have a recentRefPosition; however, since
// this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert).
if (!hasAssignedInterval)
if (!hasAssignedInterval
#ifdef TARGET_ARM64
// We could see a candidate that doesn't have an assignedInterval because allocation is
// happening for a `refPosition` that needs consecutive registers and we decided to pick
// a mix of free and busy registers. This candidate is part of that set, is free, and hence
// is not assigned to any interval.

&& !refPosition->needsConsecutive
#endif
)
{
assert(!"Spill candidate has no assignedInterval recentRefPosition");
}
@@ -11988,6 +12014,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
*registerScore = NONE;
#endif

#ifdef TARGET_ARM64
assert(!needsConsecutiveRegisters || refPosition->needsConsecutive);
#endif

reset(currentInterval, refPosition);

// process data-structures
@@ -12036,7 +12066,19 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
}

#ifdef DEBUG
candidates = linearScan->stressLimitRegs(refPosition, candidates);
#ifdef TARGET_ARM64
if (!refPosition->needsConsecutive && (linearScan->consecutiveRegistersLocation == refPosition->nodeLocation))
{
// If a method uses consecutive registers and we are assigning refPositions that are not part
// of the consecutive set but are live at the same location, skip the register limit stress for
// them, because there is a high chance that many registers are busy satisfying the consecutive
// requirement and not enough remain for the other refPositions (such as operands).
}
else
#endif
{
candidates = linearScan->stressLimitRegs(refPosition, candidates);
}
#endif
assert(candidates != RBM_NONE);
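Put together, the DEBUG-only `consecutiveRegistersLocation` tracking and the branch above mean the stress limit is bypassed for refPositions that do not themselves need consecutive registers but share a location with one that does. A compact sketch of that decision, with stand-in types and a deliberately simplified stress limit (the real `stressLimitRegs` has more cases):

```cpp
#include <cstdint>

using regMaskTP    = uint64_t;
using LsraLocation = uint32_t;

struct RefPositionSketch
{
    bool         needsConsecutive;
    LsraLocation nodeLocation;
};

static regMaskTP applyStressLimit(LsraLocation             consecutiveRegistersLocation,
                                  const RefPositionSketch& ref,
                                  regMaskTP                candidates,
                                  regMaskTP                stressLimitSet)
{
    if (!ref.needsConsecutive && (consecutiveRegistersLocation == ref.nodeLocation))
    {
        // Another refPosition at this location is allocating consecutive registers and
        // already ties up many registers; don't shrink this refPosition's candidates.
        return candidates;
    }

    // Otherwise apply the stress limit, but never drop to an empty set (simplified;
    // the real stressLimitRegs has more cases, e.g. honoring fixed assignments).
    regMaskTP limited = candidates & stressLimitSet;
    return (limited != 0) ? limited : candidates;
}
```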

@@ -12186,6 +12228,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
}
}

#ifdef DEBUG
regMaskTP inUseOrBusyRegsMask = RBM_NONE;
#endif

// Eliminate candidates that are in-use or busy.
if (!found)
{
@@ -12195,6 +12241,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation;
candidates &= ~busyRegs;

#ifdef DEBUG
inUseOrBusyRegsMask |= busyRegs;
#endif

// Also eliminate as busy any register with a conflicting fixed reference at this or
// the next location.
// Note that this will eliminate the fixedReg, if any, but we'll add it back below.
@@ -12210,6 +12260,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
(refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1))))
{
candidates &= ~checkConflictBit;
#ifdef DEBUG
inUseOrBusyRegsMask |= checkConflictBit;
#endif
}
}
candidates |= fixedRegMask;
@@ -12226,12 +12279,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
prevRegBit = genRegMask(prevRegRec->regNum);
if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE))
{
#ifdef TARGET_ARM64
// If this is allocating for consecutive registers, we need to make sure that
// we allocate a register whose following consecutive registers are also free.
if (!needsConsecutiveRegisters)
#endif
{
// If this is allocating for consecutive registers, we need to make sure that
// we allocate a register whose following consecutive registers are also free.
candidates = prevRegBit;
found = true;
#ifdef DEBUG
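The comment above explains why the previously assigned register cannot simply be reused when the refPosition needs consecutive registers: the registers following it must be usable as well. A small sketch of that check, with `regMaskTP` as a plain 64-bit mask and registers identified by bit position (an assumption made only for illustration):

```cpp
#include <cstdint>

using regMaskTP = uint64_t;

// Returns true if `firstReg` starts a run of `regCount` registers that are all contained
// in `freeMask`. Registers are identified by bit position, and `regCount` is assumed small
// (consecutive series on arm64 are at most a handful of registers).
static bool isConsecutiveRunFree(unsigned firstReg, unsigned regCount, regMaskTP freeMask)
{
    regMaskTP runMask = ((regMaskTP(1) << regCount) - 1) << firstReg;
    return (freeMask & runMask) == runMask;
}
```

This is why the shortcut of taking `prevRegBit` is only used when `needsConsecutiveRegisters` is false; when it is true, the consecutive-aware selection below has to validate the whole run, not just the first register.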
@@ -12245,13 +12296,6 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
prevRegBit = RBM_NONE;
}

if (!found && (candidates == RBM_NONE))
{
assert(refPosition->RegOptional());
currentInterval->assignedReg = nullptr;
return RBM_NONE;
}

// TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics.
// It needs to be re-engineered with this refactoring.
// In non-debug builds, this will simply get optimized away
@@ -12260,9 +12304,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,
reverseSelect = linearScan->doReverseSelect();
#endif // DEBUG

#ifdef TARGET_ARM64
if (needsConsecutiveRegisters)
{
#ifdef TARGET_ARM64
regMaskTP busyConsecutiveCandidates = RBM_NONE;
if (refPosition->isFirstRefPositionOfConsecutiveRegisters())
{
@@ -12287,12 +12331,46 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval,

if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE))
{
noway_assert(!"Not sufficient consecutive registers available.");
#ifdef DEBUG
// Need to make sure that candidates has N consecutive registers to assign
if (linearScan->getStressLimitRegs() != LSRA_LIMIT_NONE)
{
// If the refPosition needs consecutive registers, then we want to make sure that
// the candidates contain at least one range of N consecutive registers, where N
// is the number of consecutive registers needed.
// Remove the `inUseOrBusyRegsMask` from the original candidates and find one
// such consecutive range. Then append that range to `candidates`.
//
regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask;
regMaskTP overallLimitCandidates;
regMaskTP limitConsecutiveResult =
linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount,
&overallLimitCandidates);
assert(limitConsecutiveResult != RBM_NONE);

unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult);

regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1;
candidates |= (registersNeededMask << startRegister);
}

if (candidates == RBM_NONE)
#endif // DEBUG
{
noway_assert(!"Not sufficient consecutive registers available.");
}
}
#endif // TARGET_ARM64
}
else
#endif // TARGET_ARM64
{
if (!found && (candidates == RBM_NONE))
{
assert(refPosition->RegOptional());
currentInterval->assignedReg = nullptr;
return RBM_NONE;
}

freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType));
}

9 changes: 5 additions & 4 deletions src/coreclr/jit/lsra.h
@@ -792,9 +792,6 @@ class LinearScan : public LinearScanInterface
#elif defined(TARGET_ARM64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
// LsraLimitFPSetForConsecutive is used for stress mode and gives few extra registers to satisfy
// the requirements for allocating consecutive registers.
static const regMaskTP LsraLimitFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7);
#elif defined(TARGET_X86)
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
@@ -2006,9 +2003,13 @@ class LinearScan : public LinearScanInterface
int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
#ifdef TARGET_ARM64
int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
#endif
#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS

#ifdef DEBUG
LsraLocation consecutiveRegistersLocation;
#endif // DEBUG

int BuildPutArgStk(GenTreePutArgStk* argNode);
#if FEATURE_ARG_SPLIT
int BuildPutArgSplit(GenTreePutArgSplit* tree);