From c52c79822273f0e754a7197e935c569312352162 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 10:38:40 +0200 Subject: [PATCH 1/8] Metrics for ref positions --- src/coreclr/jit/jitmetadatalist.h | 14 ++++++++++++++ src/coreclr/jit/lsrabuild.cpp | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 43afa6384d4c6..47599024ac5a3 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -60,6 +60,20 @@ JITMETADATAMETRIC(ProfileInconsistentImporterBranchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentImporterSwitchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentChainedGDV, int, 0) JITMETADATAMETRIC(ProfileInconsistentScratchBB, int, 0) +JITMETADATAMETRIC(NumRefTypeInvalid, int, 0) +JITMETADATAMETRIC(NumRefTypeDef, int, 0) +JITMETADATAMETRIC(NumRefTypeUse, int, 0) +JITMETADATAMETRIC(NumRefTypeKill, int, 0) +JITMETADATAMETRIC(NumRefTypeBB, int, 0) +JITMETADATAMETRIC(NumRefTypeFixedReg, int, 0) +JITMETADATAMETRIC(NumRefTypeExpUse, int, 0) +JITMETADATAMETRIC(NumRefTypeParamDef, int, 0) +JITMETADATAMETRIC(NumRefTypeDummyDef, int, 0) +JITMETADATAMETRIC(NumRefTypeZeroInit, int, 0) +JITMETADATAMETRIC(NumRefTypeUpperVectorSave, int, 0) +JITMETADATAMETRIC(NumRefTypeUpperVectorRestore, int, 0) +JITMETADATAMETRIC(NumRefTypeKillGCRefs, int, 0) + #undef JITMETADATA #undef JITMETADATAINFO diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 22cba5900a7dd..a4709a20a5791 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -191,6 +191,12 @@ RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* t assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) || (refType == RefTypeExpUse))); } + + switch (refType) + { +#define DEF_REFTYPE(name, x, y) case name: compiler->Metrics.Num ## name++; break; +#include "lsra_reftypes.h" + } #endif // DEBUG return newRP; } From 4bc8ebec6ce8d4c4a88ae1583478eef53c2048cd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 10:38:53 +0200 Subject: [PATCH 2/8] JIT: Process all register kills in bulk in LSRA `RefTypeKill` is one of the most common type of ref position because we create a separate one for every register whenever registers are killed, and registers are killed with very high frequency (for example, all callee trashed registers at calls). This can be seen in metrics: in `libraries_tests.run`, `RefTypeKill` is as common as `RefTypeUse` and `RefTypeDef` combined on x64, and 3x as common as them combined on arm64 due to having more registers. The main complication around changing `RefTypeKill` to represent killing a full set of registers is the fact that we want to be able to easily look up the next `RefPosition` at which a particular register is killed or used. Today, this is very simple: all kills (`RefTypeKill`) and uses (`RefTypeFixedReg`) participate in an intrusive linked list, and finding the next `RefPosition` is as simple as following this list. This PR changes LSRA to model the kills in bulk instead of creating an individual `RefPosition` for every kill. To handle finding the next fixed reg ref position LSRA is updated to take into account that there can be `RefPosition`'s corresponding to a register from two different linked lists: the `RefTypeFixedReg` intrusive linked list, and also a linked list of all `RefTypeKill` ref positions. The latter linked list may take some searching (not every kill necessarily kills the register we are looking for), but the reduction in number of `RefPosition`'s created more than makes up for this. --- src/coreclr/jit/lsra.cpp | 256 +++++++++++++++++++--------------- src/coreclr/jit/lsra.h | 14 +- src/coreclr/jit/lsrabuild.cpp | 20 +-- 3 files changed, 163 insertions(+), 127 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index efd861348223d..42604a892c0fe 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -271,20 +271,31 @@ regMaskTP LinearScan::lowSIMDRegs() #endif } -void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition) +void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill) { - LsraLocation nextLocation; + LsraLocation nextLocation = nextRefPosition == nullptr ? MaxLocation : nextRefPosition->nodeLocation; - if (nextRefPosition == nullptr) + RefPosition* kill = nextKill; + while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) + { + if ((kill->registerAssignment & genRegMask(regRecord->regNum)) != RBM_NONE) + { + nextLocation = kill->nodeLocation; + break; + } + + kill = kill->nextRefPosition; + } + + if (nextLocation == MaxLocation) { - nextLocation = MaxLocation; fixedRegs &= ~genRegMask(regRecord->regNum); } else { - nextLocation = nextRefPosition->nodeLocation; fixedRegs |= genRegMask(regRecord->regNum); } + nextFixedRef[regRecord->regNum] = nextLocation; } @@ -712,6 +723,8 @@ LinearScan::LinearScan(Compiler* theCompiler) , intervals(theCompiler->getAllocator(CMK_LSRA_Interval)) , allocationPassComplete(false) , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition)) + , killHead(nullptr) + , killTail(&killHead) , listNodePool(theCompiler) { availableRegCount = ACTUAL_REG_COUNT; @@ -3948,6 +3961,46 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio } } +//------------------------------------------------------------------------ +// processKills: Handle that some registers are being killed. +// +// Arguments: +// killRefPosition - The RefPosition for the kill +// nextKill - [in, out] Iterator for next kill ref position +// +// Return Value: +// None. +// +// Notes: +// This is used to ensure that we have no live GC refs in registers at an +// unmanaged call. +// +void LinearScan::processKills(RefPosition* killRefPosition) +{ + RefPosition* nextKill = killRefPosition->nextRefPosition; + + regMaskTP killedRegs = killRefPosition->registerAssignment; + while (killedRegs != RBM_NONE) + { + regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); + RegRecord* regRecord = getRegisterRecord(killedReg); + Interval* assignedInterval = regRecord->assignedInterval; + if (assignedInterval != nullptr) + { + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + makeRegAvailable(regRecord->regNum, assignedInterval->registerType); + } + + assert((nextFixedRef[killedReg] == killRefPosition->nodeLocation) || (killedReg >= AVAILABLE_REG_COUNT)); + RefPosition* regNextRefPos = regRecord->recentRefPosition == nullptr ? regRecord->firstRefPosition : regRecord->recentRefPosition->nextRefPosition; + updateNextFixedRef(regRecord, regNextRefPos, nextKill); + } + + regsBusyUntilKill &= ~killRefPosition->registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, killRefPosition->registerAssignment)); +} + //------------------------------------------------------------------------ // spillGCRefs: Spill any GC-type intervals that are currently in registers. // @@ -4865,7 +4918,7 @@ void LinearScan::allocateRegistersMinimal() { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); assert(physRegRecord->assignedInterval == nullptr); } @@ -4887,6 +4940,7 @@ void LinearScan::allocateRegistersMinimal() #endif // DEBUG BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -4906,8 +4960,6 @@ void LinearScan::allocateRegistersMinimal() for (RefPosition& currentRefPosition : refPositions) { - RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; - // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. @@ -5017,7 +5069,7 @@ void LinearScan::allocateRegistersMinimal() } else { - assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + assert((refType == RefTypeBB) || (refType == RefTypeKill) || (refType == RefTypeKillGCRefs)); } #ifdef DEBUG @@ -5066,65 +5118,59 @@ void LinearScan::allocateRegistersMinimal() continue; } + if (refType == RefTypeKill) + { + assert(nextKill == ¤tRefPosition); + processKills(¤tRefPosition); + nextKill = nextKill->nextRefPosition; + continue; + } + if (refType == RefTypeKillGCRefs) { spillGCRefs(¤tRefPosition); continue; } - if (currentRefPosition.isPhysRegRef) + if (refType == RefTypeFixedReg) { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition); + updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + // This is a FixedReg. Disassociate any inactive constant interval from this register. + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) - { - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - regRecord->assignedInterval = nullptr; - spillCost[regRecord->regNum] = 0; + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); - assert(otherRegRecord->assignedInterval == assignedInterval); - otherRegRecord->assignedInterval = nullptr; - spillCost[otherRegRecord->regNum] = 0; - } -#endif // TARGET_ARM - } - regsInUseThisLocation |= currentRefPosition.registerAssignment; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); - -#ifdef SWIFT_SUPPORT - if (currentRefPosition.delayRegFree) + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; } -#endif // SWIFT_SUPPORT +#endif // TARGET_ARM } - else + regsInUseThisLocation |= currentRefPosition.registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); + +#ifdef SWIFT_SUPPORT + if (currentRefPosition.delayRegFree) { - assert(refType == RefTypeKill); - if (assignedInterval != nullptr) - { - unassignPhysReg(regRecord, assignedInterval->recentRefPosition); - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - makeRegAvailable(regRecord->regNum, assignedInterval->registerType); - } - clearRegBusyUntilKill(regRecord->regNum); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + regsInUseNextLocation |= currentRefPosition.registerAssignment; } +#endif // SWIFT_SUPPORT continue; } + assert(!currentRefPosition.isPhysRegRef); + regNumber assignedRegister = REG_NA; assert(currentRefPosition.isIntervalRef()); @@ -5532,7 +5578,7 @@ void LinearScan::allocateRegisters() { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); // Is this an incoming arg register? (Note that we don't, currently, consider reassigning // an incoming arg register as having spill cost.) @@ -5574,6 +5620,7 @@ void LinearScan::allocateRegisters() #endif // DEBUG BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -5725,7 +5772,7 @@ void LinearScan::allocateRegisters() } else { - assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + assert((refType == RefTypeBB) || (refType == RefTypeKill) || (refType == RefTypeKillGCRefs)); } #ifdef DEBUG @@ -5781,62 +5828,54 @@ void LinearScan::allocateRegisters() continue; } + if (refType == RefTypeKill) + { + assert(nextKill == ¤tRefPosition); + processKills(¤tRefPosition); + nextKill = nextKill->nextRefPosition; + continue; + } + if (refType == RefTypeKillGCRefs) { spillGCRefs(¤tRefPosition); continue; } - if (currentRefPosition.isPhysRegRef) + if (refType == RefTypeFixedReg) { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition); + updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + // This is a FixedReg. Disassociate any inactive constant interval from this register. + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) - { - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - regRecord->assignedInterval = nullptr; - spillCost[regRecord->regNum] = 0; + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); - assert(otherRegRecord->assignedInterval == assignedInterval); - otherRegRecord->assignedInterval = nullptr; - spillCost[otherRegRecord->regNum] = 0; - } -#endif // TARGET_ARM - } - regsInUseThisLocation |= currentRefPosition.registerAssignment; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); - -#ifdef SWIFT_SUPPORT - if (currentRefPosition.delayRegFree) + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; } -#endif // SWIFT_SUPPORT +#endif // TARGET_ARM } - else + regsInUseThisLocation |= currentRefPosition.registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); + +#ifdef SWIFT_SUPPORT + if (currentRefPosition.delayRegFree) { - assert(refType == RefTypeKill); - if (assignedInterval != nullptr) - { - unassignPhysReg(regRecord, assignedInterval->recentRefPosition); - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - makeRegAvailable(regRecord->regNum, assignedInterval->registerType); - } - clearRegBusyUntilKill(regRecord->regNum); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + regsInUseNextLocation |= currentRefPosition.registerAssignment; } +#endif // SWIFT_SUPPORT continue; } @@ -7972,7 +8011,6 @@ void LinearScan::resolveRegisters() case RefTypeDef: // These are the ones we're interested in break; - case RefTypeKill: case RefTypeFixedReg: // These require no handling at resolution time assert(currentRefPosition->referent != nullptr); @@ -7988,6 +8026,7 @@ void LinearScan::resolveRegisters() currentRefPosition->getInterval()->getVarIndex(compiler))); currentRefPosition->referent->recentRefPosition = currentRefPosition; continue; + case RefTypeKill: case RefTypeKillGCRefs: // No action to take at resolution time, and no interval to update recentRefPosition for. continue; @@ -10937,7 +10976,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) printf("\n Kill: "); killPrinted = true; } - printf(getRegName(currentRefPosition->assignedReg())); + compiler->dumpRegMask(currentRefPosition->registerAssignment); printf(" "); break; case RefTypeFixedReg: @@ -10966,7 +11005,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) } void LinearScan::dumpLsraAllocationEvent( - LsraDumpEvent event, Interval* interval, regNumber reg, BasicBlock* currentBlock, RegisterScore registerScore) + LsraDumpEvent event, Interval* interval, regNumber reg, BasicBlock* currentBlock, RegisterScore registerScore, regMaskTP regMask) { if (!(VERBOSE)) { @@ -11171,6 +11210,15 @@ void LinearScan::dumpLsraAllocationEvent( printf("UVRes %-4s ", getRegName(reg)); break; + case LSRA_EVENT_KILL_REGS: + dumpRefPositionShort(activeRefPosition, currentBlock); + printf("None "); + dspRegMask(regMask); + printf("\n"); + dumpRefPositionShort(activeRefPosition, currentBlock); + printf(" "); + break; + // We currently don't dump anything for these events. case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE: case LSRA_EVENT_SPILL_EXTENDED_LIFETIME: @@ -11560,7 +11608,7 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr } else { - assert(refPosition->refType == RefTypeKillGCRefs); + assert((refPosition->refType == RefTypeKill) || (refPosition->refType == RefTypeKillGCRefs)); // There's no interval or reg name associated with this. printf(regNameFormat, " "); printf(" %s ", getRefTypeShortName(refPosition->refType)); @@ -11726,8 +11774,8 @@ void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) assert(nextIntervalRef[reg] == MaxLocation); assert(spillCost[reg] == 0); } - LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - assert(nextFixedRef[reg] == thisNextFixedRef); + //LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + //assert(nextFixedRef[reg] == thisNextFixedRef); #ifdef TARGET_ARM // If this is occupied by a double interval, skip the corresponding float reg. if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) @@ -11933,10 +11981,9 @@ void LinearScan::verifyFinalAllocation() break; case RefTypeKill: - assert(regRecord != nullptr); - assert(regRecord->assignedInterval == nullptr); - dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); + dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE, currentRefPosition.registerAssignment); break; + case RefTypeFixedReg: assert(regRecord != nullptr); dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); @@ -13259,17 +13306,12 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { regNumber defReg = refPosition->assignedReg(); - RegRecord* defRegRecord = linearScan->getRegisterRecord(defReg); - - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); - if (nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + // TODO-Quirk: Should pass right type here, but previously this didn't consider TYP_DOUBLE case for arm32. + unsigned nextFixedRegRefLocation = linearScan->getNextFixedRef(defReg, TYP_I_IMPL); + if (nextFixedRegRefLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; if (preferences == refPosition->registerAssignment) @@ -13725,17 +13767,11 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { regNumber defReg = refPosition->assignedReg(); - RegRecord* defRegRecord = linearScan->getRegisterRecord(defReg); - - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); - if (nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + unsigned nextFixedRegRefLocation = linearScan->getNextFixedRef(defReg, TYP_I_IMPL); + if (nextFixedRegRefLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 797c9d69c91d8..ab51f8e85aabf 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1262,6 +1262,7 @@ class LinearScan : public LinearScanInterface void setIntervalAsSplit(Interval* interval); void spillInterval(Interval* interval, RefPosition* fromRefPosition DEBUGARG(RefPosition* toRefPosition)); + void processKills(RefPosition* killRefPosition); void spillGCRefs(RefPosition* killRefPosition); /***************************************************************************** @@ -1581,6 +1582,7 @@ class LinearScan : public LinearScanInterface LSRA_EVENT_FREE_REGS, LSRA_EVENT_UPPER_VECTOR_SAVE, LSRA_EVENT_UPPER_VECTOR_RESTORE, + LSRA_EVENT_KILL_REGS, // Characteristics of the current RefPosition LSRA_EVENT_INCREMENT_RANGE_END, // ??? @@ -1606,7 +1608,8 @@ class LinearScan : public LinearScanInterface Interval* interval = nullptr, regNumber reg = REG_NA, BasicBlock* currentBlock = nullptr, - RegisterScore registerScore = NONE); + RegisterScore registerScore = NONE, + regMaskTP regMask = RBM_NONE); void validateIntervals(); @@ -1733,6 +1736,11 @@ class LinearScan : public LinearScanInterface // Ordered list of RefPositions RefPositionList refPositions; + // Head of linked list of RefTypeKill ref positions + RefPosition* killHead; + // Tail slot of linked list of RefTypeKill ref positions + RefPosition** killTail; + // Per-block variable location mappings: an array indexed by block number that yields a // pointer to an array of regNumber, one per variable. VarToRegMap* inVarToRegMaps; @@ -1897,7 +1905,7 @@ class LinearScan : public LinearScanInterface regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; - void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); + void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill); LsraLocation getNextFixedRef(regNumber regNum, var_types regType) { LsraLocation loc = nextFixedRef[regNum]; @@ -2717,7 +2725,7 @@ class RefPosition bool IsPhysRegRef() { - return ((refType == RefTypeFixedReg) || (refType == RefTypeKill)); + return (refType == RefTypeFixedReg); } void setRegOptional(bool val) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index a4709a20a5791..d189cc69997a4 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -504,7 +504,7 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) } else { - assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); + assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs) || (rp->refType == RefTypeKill)); } } @@ -576,7 +576,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, } else { - assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs); + assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs || theRefType == RefTypeKill); } #ifdef DEBUG if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) @@ -718,19 +718,11 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, // modified until codegen, which is too late. compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - for (regMaskTP candidates = mask; candidates != RBM_NONE;) - { - regNumber reg = genFirstRegNumFromMaskAndToggle(candidates); - // This assumes that these are all "special" RefTypes that - // don't need to be recorded on the tree (hence treeNode is nullptr) - RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, - genRegMask(reg)); // This MUST occupy the physical register (obviously) + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, refType, nullptr, mask); + pos->lastUse = isLastUse; - if (isLastUse) - { - pos->lastUse = true; - } - } + *killTail = pos; + killTail = &pos->nextRefPosition; } //------------------------------------------------------------------------ From fe1fc57e240c5d9a3420b5472bd36a9942380f2b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 10:58:52 +0200 Subject: [PATCH 3/8] Run jit-format --- src/coreclr/jit/lsra.cpp | 40 +++++++++++++++++++++-------------- src/coreclr/jit/lsrabuild.cpp | 11 ++++++---- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 42604a892c0fe..7a041e2b59c8c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3982,9 +3982,9 @@ void LinearScan::processKills(RefPosition* killRefPosition) regMaskTP killedRegs = killRefPosition->registerAssignment; while (killedRegs != RBM_NONE) { - regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); - RegRecord* regRecord = getRegisterRecord(killedReg); - Interval* assignedInterval = regRecord->assignedInterval; + regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); + RegRecord* regRecord = getRegisterRecord(killedReg); + Interval* assignedInterval = regRecord->assignedInterval; if (assignedInterval != nullptr) { unassignPhysReg(regRecord, assignedInterval->recentRefPosition); @@ -3993,12 +3993,15 @@ void LinearScan::processKills(RefPosition* killRefPosition) } assert((nextFixedRef[killedReg] == killRefPosition->nodeLocation) || (killedReg >= AVAILABLE_REG_COUNT)); - RefPosition* regNextRefPos = regRecord->recentRefPosition == nullptr ? regRecord->firstRefPosition : regRecord->recentRefPosition->nextRefPosition; + RefPosition* regNextRefPos = regRecord->recentRefPosition == nullptr + ? regRecord->firstRefPosition + : regRecord->recentRefPosition->nextRefPosition; updateNextFixedRef(regRecord, regNextRefPos, nextKill); } regsBusyUntilKill &= ~killRefPosition->registerAssignment; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, killRefPosition->registerAssignment)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, + killRefPosition->registerAssignment)); } //------------------------------------------------------------------------ @@ -4939,8 +4942,8 @@ void LinearScan::allocateRegistersMinimal() } #endif // DEBUG - BasicBlock* currentBlock = nullptr; - RefPosition* nextKill = killHead; + BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -5619,8 +5622,8 @@ void LinearScan::allocateRegisters() } #endif // DEBUG - BasicBlock* currentBlock = nullptr; - RefPosition* nextKill = killHead; + BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -11004,8 +11007,12 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) printf("\n\n"); } -void LinearScan::dumpLsraAllocationEvent( - LsraDumpEvent event, Interval* interval, regNumber reg, BasicBlock* currentBlock, RegisterScore registerScore, regMaskTP regMask) +void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, + Interval* interval, + regNumber reg, + BasicBlock* currentBlock, + RegisterScore registerScore, + regMaskTP regMask) { if (!(VERBOSE)) { @@ -11774,8 +11781,8 @@ void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) assert(nextIntervalRef[reg] == MaxLocation); assert(spillCost[reg] == 0); } - //LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - //assert(nextFixedRef[reg] == thisNextFixedRef); + // LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + // assert(nextFixedRef[reg] == thisNextFixedRef); #ifdef TARGET_ARM // If this is occupied by a double interval, skip the corresponding float reg. if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) @@ -11981,7 +11988,8 @@ void LinearScan::verifyFinalAllocation() break; case RefTypeKill: - dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE, currentRefPosition.registerAssignment); + dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE, + currentRefPosition.registerAssignment); break; case RefTypeFixedReg: @@ -13305,7 +13313,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { - regNumber defReg = refPosition->assignedReg(); + regNumber defReg = refPosition->assignedReg(); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. @@ -13766,7 +13774,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { - regNumber defReg = refPosition->assignedReg(); + regNumber defReg = refPosition->assignedReg(); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index d189cc69997a4..703d526399f9c 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -194,7 +194,10 @@ RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* t switch (refType) { -#define DEF_REFTYPE(name, x, y) case name: compiler->Metrics.Num ## name++; break; +#define DEF_REFTYPE(name, x, y) \ + case name: \ + compiler->Metrics.Num##name++; \ + break; #include "lsra_reftypes.h" } #endif // DEBUG @@ -719,10 +722,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, refType, nullptr, mask); - pos->lastUse = isLastUse; + pos->lastUse = isLastUse; - *killTail = pos; - killTail = &pos->nextRefPosition; + *killTail = pos; + killTail = &pos->nextRefPosition; } //------------------------------------------------------------------------ From d495a1fbdd6deb71af039baa4106565198af1172 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 11:08:13 +0200 Subject: [PATCH 4/8] Revert "Metrics for ref positions" This reverts commit c52c79822273f0e754a7197e935c569312352162. --- src/coreclr/jit/jitmetadatalist.h | 14 -------------- src/coreclr/jit/lsrabuild.cpp | 9 --------- 2 files changed, 23 deletions(-) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 47599024ac5a3..43afa6384d4c6 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -60,20 +60,6 @@ JITMETADATAMETRIC(ProfileInconsistentImporterBranchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentImporterSwitchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentChainedGDV, int, 0) JITMETADATAMETRIC(ProfileInconsistentScratchBB, int, 0) -JITMETADATAMETRIC(NumRefTypeInvalid, int, 0) -JITMETADATAMETRIC(NumRefTypeDef, int, 0) -JITMETADATAMETRIC(NumRefTypeUse, int, 0) -JITMETADATAMETRIC(NumRefTypeKill, int, 0) -JITMETADATAMETRIC(NumRefTypeBB, int, 0) -JITMETADATAMETRIC(NumRefTypeFixedReg, int, 0) -JITMETADATAMETRIC(NumRefTypeExpUse, int, 0) -JITMETADATAMETRIC(NumRefTypeParamDef, int, 0) -JITMETADATAMETRIC(NumRefTypeDummyDef, int, 0) -JITMETADATAMETRIC(NumRefTypeZeroInit, int, 0) -JITMETADATAMETRIC(NumRefTypeUpperVectorSave, int, 0) -JITMETADATAMETRIC(NumRefTypeUpperVectorRestore, int, 0) -JITMETADATAMETRIC(NumRefTypeKillGCRefs, int, 0) - #undef JITMETADATA #undef JITMETADATAINFO diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 703d526399f9c..eba4872ffa024 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -191,15 +191,6 @@ RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* t assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) || (refType == RefTypeExpUse))); } - - switch (refType) - { -#define DEF_REFTYPE(name, x, y) \ - case name: \ - compiler->Metrics.Num##name++; \ - break; -#include "lsra_reftypes.h" - } #endif // DEBUG return newRP; } From a4bfdeeb6673a07094d4ad9f201b17bc2af0300c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 11:18:04 +0200 Subject: [PATCH 5/8] Remove assert --- src/coreclr/jit/lsra.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 7a041e2b59c8c..5712457c0cae7 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -11781,8 +11781,6 @@ void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) assert(nextIntervalRef[reg] == MaxLocation); assert(spillCost[reg] == 0); } - // LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - // assert(nextFixedRef[reg] == thisNextFixedRef); #ifdef TARGET_ARM // If this is occupied by a double interval, skip the corresponding float reg. if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) From 52a32cb34fbd5807d023e6449a7e2873efb4e1b0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 13:07:59 +0200 Subject: [PATCH 6/8] Clean up --- src/coreclr/jit/lsra.cpp | 8 -------- src/coreclr/jit/lsra.h | 3 +-- src/coreclr/jit/lsrabuild.cpp | 17 ++++++----------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5712457c0cae7..8f0875d7b09fc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3966,14 +3966,6 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio // // Arguments: // killRefPosition - The RefPosition for the kill -// nextKill - [in, out] Iterator for next kill ref position -// -// Return Value: -// None. -// -// Notes: -// This is used to ensure that we have no live GC refs in registers at an -// unmanaged call. // void LinearScan::processKills(RefPosition* killRefPosition) { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ab51f8e85aabf..fe09a2e456441 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1083,8 +1083,7 @@ class LinearScan : public LinearScanInterface // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); - // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions - void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse); + void addKillForRegs(regMaskTP mask, LsraLocation currentLoc); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index eba4872ffa024..4a4af7725dd4d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -689,18 +689,14 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) } //------------------------------------------------------------------------ -// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'. +// addKillForRegs: Adds a RefTypeKill ref position for the given registers. // // Arguments: // mask - the mask (set) of registers. // currentLoc - the location at which they should be added -// refType - the type of refposition -// isLastUse - true IFF this is a last use of the register // -void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) +void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) { - assert(refType == RefTypeKill); - // The mask identifies a set of registers that will be used during // codegen. Mark these as modified here, so when we do final frame // layout, we'll know about all these registers. This is especially @@ -712,8 +708,7 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, // modified until codegen, which is too late. compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, refType, nullptr, mask); - pos->lastUse = isLastUse; + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); *killTail = pos; killTail = &pos->nextRefPosition; @@ -1119,7 +1114,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (killMask != RBM_NONE) { - addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); + addKillForRegs(killMask, currentLoc); // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee // save regs on infrequently executed paths. However, it results in a large number of asmDiffs, @@ -2538,7 +2533,7 @@ void LinearScan::buildIntervals() if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble()) { assert(compiler->fgFirstBBisScratch()); - addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true); + addKillForRegs(genRegMask(REG_SCRATCH), currentLoc + 1); currentLoc += 2; } @@ -2556,7 +2551,7 @@ void LinearScan::buildIntervals() // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars. killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); #endif - addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); + addKillForRegs(killed, currentLoc + 1); currentLoc += 2; } From 0c8b3be6ba63db651aced9ad50e6bcc38baf70da Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 13:34:58 +0200 Subject: [PATCH 7/8] Fix Swift error RefTypeFixedReg location --- src/coreclr/jit/lsraarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index f88173e05e209..cf5302dee9c71 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -419,7 +419,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree // during register allocation should be cheaper in terms of TP. - RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR); + RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc + 1, RefTypeFixedReg, call, RBM_SWIFT_ERROR); setDelayFree(pos); } #endif // SWIFT_SUPPORT From d4a5b4b4f3f7115924bde18c2f382914560d39ad Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 1 May 2024 16:47:34 +0200 Subject: [PATCH 8/8] Also fix Swift for xarch --- src/coreclr/jit/lsraxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6abc86aab0ed5..7109c9087ac4e 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1395,7 +1395,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree // during register allocation should be cheaper in terms of TP. - RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR); + RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc + 1, RefTypeFixedReg, call, RBM_SWIFT_ERROR); setDelayFree(pos); } #endif // SWIFT_SUPPORT