Skip to content

Commit

Permalink
JIT: Have lowering set up IR for post-indexed addressing
Browse files Browse the repository at this point in the history
This adds a transformation in lowering that tries to set up the IR to be
amenable to post-indexed addressing in the backend. It does so by
looking for RMW additions/subtractions of a local that was also recently
used as the address of an indirection.
  • Loading branch information
jakobbotsch committed Jul 20, 2024
1 parent 75276fc commit b084c08
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 17 deletions.
17 changes: 10 additions & 7 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
FALLTHROUGH;

case GT_STORE_LCL_FLD:
LowerStoreLocCommon(node->AsLclVarCommon());
break;
return LowerStoreLocCommon(node->AsLclVarCommon());

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
case GT_CMPXCHG:
Expand Down Expand Up @@ -4783,7 +4782,10 @@ void Lowering::LowerRet(GenTreeOp* ret)
// Arguments:
// lclStore - The store lcl node to lower.
//
void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
{
assert(lclStore->OperIs(GT_STORE_LCL_FLD, GT_STORE_LCL_VAR));
JITDUMP("lowering store lcl var/field (before):\n");
Expand Down Expand Up @@ -4870,8 +4872,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
lclStore->gtOp1 = spilledCall;
src = lclStore->gtOp1;
JITDUMP("lowering store lcl var/field has to spill call src.\n");
LowerStoreLocCommon(lclStore);
return;
return LowerStoreLocCommon(lclStore);
}
#endif // !WINDOWS_AMD64_ABI
convertToStoreObj = false;
Expand Down Expand Up @@ -4966,7 +4967,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
DISPTREERANGE(BlockRange(), objStore);
JITDUMP("\n");

return;
return objStore->gtNext;
}
}

Expand All @@ -4984,11 +4985,13 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
ContainCheckBitCast(bitcast);
}

LowerStoreLoc(lclStore);
GenTree* next = LowerStoreLoc(lclStore);

JITDUMP("lowering store lcl var/field (after):\n");
DISPTREERANGE(BlockRange(), lclStore);
JITDUMP("\n");

return next;
}

//----------------------------------------------------------------------------------------------
Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class Lowering final : public Phase
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
void LowerJmpMethod(GenTree* jmp);
void LowerRet(GenTreeOp* ret);
void LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
void LowerRetStruct(GenTreeUnOp* ret);
void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret);
void LowerCallStruct(GenTreeCall* call);
Expand Down Expand Up @@ -353,6 +353,8 @@ class Lowering final : public Phase
GenTree* LowerIndir(GenTreeIndir* ind);
bool OptimizeForLdpStp(GenTreeIndir* ind);
bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir);
bool TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store);
bool TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store);
void MarkTree(GenTree* root);
void UnmarkTree(GenTree* root);
GenTree* LowerStoreIndir(GenTreeStoreInd* node);
Expand Down Expand Up @@ -401,11 +403,11 @@ class Lowering final : public Phase
bool LowerRMWMemOp(GenTreeIndir* storeInd);
#endif

void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
void LowerStoreLoc(GenTreeLclVarCommon* tree);
void LowerRotate(GenTree* tree);
void LowerShift(GenTreeOp* shift);
void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
GenTree* LowerStoreLoc(GenTreeLclVarCommon* tree);
void LowerRotate(GenTree* tree);
void LowerShift(GenTreeOp* shift);
#ifdef FEATURE_HW_INTRINSICS
GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
Expand Down
213 changes: 212 additions & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,10 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
// This involves:
// - Widening small stores (on ARM).
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
#ifdef TARGET_ARM
// On ARM, small stores can cost a bit more in terms of code size so we try to widen them. This is legal
Expand All @@ -495,6 +498,17 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}

ContainCheckStoreLoc(storeLoc);

GenTree* next = storeLoc->gtNext;

#ifdef TARGET_ARM64
if (comp->opts.OptimizationEnabled())
{
TryMoveAddSubRMWAfterIndir(storeLoc);
}
#endif

return next;
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -1053,6 +1067,203 @@ void Lowering::LowerModPow2(GenTree* node)
ContainCheckNode(mod);
}

const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16;

//------------------------------------------------------------------------
// TryMoveAddSubRMWAfterIndir: Try to move an RMW update of a local with an
// ADD/SUB operand back to happen right after an indirection on the same local,
// attempting to make these combinable into post-indexed addressing.
//
// Arguments:
//   store - The store to a local
//
// Return Value:
//   True if the store was moved; otherwise false.
//
bool Lowering::TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store)
{
    // Only whole-local stores are candidates; field stores cannot be combined
    // into a post-indexed update of the address register.
    if (!store->OperIs(GT_STORE_LCL_VAR))
    {
        return false;
    }

    // Give up on locals that cannot be enregistered.
    unsigned lclNum = store->GetLclNum();
    if (comp->lvaGetDesc(lclNum)->lvDoNotEnregister)
    {
        return false;
    }

    // The stored value must be a non-overflowing ADD/SUB...
    GenTree* data = store->Data();
    if (!data->OperIs(GT_ADD, GT_SUB) || data->gtOverflow())
    {
        return false;
    }

    // ...of the same local and a contained integer immediate, i.e. an RMW
    // update of the form "lcl = lcl +/- cns".
    GenTree* op1 = data->gtGetOp1();
    GenTree* op2 = data->gtGetOp2();
    if (!op1->OperIs(GT_LCL_VAR) || !op2->isContainedIntOrIImmed())
    {
        return false;
    }

    if (op1->AsLclVarCommon()->GetLclNum() != lclNum)
    {
        return false;
    }

    // Look through the most recently recorded indirections in this block for
    // one whose address is this local with no offset. The search is capped so
    // that we do not spend throughput scanning long ranges.
    int maxCount = min(m_blockIndirs.Height(), POST_INDEXED_ADDRESSING_MAX_DISTANCE / 2);
    for (int i = 0; i < maxCount; i++)
    {
        SavedIndir& prev = m_blockIndirs.TopRef(i);
        if ((prev.AddrBase->GetLclNum() != lclNum) || (prev.Offset != 0))
        {
            continue;
        }

        GenTreeIndir* prevIndir = prev.Indir;
        if ((prevIndir == nullptr) || (prevIndir->gtNext == nullptr))
        {
            continue;
        }

        JITDUMP(
            "[%06u] is an RMW ADD/SUB on local V%02u which is used as the address to [%06u]. Trying to make them adjacent.\n",
            Compiler::dspTreeID(store), lclNum, Compiler::dspTreeID(prevIndir));

        if (TryMakeIndirAndStoreAdjacent(prevIndir, store))
        {
            // Invalidate the saved indir so we do not try to pair it again.
            prev.Indir = nullptr;
            return true;
        }
    }

    return false;
}

//------------------------------------------------------------------------
// TryMakeIndirAndStoreAdjacent: Try to move a store backwards to be next to
// the specified indirection.
//
// Arguments:
//   prevIndir - Indirection that comes before "store"
//   store - Store that we want to happen next to the indirection
//
// Return Value:
//   True if the store was moved; otherwise false.
//
bool Lowering::TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store)
{
    // Bail if 'store' is more than the maximum distance after 'prevIndir' in
    // LIR order; scanning/moving longer ranges is not worth the throughput.
    GenTree* cur = prevIndir;
    for (int i = 0; i < POST_INDEXED_ADDRESSING_MAX_DISTANCE; i++)
    {
        cur = cur->gtNext;
        if (cur == store)
            break;
    }

    if (cur != store)
    {
        JITDUMP(" Too far separated, giving up\n");
        return false;
    }

    JITDUMP(" They are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
#ifdef DEBUG
    // Debug-only dumper for the LIR range from the first node of prevIndir's
    // tree through 'store'; marks the indirection ("1."), the store ("2.")
    // and nodes in the store's data flow ("*").
    bool isClosed;
    GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
    GenTree* endDumpNode = store->gtNext;

    auto dumpWithMarks = [=]() {
        if (!comp->verbose)
        {
            return;
        }

        for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
        {
            const char* prefix;
            if (node == prevIndir)
                prefix = "1. ";
            else if (node == store)
                prefix = "2. ";
            else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
                prefix = "* ";
            else
                prefix = " ";

            comp->gtDispLIRNode(node, prefix);
        }
    };

#endif

    // Mark every node that is part of the store's data flow so we can tell
    // them apart from the unrelated nodes in between.
    MarkTree(store);

    INDEBUG(dumpWithMarks());
    JITDUMP("\n");

    // The indirection itself must not be an operand of the store's tree.
    assert((prevIndir->gtLIRFlags & LIR::Flags::Mark) == 0);
    m_scratchSideEffects.Clear();

    // Verify that moving the store's data flow up to the indirection (which
    // effectively moves the unmarked nodes' effects past the store) is legal,
    // accumulating the side effects of the unmarked nodes as we go.
    for (GenTree* cur = prevIndir->gtNext; cur != store; cur = cur->gtNext)
    {
        if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
        {
            // 'cur' is part of data flow of 'store', so we will be moving the
            // currently recorded effects past 'cur'.
            if (m_scratchSideEffects.InterferesWith(comp, cur, true))
            {
                JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
                UnmarkTree(store);
                return false;
            }
        }
        else
        {
            // Not part of dataflow; add its effects that will move past
            // 'store'.
            m_scratchSideEffects.AddNode(comp, cur);
        }
    }

    // Finally, the store itself must be movable past the accumulated effects.
    if (m_scratchSideEffects.InterferesWith(comp, store, true))
    {
        JITDUMP("Have interference. Giving up.\n");
        UnmarkTree(store);
        return false;
    }

    JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
        Compiler::dspTreeID(store));

    // Splice every marked node (the store's data flow, ending with the store
    // itself) to directly follow 'prevIndir', preserving their relative order.
    GenTree* previous = prevIndir;
    for (GenTree* node = prevIndir->gtNext;;)
    {
        GenTree* next = node->gtNext;

        if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
        {
            // Part of data flow. Move it to happen right after 'previous'.
            BlockRange().Remove(node);
            BlockRange().InsertAfter(previous, node);
            previous = node;
        }

        if (node == store)
        {
            break;
        }

        node = next;
    }

    JITDUMP("Result:\n\n");
    INDEBUG(dumpWithMarks());
    JITDUMP("\n");
    UnmarkTree(store);
    return true;
}

//------------------------------------------------------------------------
// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way
// that would allow one of its operands
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
// This involves:
// - Widening operations of unsigneds.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
    const bool isLclFldStore = storeLoc->OperIs(GT_STORE_LCL_FLD);
    if (isLclFldStore)
    {
        // We should only encounter this for lclVars that are lvDoNotEnregister.
        verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
    }

    // Run containment analysis on the store and hand the next node back to
    // the lowering driver.
    ContainCheckStoreLoc(storeLoc);
    return storeLoc->gtNext;
}

//------------------------------------------------------------------------
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
// This involves:
// - Widening operations of unsigneds.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
    // A LCL_FLD store should only appear for a local that is already marked
    // lvDoNotEnregister; verify that invariant in debug builds.
    if (storeLoc->OperIs(GT_STORE_LCL_FLD))
    {
        verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
    }

    ContainCheckStoreLoc(storeLoc);

    // Lowering continues with the node following the store.
    GenTree* const next = storeLoc->gtNext;
    return next;
}

//------------------------------------------------------------------------
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ void Lowering::LowerRotate(GenTree* tree)
// - Handling of contained immediates.
// - Widening some small stores.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next tree to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
// Most small locals (the exception is dependently promoted fields) have 4 byte wide stack slots, so
// we can widen the store, if profitable. The widening is only (largely) profitable for 2 byte stores.
Expand All @@ -64,6 +67,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}

ContainCheckStoreLoc(storeLoc);
return storeLoc->gtNext;
}

//------------------------------------------------------------------------
Expand Down

0 comments on commit b084c08

Please sign in to comment.