Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] x86/x64 - improved localloc codegen in some cases #76851

Merged
merged 15 commits into from
Nov 1, 2022
11 changes: 7 additions & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1430,13 +1430,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

void genReturn(GenTree* treeNode);

#ifdef TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack);
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
#else // !TARGET_XARCH
void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp);

#if defined(TARGET_XARCH)
void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp);
#endif // defined(TARGET_XARCH)
#endif // !TARGET_XARCH

void genLclHeap(GenTree* tree);

Expand Down
71 changes: 28 additions & 43 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2216,36 +2216,32 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
//
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// canTrack - x86 only: whether or not to track the SP adjustment
TIHan marked this conversation as resolved.
Show resolved Hide resolved
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack)
{
assert(spDelta < 0);

// We assert that the SP change is less than one page. If it's greater, you should have called a
// function that does a probe, which will in turn call this function.
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize());

#ifdef TARGET_X86
if (regTmp != REG_NA)
#ifdef TARGET_AMD64
// We always track the SP adjustment on X64.
canTrack = true;
#endif // TARGET_AMD64

if (canTrack)
{
// For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment
// to ESP. So do the work in the count register.
// TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
// creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
// track".
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
else
#endif // TARGET_X86
{
inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
// For x86, some cases don't want to track the adjustment to SP.
inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE);
}
}

Expand All @@ -2257,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm
// Arguments:
// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens,
// but the stack pointer doesn't move.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// canTrack - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// None.
//
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp)
void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack)
{
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
genStackPointerConstantAdjustment(spDelta, regTmp);
genStackPointerConstantAdjustment(spDelta, canTrack);
}

//------------------------------------------------------------------------
Expand All @@ -2280,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum
//
// Arguments:
// spDelta - the value to add to SP. Must be negative.
// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP
// adjustment from the emitter, using this register.
// canTrack - x86 only: whether or not to track the SP adjustment
//
// Return Value:
// Offset in bytes from SP to last probed address.
//
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp)
target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack)
{
assert(spDelta < 0);

Expand All @@ -2296,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
do
{
ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp);
genStackPointerConstantAdjustmentWithProbe(spOneDelta, canTrack);
spRemainingDelta -= spOneDelta;
} while (spRemainingDelta < 0);

Expand All @@ -2323,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s
// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer,
// and probe the stack as appropriate.
//
// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently,
// requires a temporary register and extra code.
// We hide the ESP adjustment from the emitter.
//
// Arguments:
// regSpDelta - the register value to add to SP. The value in this register must be negative.
// This register might be trashed.
// regTmp - an available temporary register. Will be trashed.
//
// Return Value:
// None.
//
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp)
void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta)
{
assert(regSpDelta != REG_NA);
assert(regTmp != REG_NA);

// Tickle the pages to ensure that ESP is always valid and is
// in sync with the "stack guard page". Note that in the worst
Expand All @@ -2356,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// xor regSpDelta, regSpDelta // Overflow, pick lowest possible number
// loop:
// test ESP, [ESP+0] // tickle the page
// mov regTmp, ESP
// sub regTmp, eeGetPageSize()
// mov ESP, regTmp
// sub ESP, eeGetPageSize()
// cmp ESP, regSpDelta
// jae loop
// mov ESP, regSpDelta
Expand All @@ -2376,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re
// be on the guard page. It is OK to leave the final value of ESP on the guard page.
GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);

// Subtract a page from ESP. This is a trick to avoid the emitter trying to track the
// decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly.
inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false);
inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false);
// Subtract a page from ESP and hide the adjustment.
inst_RV_IV(INS_sub_hide, REG_SPBASE, compiler->eeGetPageSize(), EA_PTRSIZE);

inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL);
inst_JMP(EJ_jae, loop);
Expand Down Expand Up @@ -2533,7 +2519,7 @@ void CodeGen::genLclHeap(GenTree* tree)

if ((amount > 0) && !initMemOrLargeAlloc)
{
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, REG_NA);
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ true);
stackAdjustment = 0;
locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize;
goto ALLOC_DONE;
Expand Down Expand Up @@ -2595,7 +2581,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// the alloc, not after.

assert(amount < compiler->eeGetPageSize()); // must be < not <=
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt);
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ regCnt == REG_NA);
goto ALLOC_DONE;
}

Expand Down Expand Up @@ -2646,8 +2632,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// adds to ESP).

inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
regNumber regTmp = tree->GetSingleTempReg();
TIHan marked this conversation as resolved.
Show resolved Hide resolved
genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp);
genStackPointerDynamicAdjustmentWithProbe(regCnt);

// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
Expand All @@ -2667,11 +2652,11 @@ void CodeGen::genLclHeap(GenTree* tree)
(stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
compiler->eeGetPageSize()))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* canTrack */ true);
}
else
{
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA);
genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* canTrack */ true);
}
}

Expand Down Expand Up @@ -7862,7 +7847,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) ||
compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
{
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA);
genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* canTrack */ true);
}
else
{
Expand Down
18 changes: 10 additions & 8 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11236,7 +11236,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

case IF_RRW_ARD:
// Mark the destination register as holding a GCT_BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
break;

Expand All @@ -11253,7 +11253,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

case IF_ARW_RRD:
case IF_ARW_CNS:
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
break;

default:
Expand Down Expand Up @@ -11683,7 +11683,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)

// reg could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;

Expand Down Expand Up @@ -12143,7 +12143,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
case IF_RRW_MRD:

assert(id->idGCref() == GCT_BYREF);
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);

// Mark it as holding a GCT_BYREF
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
Expand Down Expand Up @@ -12709,6 +12709,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)

case INS_add:
case INS_sub:
case INS_sub_hide:
assert(id->idGCref() == GCT_BYREF);

#if 0
Expand All @@ -12731,7 +12732,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// r1/r2 could have been a GCREF as GCREF + int=BYREF
// or BYREF+/-int=BYREF
assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
#endif // DEBUG
#endif // 0

Expand Down Expand Up @@ -13209,7 +13210,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
}
if (emitThisByrefRegs & regMask)
{
assert(ins == INS_add || ins == INS_sub);
assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide);
}
#endif
// Mark it as holding a GCT_BYREF
Expand Down Expand Up @@ -14833,7 +14834,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

case INS_sub:
// Check for "sub ESP, icon"
if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
Expand All @@ -14842,7 +14843,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)

case INS_add:
// Check for "add ESP, icon"
if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
Expand Down Expand Up @@ -15322,6 +15323,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins

case INS_add:
case INS_sub:
case INS_sub_hide:
case INS_and:
case INS_or:
case INS_xor:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080,
INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
// Does not affect the stack tracking in the emitter
INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )

INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit )
INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit )
Expand Down