Skip to content

Commit

Permalink
Memory containment for FIELD_LIST operands on x86 (#65803)
Browse files Browse the repository at this point in the history
* Remove "inst_RV_TT" usages from non-XARCH code

a) It is clearer and cheaper to directly use the emitter anyway.
b) "inst_RV_TT" will be made XARCH-only in an upcoming change.

* Refactor "inst_TT" and "inst_RV_TT"

a) Make them XARCH-only.
b) Rewrite them using "OperandDesc".

* Memory containment for FIELD_LIST operands on x86

Enable broader containment for FIELD_LIST operands on x86.

The primary benefits come from containing loads that long
decomposition has left (e. g. LCL_FLDs):

```diff
-       mov      ecx, dword ptr [ebp+08H]
-       mov      edx, dword ptr [ebp+0CH]
-       push     edx
-       push     ecx
+       push     dword ptr [ebp+0CH]
+       push     dword ptr [ebp+08H]
```

* emitIns_A: add function header

* Remove the "UNDONE: ..." printing

Insert an assert instead about what we expect and handle GCInfo-wise.
  • Loading branch information
SingleAccretion authored Mar 31, 2022
1 parent a847ddf commit e402d8e
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 440 deletions.
17 changes: 5 additions & 12 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1224,7 +1224,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#endif // TARGET_XARCH

void genCodeForCast(GenTreeOp* tree);
void genCodeForLclAddr(GenTree* tree);
void genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode);
void genCodeForIndexAddr(GenTreeIndexAddr* tree);
void genCodeForIndir(GenTreeIndir* tree);
void genCodeForNegNot(GenTree* tree);
Expand Down Expand Up @@ -1474,17 +1474,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs);

void inst_TT(instruction ins, GenTree* tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN);

void inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumber reg);

void inst_RV_TT(instruction ins,
regNumber reg,
GenTree* tree,
unsigned offs = 0,
emitAttr size = EA_UNKNOWN,
insFlags flags = INS_FLAGS_DONT_CARE);

void inst_RV_SH(instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags = INS_FLAGS_DONT_CARE);

#if defined(TARGET_XARCH)
Expand Down Expand Up @@ -1602,10 +1593,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
return m_immediate;
}

bool ImmediateNeedsReloc() const
emitAttr GetEmitAttrForImmediate(emitAttr baseAttr) const
{
assert(m_kind == OperandKind::Imm);
return m_immediateNeedsReloc;
return m_immediateNeedsReloc ? EA_SET_FLG(baseAttr, EA_CNS_RELOC_FLG) : baseAttr;
}

regNumber GetReg() const
Expand All @@ -1621,6 +1612,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

OperandDesc genOperandDesc(GenTree* op);

void inst_TT(instruction ins, emitAttr size, GenTree* op1);
void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2);
void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW);
Expand Down
18 changes: 9 additions & 9 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_LCL_FLD_ADDR:
case GT_LCL_VAR_ADDR:
genCodeForLclAddr(treeNode);
genCodeForLclAddr(treeNode->AsLclVarCommon());
break;

case GT_LCL_FLD:
Expand Down Expand Up @@ -1714,22 +1714,22 @@ void CodeGen::genCodeForShift(GenTree* tree)
// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
//
// Arguments:
// tree - the node.
// lclAddrNode - the node.
//
void CodeGen::genCodeForLclAddr(GenTree* tree)
void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode)
{
assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));

var_types targetType = tree->TypeGet();
regNumber targetReg = tree->GetRegNum();
var_types targetType = lclAddrNode->TypeGet();
emitAttr size = emitTypeSize(targetType);
regNumber targetReg = lclAddrNode->GetRegNum();

// Address of a local var.
noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL));

emitAttr size = emitTypeSize(targetType);
GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs());

inst_RV_TT(INS_lea, targetReg, tree, 0, size);
genProduceReg(tree);
genProduceReg(lclAddrNode);
}

//------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1976,7 +1976,9 @@ void CodeGen::genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg)
{
// This must be a local struct.
// Load its address into srcReg.
inst_RV_TT(INS_lea, srcReg, src, 0, EA_BYREF);
unsigned varNum = src->AsLclVarCommon()->GetLclNum();
unsigned offset = src->AsLclVarCommon()->GetLclOffs();
GetEmitter()->emitIns_R_S(INS_lea, EA_BYREF, srcReg, varNum, offset);
return;
}
}
Expand Down
102 changes: 41 additions & 61 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1565,7 +1565,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_LCL_FLD_ADDR:
case GT_LCL_VAR_ADDR:
genCodeForLclAddr(treeNode);
genCodeForLclAddr(treeNode->AsLclVarCommon());
break;

case GT_LCL_FLD:
Expand Down Expand Up @@ -4583,22 +4583,22 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
//
// Arguments:
// tree - the node.
// lclAddrNode - the node.
//
void CodeGen::genCodeForLclAddr(GenTree* tree)
void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode)
{
assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));

var_types targetType = tree->TypeGet();
regNumber targetReg = tree->GetRegNum();
var_types targetType = lclAddrNode->TypeGet();
emitAttr size = emitTypeSize(targetType);
regNumber targetReg = lclAddrNode->GetRegNum();

// Address of a local var.
noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL));

emitAttr size = emitTypeSize(targetType);
GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs());

inst_RV_TT(INS_lea, targetReg, tree, 0, size);
genProduceReg(tree);
genProduceReg(lclAddrNode);
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -7661,7 +7661,6 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
break;

case GenTreePutArgStk::Kind::Push:
case GenTreePutArgStk::Kind::PushAllSlots:
assert(source->OperIs(GT_FIELD_LIST) || source->AsObj()->GetLayout()->HasGCPtr() ||
(argSize < XMM_REGSIZE_BYTES));
break;
Expand Down Expand Up @@ -7775,8 +7774,6 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
// to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
// not require rounding.
// NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
// able to detect stores into the outgoing argument area of the stack on x86.
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
int adjustment = roundUp(currentOffset - fieldOffset, 4);
if (fieldIsSlot && !varTypeIsSIMD(fieldType))
Expand All @@ -7792,6 +7789,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
AddStackLevel(pushSize);
adjustment -= pushSize;
}

m_pushStkArg = true;
}
else
Expand Down Expand Up @@ -7829,63 +7827,43 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
}

if (argReg == REG_NA)
bool canStoreWithPush = fieldIsSlot;
bool canLoadWithPush = varTypeIsI(fieldNode) || genIsValidIntReg(argReg);

if (canStoreWithPush && canLoadWithPush)
{
if (m_pushStkArg)
assert(m_pushStkArg);
assert(genTypeSize(fieldNode) == TARGET_POINTER_SIZE);
inst_TT(INS_push, emitActualTypeSize(fieldNode), fieldNode);

currentOffset -= TARGET_POINTER_SIZE;
AddStackLevel(TARGET_POINTER_SIZE);
}
else
{
// If the field is of GC type, we must use a push instruction, since the emitter is not
// otherwise able to detect stores into the outgoing argument area of the stack on x86.
assert(!varTypeIsGC(fieldNode));

// First, if needed, load the field into the temporary register.
if (argReg == REG_NA)
{
if (fieldNode->isUsedFromSpillTemp())
assert(varTypeIsIntegralOrI(fieldNode) && genIsValidIntReg(intTmpReg));

if (fieldNode->isContainedIntOrIImmed())
{
assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD?
assert(fieldNode->IsRegOptional());
TempDsc* tmp = getSpillTempDsc(fieldNode);
GetEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
regSet.tmpRlsTemp(tmp);
genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
}
else
{
assert(varTypeIsIntegralOrI(fieldNode));
switch (fieldNode->OperGet())
{
case GT_LCL_VAR:
inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
break;
case GT_CNS_INT:
if (fieldNode->IsIconHandle())
{
inst_IV_handle(INS_push, fieldNode->AsIntCon()->gtIconVal);
}
else
{
inst_IV(INS_push, fieldNode->AsIntCon()->gtIconVal);
}
break;
default:
unreached();
}
}
currentOffset -= TARGET_POINTER_SIZE;
AddStackLevel(TARGET_POINTER_SIZE);
}
else
{
// The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack.
assert(varTypeIsIntegralOrI(fieldNode));
switch (fieldNode->OperGet())
{
case GT_LCL_VAR:
inst_RV_TT(INS_mov, intTmpReg, fieldNode);
break;
case GT_CNS_INT:
genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
break;
default:
unreached();
// TODO-XArch-CQ: using "ins_Load" here is conservative, as it will always
// extend, which we can avoid if the field type is smaller than the node type.
inst_RV_TT(ins_Load(fieldNode->TypeGet()), emitTypeSize(fieldNode), intTmpReg, fieldNode);
}
genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset);

argReg = intTmpReg;
}
}
else
{

#if defined(FEATURE_SIMD)
if (fieldType == TYP_SIMD12)
{
Expand All @@ -7897,6 +7875,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
}

if (m_pushStkArg)
{
// We always push a slot-rounded size
Expand All @@ -7906,6 +7885,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)

prevFieldOffset = fieldOffset;
}

if (currentOffset != 0)
{
// We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
Expand Down
39 changes: 30 additions & 9 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4362,6 +4362,32 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld
emitAdjustStackDepthPushPop(ins);
}

//------------------------------------------------------------------------
// emitIns_A: Emit an instruction with one memory ("[address mode]") operand.
//
// Arguments:
// ins - The instruction to emit
// attr - The corresponding emit attribute
// indir - The memory operand, represented as an indirection tree
//
void emitter::emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir)
{
ssize_t offs = indir->Offset();
instrDesc* id = emitNewInstrAmd(attr, offs);
insFormat fmt = emitInsModeFormat(ins, IF_ARD);

id->idIns(ins);
emitHandleMemOp(indir, id, fmt, ins);

UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
id->idCodeSize(sz);

dispIns(id);
emitCurIGsize += sz;

emitAdjustStackDepthPushPop(ins);
}

//------------------------------------------------------------------------
// IsMovInstruction: Determines whether a give instruction is a move instruction
//
Expand Down Expand Up @@ -13102,21 +13128,16 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
}
}

// Did we push a GC ref value?
if (id->idGCref())
{
#ifdef DEBUG
printf("UNDONE: record GCref push [cns]\n");
#endif
}

break;

default:
assert(!"unexpected instruction");
}

// GC tracking for "push"es is done by "emitStackPush", for all other instructions
// that can reach here we do not expect (and do not handle) GC refs.
assert((ins == INS_push) || !id->idGCref());

return dst;
}

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ void emitIns_R(instruction ins, emitAttr attr, regNumber reg);

void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs);

void emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir);

void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));

void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srgReg, bool canSkip);
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10940,9 +10940,6 @@ void Compiler::gtDispTree(GenTree* tree,
case GenTreePutArgStk::Kind::Push:
printf(" (Push)");
break;
case GenTreePutArgStk::Kind::PushAllSlots:
printf(" (PushAllSlots)");
break;
default:
unreached();
}
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6913,7 +6913,7 @@ struct GenTreePutArgStk : public GenTreeUnOp
// block node.

enum class Kind : __int8{
Invalid, RepInstr, PartialRepInstr, Unroll, Push, PushAllSlots,
Invalid, RepInstr, PartialRepInstr, Unroll, Push,
};
Kind gtPutArgStkKind;
#endif
Expand Down Expand Up @@ -7017,7 +7017,7 @@ struct GenTreePutArgStk : public GenTreeUnOp

bool isPushKind() const
{
return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots);
return gtPutArgStkKind == Kind::Push;
}
#else // !FEATURE_PUT_STRUCT_ARG_STK
unsigned GetStackByteSize() const;
Expand Down
Loading

0 comments on commit e402d8e

Please sign in to comment.