From e402d8e35bd351f06a8f75576c06e2c63df48855 Mon Sep 17 00:00:00 2001 From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> Date: Thu, 31 Mar 2022 23:25:42 +0300 Subject: [PATCH] Memory containment for `FIELD_LIST` operands on x86 (#65803) * Remove "inst_RV_TT" usages from non-XARCH code a) It is clearer and cheaper to directly use the emitter anyway. b) "inst_RV_TT" will be made XARCH-only in an upcoming change. * Refactor "inst_TT" and "inst_RV_TT" a) Make them XARCH-only. b) Rewrite them using "OperandDesc". * Memory containment for FIELD_LIST operands on x86 Enable broader containment for FIELD_LIST operands on x86. The primary benefits come from containing loads that long decomposition has left (e. g. LCL_FLDs): ```diff - mov ecx, dword ptr [ebp+08H] - mov edx, dword ptr [ebp+0CH] - push edx - push ecx + push dword ptr [ebp+0CH] + push dword ptr [ebp+08H] ``` * emitIns_A: add function header * Remove the "UNDONE: ..." printing Insert an assert instead about what we expect and handle GCInfo-wise. --- src/coreclr/jit/codegen.h | 17 +- src/coreclr/jit/codegenarmarch.cpp | 18 +- src/coreclr/jit/codegenlinear.cpp | 4 +- src/coreclr/jit/codegenxarch.cpp | 102 +++----- src/coreclr/jit/emitxarch.cpp | 39 ++- src/coreclr/jit/emitxarch.h | 2 + src/coreclr/jit/gentree.cpp | 3 - src/coreclr/jit/gentree.h | 4 +- src/coreclr/jit/instr.cpp | 392 ++++++++--------------------- src/coreclr/jit/lowerxarch.cpp | 62 ++--- src/coreclr/jit/lsraxarch.cpp | 30 +-- 11 files changed, 233 insertions(+), 440 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7820c437429da..0c5ae4c0fffa4 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1224,7 +1224,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // TARGET_XARCH void genCodeForCast(GenTreeOp* tree); - void genCodeForLclAddr(GenTree* tree); + void genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode); void genCodeForIndexAddr(GenTreeIndexAddr* tree); void genCodeForIndir(GenTreeIndir* tree); void genCodeForNegNot(GenTree* tree); @@ -1474,17 +1474,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs); - void inst_TT(instruction ins, GenTree* tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN); - void inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumber reg); - void inst_RV_TT(instruction ins, - regNumber reg, - GenTree* tree, - unsigned offs = 0, - emitAttr size = EA_UNKNOWN, - insFlags flags = INS_FLAGS_DONT_CARE); - void inst_RV_SH(instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags = INS_FLAGS_DONT_CARE); #if defined(TARGET_XARCH) @@ -1602,10 +1593,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return m_immediate; } - bool ImmediateNeedsReloc() const + emitAttr GetEmitAttrForImmediate(emitAttr baseAttr) const { assert(m_kind == OperandKind::Imm); - return m_immediateNeedsReloc; + return m_immediateNeedsReloc ? 
EA_SET_FLG(baseAttr, EA_CNS_RELOC_FLG) : baseAttr; } regNumber GetReg() const @@ -1621,6 +1612,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX OperandDesc genOperandDesc(GenTree* op); + void inst_TT(instruction ins, emitAttr size, GenTree* op1); + void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2); void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival); void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival); void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 01fbaff7a8c4d..c5435172a1188 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -260,7 +260,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: - genCodeForLclAddr(treeNode); + genCodeForLclAddr(treeNode->AsLclVarCommon()); break; case GT_LCL_FLD: @@ -1714,22 +1714,22 @@ void CodeGen::genCodeForShift(GenTree* tree) // genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. // // Arguments: -// tree - the node. +// lclAddrNode - the node. // -void CodeGen::genCodeForLclAddr(GenTree* tree) +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) { - assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); // Address of a local var. noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); - emitAttr size = emitTypeSize(targetType); + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); - inst_RV_TT(INS_lea, targetReg, tree, 0, size); - genProduceReg(tree); + genProduceReg(lclAddrNode); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index deb9e42a176d2..cfe969fea0b48 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1976,7 +1976,9 @@ void CodeGen::genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg) { // This must be a local struct. // Load its address into srcReg. - inst_RV_TT(INS_lea, srcReg, src, 0, EA_BYREF); + unsigned varNum = src->AsLclVarCommon()->GetLclNum(); + unsigned offset = src->AsLclVarCommon()->GetLclOffs(); + GetEmitter()->emitIns_R_S(INS_lea, EA_BYREF, srcReg, varNum, offset); return; } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 1cdc55a814158..66bf55d2b5e2d 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1565,7 +1565,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: - genCodeForLclAddr(treeNode); + genCodeForLclAddr(treeNode->AsLclVarCommon()); break; case GT_LCL_FLD: @@ -4583,22 +4583,22 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) // genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. // // Arguments: -// tree - the node. +// lclAddrNode - the node. 
// -void CodeGen::genCodeForLclAddr(GenTree* tree) +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) { - assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); // Address of a local var. noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); - emitAttr size = emitTypeSize(targetType); + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); - inst_RV_TT(INS_lea, targetReg, tree, 0, size); - genProduceReg(tree); + genProduceReg(lclAddrNode); } //------------------------------------------------------------------------ @@ -7661,7 +7661,6 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) break; case GenTreePutArgStk::Kind::Push: - case GenTreePutArgStk::Kind::PushAllSlots: assert(source->OperIs(GT_FIELD_LIST) || source->AsObj()->GetLayout()->HasGCPtr() || (argSize < XMM_REGSIZE_BYTES)); break; @@ -7775,8 +7774,6 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must // not require rounding. - // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise - // able to detect stores into the outgoing argument area of the stack on x86. const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4); int adjustment = roundUp(currentOffset - fieldOffset, 4); if (fieldIsSlot && !varTypeIsSIMD(fieldType)) @@ -7792,6 +7789,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) AddStackLevel(pushSize); adjustment -= pushSize; } + m_pushStkArg = true; } else @@ -7829,63 +7827,43 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) } } - if (argReg == REG_NA) + bool canStoreWithPush = fieldIsSlot; + bool canLoadWithPush = varTypeIsI(fieldNode) || genIsValidIntReg(argReg); + + if (canStoreWithPush && canLoadWithPush) { - if (m_pushStkArg) + assert(m_pushStkArg); + assert(genTypeSize(fieldNode) == TARGET_POINTER_SIZE); + inst_TT(INS_push, emitActualTypeSize(fieldNode), fieldNode); + + currentOffset -= TARGET_POINTER_SIZE; + AddStackLevel(TARGET_POINTER_SIZE); + } + else + { + // If the field is of GC type, we must use a push instruction, since the emitter is not + // otherwise able to detect stores into the outgoing argument area of the stack on x86. + assert(!varTypeIsGC(fieldNode)); + + // First, if needed, load the field into the temporary register. + if (argReg == REG_NA) { - if (fieldNode->isUsedFromSpillTemp()) + assert(varTypeIsIntegralOrI(fieldNode) && genIsValidIntReg(intTmpReg)); + + if (fieldNode->isContainedIntOrIImmed()) { - assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD? 
- assert(fieldNode->IsRegOptional()); - TempDsc* tmp = getSpillTempDsc(fieldNode); - GetEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0); - regSet.tmpRlsTemp(tmp); + genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode); } else { - assert(varTypeIsIntegralOrI(fieldNode)); - switch (fieldNode->OperGet()) - { - case GT_LCL_VAR: - inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet())); - break; - case GT_CNS_INT: - if (fieldNode->IsIconHandle()) - { - inst_IV_handle(INS_push, fieldNode->AsIntCon()->gtIconVal); - } - else - { - inst_IV(INS_push, fieldNode->AsIntCon()->gtIconVal); - } - break; - default: - unreached(); - } - } - currentOffset -= TARGET_POINTER_SIZE; - AddStackLevel(TARGET_POINTER_SIZE); - } - else - { - // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack. - assert(varTypeIsIntegralOrI(fieldNode)); - switch (fieldNode->OperGet()) - { - case GT_LCL_VAR: - inst_RV_TT(INS_mov, intTmpReg, fieldNode); - break; - case GT_CNS_INT: - genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode); - break; - default: - unreached(); + // TODO-XArch-CQ: using "ins_Load" here is conservative, as it will always + // extend, which we can avoid if the field type is smaller than the node type. + inst_RV_TT(ins_Load(fieldNode->TypeGet()), emitTypeSize(fieldNode), intTmpReg, fieldNode); } - genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset); + + argReg = intTmpReg; } - } - else - { + #if defined(FEATURE_SIMD) if (fieldType == TYP_SIMD12) { @@ -7897,6 +7875,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) { genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset); } + if (m_pushStkArg) { // We always push a slot-rounded size @@ -7906,6 +7885,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) prevFieldOffset = fieldOffset; } + if (currentOffset != 0) { // We don't expect padding at the beginning of a struct, but it could happen with explicit layout. diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 01fd6e4059832..2f097108ef937 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4362,6 +4362,32 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld emitAdjustStackDepthPushPop(ins); } +//------------------------------------------------------------------------ +// emitIns_A: Emit an instruction with one memory ("[address mode]") operand. +// +// Arguments: +// ins - The instruction to emit +// attr - The corresponding emit attribute +// indir - The memory operand, represented as an indirection tree +// +void emitter::emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir) +{ + ssize_t offs = indir->Offset(); + instrDesc* id = emitNewInstrAmd(attr, offs); + insFormat fmt = emitInsModeFormat(ins, IF_ARD); + + id->idIns(ins); + emitHandleMemOp(indir, id, fmt, ins); + + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); + id->idCodeSize(sz); + + dispIns(id); + emitCurIGsize += sz; + + emitAdjustStackDepthPushPop(ins); +} + //------------------------------------------------------------------------ // IsMovInstruction: Determines whether a give instruction is a move instruction // @@ -13102,21 +13128,16 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW); } } - - // Did we push a GC ref value? 
- if (id->idGCref()) - { -#ifdef DEBUG - printf("UNDONE: record GCref push [cns]\n"); -#endif - } - break; default: assert(!"unexpected instruction"); } + // GC tracking for "push"es is done by "emitStackPush", for all other instructions + // that can reach here we do not expect (and do not handle) GC refs. + assert((ins == INS_push) || !id->idGCref()); + return dst; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 5cef7d5aa12f2..2f912ea8cbdfb 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -321,6 +321,8 @@ void emitIns_R(instruction ins, emitAttr attr, regNumber reg); void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); +void emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir); + void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srgReg, bool canSkip); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7c4c5fe1a9d3f..5d921014cb58d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -10940,9 +10940,6 @@ void Compiler::gtDispTree(GenTree* tree, case GenTreePutArgStk::Kind::Push: printf(" (Push)"); break; - case GenTreePutArgStk::Kind::PushAllSlots: - printf(" (PushAllSlots)"); - break; default: unreached(); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 15d648d467cff..38f30fffe5fa4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6913,7 +6913,7 @@ struct GenTreePutArgStk : public GenTreeUnOp // block node. enum class Kind : __int8{ - Invalid, RepInstr, PartialRepInstr, Unroll, Push, PushAllSlots, + Invalid, RepInstr, PartialRepInstr, Unroll, Push, }; Kind gtPutArgStkKind; #endif @@ -7017,7 +7017,7 @@ struct GenTreePutArgStk : public GenTreeUnOp bool isPushKind() const { - return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots); + return gtPutArgStkKind == Kind::Push; } #else // !FEATURE_PUT_STRUCT_ARG_STK unsigned GetStackByteSize() const; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index fc33d9f06a0ee..34d55023a3ce5 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -620,113 +620,6 @@ void CodeGen::inst_RV_IV( #endif // !TARGET_ARM } -/***************************************************************************** - * - * Generate an instruction that has one operand given by a tree (which has - * been made addressable). - */ - -void CodeGen::inst_TT(instruction ins, GenTree* tree, unsigned offs, int shfv, emitAttr size) -{ - bool sizeInferred = false; - - if (size == EA_UNKNOWN) - { - sizeInferred = true; - if (instIsFP(ins)) - { - size = EA_ATTR(genTypeSize(tree->TypeGet())); - } - else - { - size = emitTypeSize(tree->TypeGet()); - } - } - -AGAIN: - - /* Is this a spilled value? 
*/ - - if (tree->gtFlags & GTF_SPILLED) - { - assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'"); - } - - switch (tree->gtOper) - { - unsigned varNum; - - case GT_LCL_VAR: - - inst_set_SV_var(tree); - goto LCL; - - case GT_LCL_FLD: - offs += tree->AsLclFld()->GetLclOffs(); - goto LCL; - - LCL: - varNum = tree->AsLclVarCommon()->GetLclNum(); - assert(varNum < compiler->lvaCount); - - if (shfv) - { - GetEmitter()->emitIns_S_I(ins, size, varNum, offs, shfv); - } - else - { - GetEmitter()->emitIns_S(ins, size, varNum, offs); - } - - return; - - case GT_CLS_VAR: - // Make sure FP instruction size matches the operand size - // (We optimized constant doubles to floats when we can, just want to - // make sure that we don't mistakenly use 8 bytes when the - // constant. - assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size)); - - if (shfv) - { - GetEmitter()->emitIns_C_I(ins, size, tree->AsClsVar()->gtClsVarHnd, offs, shfv); - } - else - { - GetEmitter()->emitIns_C(ins, size, tree->AsClsVar()->gtClsVarHnd, offs); - } - return; - - case GT_IND: - case GT_NULLCHECK: - case GT_ARR_ELEM: - { - assert(!"inst_TT not supported for GT_IND, GT_NULLCHECK or GT_ARR_ELEM"); - } - break; - -#ifdef TARGET_X86 - case GT_CNS_INT: - // We will get here for GT_MKREFANY from CodeGen::genPushArgList - assert(offs == 0); - assert(!shfv); - if (tree->IsIconHandle()) - inst_IV_handle(ins, tree->AsIntCon()->gtIconVal); - else - inst_IV(ins, tree->AsIntCon()->gtIconVal); - break; -#endif - - case GT_COMMA: - // tree->AsOp()->gtOp1 - already processed by genCreateAddrMode() - tree = tree->AsOp()->gtOp2; - goto AGAIN; - - default: - assert(!"invalid address"); - } -} - //------------------------------------------------------------------------ // inst_TT_RV: Generate a store of a lclVar // @@ -765,188 +658,6 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe GetEmitter()->emitIns_S_R(ins, size, reg, varNum, 0); } -/***************************************************************************** - * - * Generate an instruction that has one operand given by a register and the - * other one by a tree (which has been made addressable). - */ - -void CodeGen::inst_RV_TT(instruction ins, - regNumber reg, - GenTree* tree, - unsigned offs, - emitAttr size, - insFlags flags /* = INS_FLAGS_DONT_CARE */) -{ - assert(reg != REG_STK); - - if (size == EA_UNKNOWN) - { - if (!instIsFP(ins)) - { - size = emitTypeSize(tree->TypeGet()); - } - else - { - size = EA_ATTR(genTypeSize(tree->TypeGet())); - } - } - -#ifdef TARGET_XARCH -#ifdef DEBUG - // If it is a GC type and the result is not, then either - // 1) it is an LEA - // 2) optOptimizeBools() optimized if (ref != 0 && ref != 0) to if (ref & ref) - // 3) optOptimizeBools() optimized if (ref == 0 || ref == 0) to if (ref | ref) - // 4) byref - byref = int - if (tree->gtType == TYP_REF && !EA_IS_GCREF(size)) - { - assert((EA_IS_BYREF(size) && ins == INS_add) || (ins == INS_lea || ins == INS_and || ins == INS_or)); - } - if (tree->gtType == TYP_BYREF && !EA_IS_BYREF(size)) - { - assert(ins == INS_lea || ins == INS_and || ins == INS_or || ins == INS_sub); - } -#endif -#endif - -#if CPU_LOAD_STORE_ARCH - if (ins == INS_mov) - { -#if defined(TARGET_ARM64) || defined(TARGET_ARM64) - ins = ins_Move_Extend(tree->TypeGet(), false); -#else - NYI("CodeGen::inst_RV_TT with INS_mov"); -#endif - } -#endif // CPU_LOAD_STORE_ARCH - -AGAIN: - - /* Is this a spilled value? 
*/ - - if (tree->gtFlags & GTF_SPILLED) - { - assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'"); - } - - switch (tree->gtOper) - { - unsigned varNum; - - case GT_LCL_VAR: - case GT_LCL_VAR_ADDR: - - inst_set_SV_var(tree); - goto LCL; - - case GT_LCL_FLD_ADDR: - case GT_LCL_FLD: - offs += tree->AsLclFld()->GetLclOffs(); - goto LCL; - - LCL: - varNum = tree->AsLclVarCommon()->GetLclNum(); - assert(varNum < compiler->lvaCount); - -#ifdef TARGET_ARM - switch (ins) - { - case INS_mov: - ins = ins_Load(tree->TypeGet()); - FALLTHROUGH; - - case INS_lea: - case INS_ldr: - case INS_ldrh: - case INS_ldrb: - case INS_ldrsh: - case INS_ldrsb: - case INS_vldr: - assert(flags != INS_FLAGS_SET); - GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); - return; - - default: - regNumber regTmp; - regTmp = tree->GetRegNum(); - - GetEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs); - GetEmitter()->emitIns_R_R(ins, size, reg, regTmp, flags); - - regSet.verifyRegUsed(regTmp); - return; - } -#else // !TARGET_ARM - GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); - return; -#endif // !TARGET_ARM - - case GT_CLS_VAR: - // Make sure FP instruction size matches the operand size - // (We optimized constant doubles to floats when we can, just want to - // make sure that we don't mistakenly use 8 bytes when the - // constant. - assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size)); - -#if CPU_LOAD_STORE_ARCH - assert(!"GT_CLS_VAR not supported in ARM backend"); -#else // CPU_LOAD_STORE_ARCH - GetEmitter()->emitIns_R_C(ins, size, reg, tree->AsClsVar()->gtClsVarHnd, offs); -#endif // CPU_LOAD_STORE_ARCH - return; - - case GT_IND: - case GT_NULLCHECK: - case GT_ARR_ELEM: - case GT_LEA: - { - assert(!"inst_RV_TT not supported for GT_IND, GT_NULLCHECK, GT_ARR_ELEM or GT_LEA"); - } - break; - - case GT_CNS_INT: - - assert(offs == 0); - - // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. - inst_RV_IV(ins, reg, (target_ssize_t)tree->AsIntCon()->gtIconVal, emitActualTypeSize(tree->TypeGet()), - flags); - break; - - case GT_CNS_LNG: - - assert(size == EA_4BYTE || size == EA_8BYTE); - -#ifdef TARGET_AMD64 - assert(offs == 0); -#endif // TARGET_AMD64 - - target_ssize_t constVal; - emitAttr size; - if (offs == 0) - { - constVal = (target_ssize_t)(tree->AsLngCon()->gtLconVal); - size = EA_PTRSIZE; - } - else - { - constVal = (target_ssize_t)(tree->AsLngCon()->gtLconVal >> 32); - size = EA_4BYTE; - } - - inst_RV_IV(ins, reg, constVal, size, flags); - break; - - case GT_COMMA: - tree = tree->AsOp()->gtOp2; - goto AGAIN; - - default: - assert(!"invalid address"); - } -} - /***************************************************************************** * * Generate a "shift reg, icon" instruction. @@ -1105,6 +816,109 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op) return OperandDesc(varNum, offset); } +//------------------------------------------------------------------------ +// inst_TT: Generates an instruction with one operand. 
+// +// Arguments: +// ins -- The instruction being emitted +// size -- The emit size attribute +// op1 -- The operand, which may be a memory node or a node producing a register, +// or a contained immediate node +// +void CodeGen::inst_TT(instruction ins, emitAttr size, GenTree* op1) +{ + emitter* emit = GetEmitter(); + OperandDesc op1Desc = genOperandDesc(op1); + + switch (op1Desc.GetKind()) + { + case OperandKind::ClsVar: + emit->emitIns_C(ins, size, op1Desc.GetFieldHnd(), 0); + break; + + case OperandKind::Local: + emit->emitIns_S(ins, size, op1Desc.GetVarNum(), op1Desc.GetLclOffset()); + break; + + case OperandKind::Indir: + { + // Until we improve the handling of addressing modes in the emitter, we'll create a + // temporary GT_IND to generate code with. + GenTreeIndir indirForm; + GenTreeIndir* indir = op1Desc.GetIndirForm(&indirForm); + emit->emitIns_A(ins, size, indir); + } + break; + + case OperandKind::Imm: + emit->emitIns_I(ins, op1Desc.GetEmitAttrForImmediate(size), op1Desc.GetImmediate()); + break; + + case OperandKind::Reg: + emit->emitIns_R(ins, size, op1Desc.GetReg()); + break; + + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// inst_RV_TT: Generates an instruction with two operands, the first of which +// is a register. +// +// Arguments: +// ins -- The instruction being emitted +// size -- The emit size attribute +// op1Reg -- The first operand, a register +// op2 -- The operand, which may be a memory node or a node producing a register, +// or a contained immediate node +// +void CodeGen::inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2) +{ + emitter* emit = GetEmitter(); + OperandDesc op2Desc = genOperandDesc(op2); + + switch (op2Desc.GetKind()) + { + case OperandKind::ClsVar: + emit->emitIns_R_C(ins, size, op1Reg, op2Desc.GetFieldHnd(), 0); + break; + + case OperandKind::Local: + emit->emitIns_R_S(ins, size, op1Reg, op2Desc.GetVarNum(), op2Desc.GetLclOffset()); + break; + + case OperandKind::Indir: + { + // Until we improve the handling of addressing modes in the emitter, we'll create a + // temporary GT_IND to generate code with. + GenTreeIndir indirForm; + GenTreeIndir* indir = op2Desc.GetIndirForm(&indirForm); + emit->emitIns_R_A(ins, size, op1Reg, indir); + } + break; + + case OperandKind::Imm: + emit->emitIns_R_I(ins, op2Desc.GetEmitAttrForImmediate(size), op1Reg, op2Desc.GetImmediate()); + break; + + case OperandKind::Reg: + if (emit->IsMovInstruction(ins)) + { + emit->emitIns_Mov(ins, size, op1Reg, op2Desc.GetReg(), /* canSkip */ true); + } + else + { + emit->emitIns_R_R(ins, size, op1Reg, op2Desc.GetReg()); + } + break; + + default: + unreached(); + } +} + /***************************************************************************** * * Generate an instruction of the form "op reg1, reg2, icon". diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 090431ac913e5..82d488fe1e00f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -467,8 +467,6 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) if (src->OperIs(GT_FIELD_LIST)) { #ifdef TARGET_X86 - putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Invalid; - GenTreeFieldList* fieldList = src->AsFieldList(); // The code generator will push these fields in reverse order by offset. Reorder the list here s.t. 
the order
@@ -476,42 +474,35 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
         assert(fieldList->Uses().IsSorted());
         fieldList->Uses().Reverse();
 
-        // Now that the fields have been sorted, the kind of code we will generate.
-        bool allFieldsAreSlots = true;
-        unsigned prevOffset = putArgStk->GetStackByteSize();
+        // Containment checks.
         for (GenTreeFieldList::Use& use : fieldList->Uses())
         {
-            GenTree* const fieldNode   = use.GetNode();
-            const unsigned fieldOffset = use.GetOffset();
-
+            GenTree* const  fieldNode = use.GetNode();
+            const var_types fieldType = use.GetType();
             assert(!fieldNode->TypeIs(TYP_LONG));
 
-            // We can treat as a slot any field that is stored at a slot boundary, where the previous
-            // field is not in the same slot. (Note that we store the fields in reverse order.)
-            const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
-            if (!fieldIsSlot)
-            {
-                allFieldsAreSlots = false;
-            }
-
             // For x86 we must mark all integral fields as contained or reg-optional, and handle them
             // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
             // registers to be consumed atomically by the call.
             if (varTypeIsIntegralOrI(fieldNode))
             {
-                if (fieldNode->OperGet() == GT_LCL_VAR)
+                // If we are loading from an in-memory local, we would like to use "push", but this
+                // is only legal if we can safely load all 4 bytes. Retype the local node here to
+                // TYP_INT for such legal cases to make downstream (LSRA & codegen) logic simpler.
+                // Retyping is ok because we model this node as STORE(LOAD).
+                // If the field came from promotion, we allow the padding to remain undefined; if
+                // from decomposition, the field type will be INT (naturally blocking the retyping).
+                if (varTypeIsSmall(fieldNode) && (genTypeSize(fieldType) <= genTypeSize(fieldNode)) &&
+                    fieldNode->OperIsLocalRead())
                 {
-                    const LclVarDsc* varDsc = comp->lvaGetDesc(fieldNode->AsLclVarCommon());
-                    if (!varDsc->lvDoNotEnregister)
-                    {
-                        fieldNode->SetRegOptional();
-                    }
-                    else
-                    {
-                        MakeSrcContained(putArgStk, fieldNode);
-                    }
+                    fieldNode->ChangeType(TYP_INT);
+                }
+
+                if (IsContainableImmed(putArgStk, fieldNode))
+                {
+                    MakeSrcContained(putArgStk, fieldNode);
                 }
-                else if (fieldNode->IsIntCnsFitsInI32())
+                else if (IsContainableMemoryOp(fieldNode) && IsSafeToContainMem(putArgStk, fieldNode))
                 {
                     MakeSrcContained(putArgStk, fieldNode);
                 }
@@ -519,14 +510,12 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
                 {
                     // For the case where we cannot directly push the value, if we run out of registers,
                     // it would be better to defer computation until we are pushing the arguments rather
-                    // than spilling, but this situation is not all that common, as most cases of promoted
-                    // structs do not have a large number of fields, and of those most are lclVars or
-                    // copy-propagated constants.
+                    // than spilling, but this situation is not all that common, as most cases of FIELD_LIST
+                    // are promoted structs, which do not have a large number of fields, and of those
+                    // most are lclVars or copy-propagated constants.
                     fieldNode->SetRegOptional();
                 }
             }
-
-            prevOffset = fieldOffset;
         }
 
         // Set the copy kind.
@@ -535,14 +524,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
         // this tuning should probably be undertaken as a whole.
         // Also, if there are floating point fields, it may be better to use the "Unroll" mode
         // of copying the struct as a whole, if the fields are not register candidates.
- if (allFieldsAreSlots) - { - putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots; - } - else - { - putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push; - } + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push; #endif // TARGET_X86 return; } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index f9e69773d70cb..fb2a3c8a16aed 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1519,21 +1519,28 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) #endif // defined(FEATURE_SIMD) #ifdef TARGET_X86 - if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push) + // In lowering, we have marked all integral fields as usable from memory + // (either contained or reg optional), however, we will not always be able + // to use "push [mem]" in codegen, and so may have to reserve an internal + // register here (for explicit "mov"s). + if (varTypeIsIntegralOrI(fieldNode)) { + assert(genTypeSize(fieldNode) <= TARGET_POINTER_SIZE); + // We can treat as a slot any field that is stored at a slot boundary, where the previous // field is not in the same slot. (Note that we store the fields in reverse order.) - const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4); - if (intTemp == nullptr) + const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4); + const bool canStoreWithPush = fieldIsSlot; + const bool canLoadWithPush = varTypeIsI(fieldNode); + + if ((!canStoreWithPush || !canLoadWithPush) && (intTemp == nullptr)) { intTemp = buildInternalIntRegisterDefForNode(putArgStk); } - if (!fieldIsSlot && varTypeIsByte(fieldType)) + + // We can only store bytes using byteable registers. + if (!canStoreWithPush && varTypeIsByte(fieldType)) { - // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes - // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will - // need a byte-addressable register for the store. We will enforce this requirement on an internal - // register, which we can use to copy multiple byte values. intTemp->registerAssignment &= allByteRegs(); } } @@ -1544,12 +1551,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) for (GenTreeFieldList::Use& use : putArgStk->gtOp1->AsFieldList()->Uses()) { - GenTree* const fieldNode = use.GetNode(); - if (!fieldNode->isContained()) - { - BuildUse(fieldNode); - srcCount++; - } + srcCount += BuildOperandUses(use.GetNode()); } buildInternalRegisterUses();
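
Note on the legality checks above: lowering, LSRA, and codegen all gate the `push` form on the same two conditions, named `canStoreWithPush` and `canLoadWithPush` in the patch — the field must *store* to a fresh 4-byte slot, and the *load* must be able to read a full pointer-sized slot. The standalone sketch below models just those two checks; the `Field` struct, the free-function names, and the `main` harness are invented for illustration and are not JIT code.

```cpp
#include <cstdio>

// A FIELD_LIST field, reduced to the two properties the checks need.
struct Field
{
    unsigned offset; // offset of the field within the outgoing argument
    unsigned size;   // size in bytes of the load the field represents
};

// "Store with push": the field must start a fresh 4-byte stack slot -- it is
// 4-byte aligned and the previously stored (higher) field is at least a full
// slot away, since codegen pushes the fields in reverse offset order.
bool CanStoreWithPush(const Field& field, unsigned prevOffset)
{
    return ((field.offset % 4) == 0) && ((prevOffset - field.offset) >= 4);
}

// "Load with push [mem]": the read must cover a full pointer-sized slot.
// In the patch, lowering retypes small in-memory locals to TYP_INT when the
// wider 4-byte read is known to be safe, which is what lets this check pass.
bool CanLoadWithPush(const Field& field)
{
    const unsigned pointerSize = 4; // TARGET_POINTER_SIZE on x86
    return field.size == pointerSize;
}

int main()
{
    // A decomposed TYP_LONG: two 4-byte halves at offsets 4 and 0,
    // stored in reverse order (offset 4 first, then offset 0).
    Field hi{4, 4};
    Field lo{0, 4};

    // The initial prevOffset is the total argument size, mirroring
    // "prevOffset = putArgStk->GetStackByteSize()" in lowering.
    bool hiPush = CanStoreWithPush(hi, 8) && CanLoadWithPush(hi);         // true
    bool loPush = CanStoreWithPush(lo, hi.offset) && CanLoadWithPush(lo); // true

    printf("hi: %s, lo: %s\n", hiPush ? "push" : "mov", loPush ? "push" : "mov");
    return 0;
}
```

When both checks pass for both halves of a decomposed long, the two `mov`+`push` pairs in the commit message collapse into the two direct `push dword ptr [...]` instructions shown there; when either fails, LSRA's reserved internal register carries the value via an explicit `mov`.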
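Similarly, the `OperandDesc` rewrite that the new `inst_TT`/`inst_RV_TT` build on is a classify-once, dispatch-once pattern: `genOperandDesc` folds the operand tree into a descriptor, and each `inst_*` helper is a single switch over its kind. Below is a toy model of that shape with an invented string-based stand-in for the real `emitIns_*` entry points — none of the names are the JIT's, and the `Indir`/`ClsVar` kinds are omitted for brevity.

```cpp
#include <cstdio>

// Toy operand descriptor: one tag plus the payload each kind needs.
enum class OperandKind { Local, Imm, Reg };

struct OperandDesc
{
    OperandKind kind;
    unsigned    varNum; // Local: lclVar number
    long        imm;    // Imm:   immediate value
    unsigned    reg;    // Reg:   register number
};

// Stand-ins for the emitter's per-form entry points (emitIns_S/_I/_R).
void emitIns_S(const char* ins, unsigned varNum) { printf("%s [V%02u]\n", ins, varNum); }
void emitIns_I(const char* ins, long imm)        { printf("%s %ld\n", ins, imm); }
void emitIns_R(const char* ins, unsigned reg)    { printf("%s r%u\n", ins, reg); }

// The shape of the rewritten inst_TT: classify once, dispatch once.
void inst_TT(const char* ins, const OperandDesc& op)
{
    switch (op.kind)
    {
        case OperandKind::Local: emitIns_S(ins, op.varNum); break;
        case OperandKind::Imm:   emitIns_I(ins, op.imm);    break;
        case OperandKind::Reg:   emitIns_R(ins, op.reg);    break;
    }
}

int main()
{
    inst_TT("push", {OperandKind::Local, 3, 0, 0}); // push [V03]
    inst_TT("push", {OperandKind::Imm, 0, 42, 0});  // push 42
    inst_TT("push", {OperandKind::Reg, 0, 0, 1});   // push r1
    return 0;
}
```

The payoff mirrored in the patch: every caller of `inst_TT` gets local, immediate, and register operands handled uniformly, rather than each call site carrying its own `GT_LCL_VAR`/`GT_CNS_INT` switch the way the old `genPutArgStkFieldList` did.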