diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 26c7411b71e13..c3f7c07bbfcf8 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -1953,17 +1953,31 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) genConsumeRegs(data); regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) + if (data->isContained()) { // This is only possible for a zero-init. - assert(data->IsIntegralConst(0)); + assert(data->IsIntegralConst(0) || data->IsSIMDZero()); if (varTypeIsSIMD(targetType)) { - assert(targetType == TYP_SIMD16); - assert(targetReg != REG_NA); - emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B); - genProduceReg(tree); + if (targetReg != REG_NA) + { + emit->emitIns_R_I(INS_movi, emitActualTypeSize(targetType), targetReg, 0x00, INS_OPTS_16B); + genProduceReg(tree); + } + else + { + if (targetType == TYP_SIMD16) + { + GetEmitter()->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, REG_ZR, REG_ZR, varNum, 0); + } + else + { + assert(targetType == TYP_SIMD8); + GetEmitter()->emitIns_S_R(INS_str, EA_8BYTE, REG_ZR, varNum, 0); + } + genUpdateLife(tree); + } return; } @@ -5019,7 +5033,20 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) } GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); + + if (op1->isContained()) + { + // This is only possible for a zero-init. + assert(op1->IsIntegralConst(0) || op1->IsSIMDZero()); + + // store lower 8 bytes + GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, REG_ZR, varNum, offs); + + // Store upper 4 bytes + GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, REG_ZR, varNum, offs + 8); + + return; + } regNumber operandReg = genConsumeReg(op1); // Need an addtional integer register to extract upper 4 bytes from data. diff --git a/src/coreclr/src/jit/codegenlinear.cpp b/src/coreclr/src/jit/codegenlinear.cpp index e3973ba565119..a1e90ee040ed7 100644 --- a/src/coreclr/src/jit/codegenlinear.cpp +++ b/src/coreclr/src/jit/codegenlinear.cpp @@ -1447,7 +1447,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) #ifdef FEATURE_SIMD // (In)Equality operation that produces bool result, when compared // against Vector zero, marks its Vector Zero operand as contained. - assert(tree->OperIsLeaf() || tree->IsIntegralConstVector(0)); + assert(tree->OperIsLeaf() || tree->IsSIMDZero()); #else assert(tree->OperIsLeaf()); #endif diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 2070291a20808..3323b0566f7fa 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4676,17 +4676,6 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) genStoreLclTypeSIMD12(tree); return; } - - // TODO-CQ: It would be better to simply contain the zero, rather than - // generating zero into a register. - if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI()) - { - // This is only possible for a zero-init. - noway_assert(op1->IsIntegralConst(0)); - genSIMDZero(targetType, varDsc->lvBaseType, targetReg); - genProduceReg(tree); - return; - } #endif // FEATURE_SIMD genConsumeRegs(op1); diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 12645fb12d219..efee04b7ee178 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1646,6 +1646,7 @@ struct GenTree inline bool IsFPZero(); inline bool IsIntegralConst(ssize_t constVal); inline bool IsIntegralConstVector(ssize_t constVal); + inline bool IsSIMDZero(); inline bool IsBoxedValue(); @@ -6770,6 +6771,25 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) return false; } +//------------------------------------------------------------------- +// IsSIMDZero: returns true if this this is a SIMD vector +// with all its elements equal to zero. +// +// Returns: +// True if this represents an integral const SIMD vector. +// +inline bool GenTree::IsSIMDZero() +{ +#ifdef FEATURE_SIMD + if ((gtOper == GT_SIMD) && (AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInit)) + { + return (gtGetOp1()->IsIntegralConst(0) || gtGetOp1()->IsFPZero()); + } +#endif + + return false; +} + inline bool GenTree::IsBoxedValue() { assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 6f3cbda1f55a9..87639a0fdfd8f 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -730,13 +730,20 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) return; } } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + #ifdef FEATURE_SIMD if (varTypeIsSIMD(storeLoc)) { - if (op1->IsIntegralConst(0)) + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) { - // For an InitBlk we want op1 to be contained MakeSrcContained(storeLoc, op1); + if (op1->IsSIMDZero()) + { + MakeSrcContained(op1, op1->gtGetOp1()); + } } return; } @@ -745,8 +752,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) // If the source is a containable immediate, make it contained, unless it is // an int-size or larger store of zero to memory, because we can generate smaller code // by zeroing a register and then storing it. - const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); - var_types type = varDsc->GetRegisterType(storeLoc); + var_types type = varDsc->GetRegisterType(storeLoc); if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) { MakeSrcContained(storeLoc, op1); diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index ad0426bbb620d..dc5ed420cf19f 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -1990,14 +1990,22 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) return; } } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + #ifdef FEATURE_SIMD if (varTypeIsSIMD(storeLoc)) { - if (op1->IsCnsIntOrI()) + assert(!op1->IsCnsIntOrI()); + if (storeLoc->TypeIs(TYP_SIMD12) && op1->IsSIMDZero() && varDsc->lvDoNotEnregister) { - // For an InitBlk we want op1 to be contained; otherwise we want it to - // be evaluated into an xmm register. + // For a SIMD12 store we can zero from integer registers more easily. MakeSrcContained(storeLoc, op1); + GenTree* constNode = op1->gtGetOp1(); + assert(constNode->OperIsConst()); + constNode->ClearContained(); + constNode->gtType = TYP_INT; + constNode->SetOper(GT_CNS_INT); } return; } @@ -2006,8 +2014,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) // If the source is a containable immediate, make it contained, unless it is // an int-size or larger store of zero to memory, because we can generate smaller code // by zeroing a register and then storing it. - const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); - var_types type = varDsc->GetRegisterType(storeLoc); + var_types type = varDsc->GetRegisterType(storeLoc); if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) { MakeSrcContained(storeLoc, op1); diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 7a41dd28e713a..0e578987e9e49 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -3113,7 +3113,20 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) #endif // !TARGET_64BIT else if (op1->isContained()) { - srcCount = 0; +#ifdef TARGET_XARCH + if (varTypeIsSIMD(storeLoc)) + { + // This is the zero-init case, and we need a register to hold the zero. + // (On Arm64 we can just store REG_ZR.) + assert(op1->IsSIMDZero()); + singleUseRef = BuildUse(op1->gtGetOp1()); + srcCount = 1; + } + else +#endif + { + srcCount = 0; + } } else { diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index fdff5b7298d6d..fc64cf09877f9 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -17431,7 +17431,8 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) var_types treeType = tree->TypeGet(); var_types fieldType = fldVarDsc->TypeGet(); - if (fldOffset != BAD_VAR_NUM && (genTypeSize(fieldType) == genTypeSize(treeType))) + if (fldOffset != BAD_VAR_NUM && + ((genTypeSize(fieldType) == genTypeSize(treeType)) || (varDsc->lvFieldCnt == 1))) { // There is an existing sub-field we can use. tree->AsLclFld()->SetLclNum(fieldLclIndex); diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 8ca69e35be37a..b777d1da4eeed 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -2973,13 +2973,26 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) offs = treeNode->AsLclFld()->GetLclOffs(); } - GenTree* op1 = treeNode->AsOp()->gtOp1; + regNumber tmpReg = treeNode->GetSingleTempReg(); + GenTree* op1 = treeNode->AsOp()->gtOp1; + if (op1->isContained()) + { + // This is only possible for a zero-init. + assert(op1->IsIntegralConst(0) || op1->IsSIMDZero()); + genSIMDZero(TYP_SIMD16, op1->AsSIMD()->gtSIMDBaseType, tmpReg); + + // store lower 8 bytes + GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, tmpReg, varNum, offs); + + // Store upper 4 bytes + GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8); + + return; + } + assert(!op1->isContained()); regNumber operandReg = genConsumeReg(op1); - // Need an addtional Xmm register to extract upper 4 bytes from data. - regNumber tmpReg = treeNode->GetSingleTempReg(); - // store lower 8 bytes GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);