Skip to content

Commit

Permalink
Support mulx returning ValueTuple
Browse files Browse the repository at this point in the history
  • Loading branch information
CarolEidt authored and echesakov committed Nov 19, 2020
1 parent 496bc88 commit cbd01ca
Show file tree
Hide file tree
Showing 18 changed files with 8,054 additions and 99 deletions.
4,179 changes: 4,179 additions & 0 deletions build-tests.out

Large diffs are not rendered by default.

2,351 changes: 2,351 additions & 0 deletions build.out

Large diffs are not rendered by default.

1,299 changes: 1,299 additions & 0 deletions libs.out

Large diffs are not rendered by default.

23 changes: 0 additions & 23 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19072,29 +19072,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() const
{
return true;
}
#ifdef TARGET_XARCH
else if (HWIntrinsicInfo::MaybeMemoryStore(gtHWIntrinsicId) &&
(category == HW_Category_IMM || category == HW_Category_Scalar))
{
// Some intrinsics (without HW_Category_MemoryStore) also have MemoryStore semantics

// Bmi2/Bmi2.X64.MultiplyNoFlags may return the lower half result by a out argument
// unsafe ulong MultiplyNoFlags(ulong left, ulong right, ulong* low)
//
// So, the 3-argument form is MemoryStore
if (HWIntrinsicInfo::lookupNumArgs(this) == 3)
{
switch (gtHWIntrinsicId)
{
case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags:
return true;
default:
return false;
}
}
}
#endif // TARGET_XARCH
#endif // TARGET_XARCH || TARGET_ARM64
return false;
}
Expand Down
22 changes: 17 additions & 5 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -785,14 +785,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

if ((retType == TYP_STRUCT) && featureSIMD)
{
unsigned int sizeBytes;
baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
retType = getSIMDTypeForSize(sizeBytes);
assert(sizeBytes != 0);
unsigned int sizeBytes = 0;
baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
bool checkBaseType = true;
#ifdef TARGET_XARCH
if (sizeBytes == 0)
{
assert((intrinsic == NI_BMI2_MultiplyNoFlags2) || (intrinsic == NI_BMI2_X64_MultiplyNoFlags2));
assert(retType == TYP_STRUCT);
checkBaseType = false;
}
else
{
retType = getSIMDTypeForSize(sizeBytes);
assert(retType != TYP_STRUCT);
}
#endif

// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
// rather than deferring the decision after getting the baseType of arg.
if (!isSupportedBaseType(intrinsic, baseType))
if (checkBaseType && !isSupportedBaseType(intrinsic, baseType))
{
return nullptr;
}
Expand Down
68 changes: 28 additions & 40 deletions src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1851,8 +1851,12 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
var_types targetType = node->TypeGet();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
emitter* emit = GetEmitter();
if ((intrinsicId == NI_BMI2_MultiplyNoFlags2) || (intrinsicId == NI_BMI2_X64_MultiplyNoFlags2))
{
targetType = op1->TypeGet();
}
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
emitter* emit = GetEmitter();

assert(targetReg != REG_NA);
assert(op1 != nullptr);
Expand Down Expand Up @@ -1901,60 +1905,44 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
}

case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_MultiplyNoFlags2:
case NI_BMI2_X64_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags2:
{
int numArgs = HWIntrinsicInfo::lookupNumArgs(node);
assert(numArgs == 2 || numArgs == 3);
assert(numArgs == 2);

regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
regNumber op3Reg = REG_NA;
regNumber lowReg = REG_NA;
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
regNumber lowReg = targetReg;
regNumber highReg = targetReg;

if (numArgs == 2)
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
if ((intrinsicId == NI_BMI2_MultiplyNoFlags2) || (intrinsicId == NI_BMI2_X64_MultiplyNoFlags2))
{
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
lowReg = targetReg;
}
else
{
GenTreeArgList* argList = op1->AsArgList();
op1 = argList->Current();
op1Reg = op1->GetRegNum();
argList = argList->Rest();
op2 = argList->Current();
op2Reg = op2->GetRegNum();
argList = argList->Rest();
GenTree* op3 = argList->Current();
op3Reg = op3->GetRegNum();
assert(!op3->isContained());
assert(op3Reg != op1Reg);
assert(op3Reg != targetReg);
assert(op3Reg != REG_EDX);
lowReg = node->GetSingleTempReg();
assert(op3Reg != lowReg);
assert(lowReg != targetReg);
highReg = node->AsHWIntrinsic()->GetOtherReg();
}

// These do not support containment
assert(!op2->isContained());
emitAttr attr = emitTypeSize(targetType);
emitAttr attr = emitTypeSize(targetType);
GenTree* explicitSrc = op2;
// mov the first operand into implicit source operand EDX/RDX
if (op1Reg != REG_EDX)
{
assert(op2Reg != REG_EDX);
emit->emitIns_R_R(INS_mov, attr, REG_EDX, op1Reg);
if (op2Reg == REG_EDX)
{
explicitSrc = op1;
}
else
{
emit->emitIns_R_R(INS_mov, attr, REG_EDX, op1Reg);
}
}

// generate code for MULX
genHWIntrinsic_R_R_RM(node, ins, attr, targetReg, lowReg, op2);

// If requires the lower half result, store in the memory pointed to by op3
if (numArgs == 3)
{
emit->emitIns_AR_R(INS_mov, attr, lowReg, op3Reg, 0);
}
genHWIntrinsic_R_R_RM(node, ins, attr, highReg, lowReg, explicitSrc);

break;
}
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,8 @@ HARDWARE_INTRINSIC(BMI1_X64, BitFieldExtract,
HARDWARE_INTRINSIC(BMI2, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags2, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand All @@ -624,7 +625,8 @@ HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags,
HARDWARE_INTRINSIC(BMI2_X64, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags2, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand Down
28 changes: 28 additions & 0 deletions src/coreclr/src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,34 @@ GenTree* Compiler::impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METH
return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
}

case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_MultiplyNoFlags2:
case NI_BMI2_X64_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags2:
{
if (sig->numArgs == 3)
{
// These are implemented in terms of NI_BMI2_MultiplyNoFlags2 or NI_BMI2_X64_MultiplyNoFlags2.
assert((intrinsic == NI_BMI2_MultiplyNoFlags) || (intrinsic == NI_BMI2_X64_MultiplyNoFlags));
{
return nullptr;
}
}
assert(sig->numArgs == 2);
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;

if (callType == TYP_STRUCT)
{
CORINFO_CLASS_HANDLE structHandle = sig->retTypeSigClass;
assert(structHandle != NO_CLASS_HANDLE);
GenTreeHWIntrinsic* result = gtNewScalarHWIntrinsicNode(TYP_STRUCT, op1, op2, intrinsic);
result->SetLayout(typGetObjLayout(structHandle));
return result;
}
return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
}

default:
return nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
}

assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_OBJ, GT_CALL, GT_MKREFANY, GT_RET_EXPR, GT_COMMA) ||
(src->TypeGet() != TYP_STRUCT && src->OperIsSimdOrHWintrinsic()));
src->OperIsSimdOrHWintrinsic());

var_types asgType = src->TypeGet();

Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6178,10 +6178,10 @@ bool Lowering::CheckMultiRegLclVar(GenTreeLclVar* lclNode, const ReturnTypeDesc*
}
}
#ifdef TARGET_XARCH
// For local stores on XARCH we only handle mismatched src/dest register count for
// calls of SIMD type. If the source was another lclVar similarly promoted, we would
// For local stores on XARCH we can't handle another lclVar source.
// If the source was another lclVar similarly promoted, we would
// have broken it into multiple stores.
if (lclNode->OperIs(GT_STORE_LCL_VAR) && !lclNode->gtGetOp1()->OperIs(GT_CALL))
if (lclNode->OperIs(GT_STORE_LCL_VAR) && lclNode->gtGetOp1()->OperIs(GT_LCL_VAR))
{
canEnregister = false;
}
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/src/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
#endif
}
}
else if (src->OperIsHWIntrinsic())
{
assert(!blkNode->AsObj()->GetLayout()->HasGCPtr());
if (blkNode->OperIs(GT_STORE_OBJ))
{
blkNode->SetOper(GT_STORE_BLK);
}
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
ContainBlockStoreAddress(blkNode, size, dstAddr);
}
else
{
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,7 @@ class LinearScan : public LinearScanInterface
var_types type = tree->TypeGet();
if (type == TYP_STRUCT)
{
assert(tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR));
assert(tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR) || tree->OperIsHWIntrinsic());
GenTreeLclVar* lclVar = tree->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar);
type = varDsc->GetRegisterType(lclVar);
Expand Down
Loading

0 comments on commit cbd01ca

Please sign in to comment.