Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Porting additional SIMD Intrinsics to use SimdAsHWIntrinsic #37882

Merged
merged 25 commits into from
Jul 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
542d9a8
Moving SIMDIntrinsicInit to use SimdAsHWIntrinsic
tannergooding May 25, 2020
4aa6464
Optimize the simple case of Create(0) and Create(-1) to get_Zero and …
tannergooding May 25, 2020
6f923ea
Adding a new gtNewSimdCreateBroadcastNode method
tannergooding May 25, 2020
38824c6
Updating SIMDIntrinsicGetOne to use SimdAsHWIntrinsic
tannergooding May 27, 2020
7c00186
Fixing build errors
tannergooding May 27, 2020
2b1e9a7
Ensure all forms of ToScalar are intrinsic on x86
tannergooding Jun 14, 2020
e5ec98a
Moving SIMDIntrinsicDot to use SimdAsHWIntrinsic
tannergooding Jun 14, 2020
d96174e
Removing SIMDIntrinsicDot, SIMDIntrinsicAdd, SIMDIntrinsicMul, and SI…
tannergooding Jun 15, 2020
987b77e
Removing SIMDIntrinsicGetCount, SIMDIntrinsicGetOne, SIMDIntrinsicGet…
tannergooding Jun 15, 2020
7bc2c10
Default TYP_SIMD12 constants to be 16 bytes
tannergooding Jun 15, 2020
6897bc4
Get the simdType from the size for LowerHWIntrinsicDot
tannergooding Jun 15, 2020
ba35606
Applying formatting patch
tannergooding Jun 15, 2020
636b31a
Use AddPairwise for floating-point dot product
tannergooding Jun 15, 2020
92fc030
Applying formatting patch
tannergooding Jun 15, 2020
305d00d
Correctly handle decomposed long constants on x86
tannergooding Jun 15, 2020
786f61d
Check JitConfig.EnableHWIntrinsic when in impSIMDIntrinsic or impSimd…
tannergooding Jun 16, 2020
08460ef
Ensure AdvSimd.Arm64.Multiply is used for TYP_DOUBLE
tannergooding Jun 18, 2020
290bd56
Applying formatting patch
tannergooding Jun 18, 2020
73700d4
Updating LowerHWIntrinsicCreate to handle TYP_SIMD8 retyped as TYP_DO…
tannergooding Jun 18, 2020
95ce9c9
Ensure the CreateBroadcast SimdAsHWIntrinsic nodes have the correct g…
tannergooding Jun 19, 2020
3bdcdf9
Applying formatting patch
tannergooding Jun 19, 2020
d3afbdf
Apply suggestions from code review
tannergooding Jul 2, 2020
3ac2b4a
Adding some additional clarifying comments
tannergooding Jul 2, 2020
5560f44
Breaking NI_Vector256_ToScalar importation logic into its own checks
tannergooding Jul 2, 2020
a954af2
Applying formatting patch
tannergooding Jul 2, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/coreclr/src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode);
void genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode);
void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode);
void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode);
Expand Down
110 changes: 1 addition & 109 deletions src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3855,20 +3855,13 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicNarrow(simdNode);
break;

case SIMDIntrinsicAdd:
case SIMDIntrinsicSub:
case SIMDIntrinsicMul:
case SIMDIntrinsicDiv:
case SIMDIntrinsicBitwiseAnd:
case SIMDIntrinsicBitwiseOr:
case SIMDIntrinsicEqual:
genSIMDIntrinsicBinOp(simdNode);
break;

case SIMDIntrinsicDotProduct:
genSIMDIntrinsicDotProduct(simdNode);
break;

case SIMDIntrinsicGetItem:
genSIMDIntrinsicGetItem(simdNode);
break;
Expand Down Expand Up @@ -3945,9 +3938,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
switch (intrinsicId)
{
case SIMDIntrinsicAdd:
result = INS_fadd;
break;
case SIMDIntrinsicBitwiseAnd:
result = INS_and;
break;
Expand All @@ -3961,15 +3951,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicConvertToInt64:
result = INS_fcvtzs;
break;
case SIMDIntrinsicDiv:
result = INS_fdiv;
break;
case SIMDIntrinsicEqual:
result = INS_fcmeq;
break;
case SIMDIntrinsicMul:
result = INS_fmul;
break;
case SIMDIntrinsicNarrow:
// Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
// Return lower bytes instruction here
Expand All @@ -3995,9 +3979,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type

switch (intrinsicId)
{
case SIMDIntrinsicAdd:
result = INS_add;
break;
case SIMDIntrinsicBitwiseAnd:
result = INS_and;
break;
Expand All @@ -4014,9 +3995,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_cmeq;
break;
case SIMDIntrinsicMul:
result = INS_mul;
break;
case SIMDIntrinsicNarrow:
// Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
// Return lower bytes instruction here
Expand Down Expand Up @@ -4326,9 +4304,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual);

GenTree* op1 = simdNode->gtGetOp1();
Expand Down Expand Up @@ -4357,90 +4333,6 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
//
// Arguments:
// simdNode - The GT_SIMD node; its intrinsic id must be SIMDIntrinsicDotProduct
//
// Return Value:
// None.
//
// Notes:
// The emitted sequence is an element-wise multiply of the two operands
// followed by a horizontal add of the products. The node's type is asserted
// to equal its base (element) type, and the result is produced in the
// node's target register.
//
void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);

GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
var_types baseType = simdNode->gtSIMDBaseType;
var_types simdType = op1->TypeGet();

regNumber targetReg = simdNode->GetRegNum();
assert(targetReg != REG_NA);

var_types targetType = simdNode->TypeGet();
assert(targetType == baseType);

genConsumeOperands(simdNode);
regNumber op1Reg = op1->GetRegNum();
regNumber op2Reg = op2->GetRegNum();
// For floating-point element types the intermediate products are computed
// directly in the target register.
regNumber tmpReg = targetReg;

if (!varTypeIsFloating(baseType))
{
// Integer element types use a separate temporary taken from the float
// register set; the final value is moved from it into targetReg below.
// NOTE(review): presumably targetReg is a general-purpose register in this
// case - confirm against the register requirements set up for this node.
tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
}

instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicMul, baseType);
// Vectors larger than 8 bytes are operated on as 16 bytes, otherwise as 8 bytes.
emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
insOpts opt = genGetSimdInsOpt(attr, baseType);

// Vector multiply
GetEmitter()->emitIns_R_R_R(ins, attr, tmpReg, op1Reg, op2Reg, opt);

if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
{
// For 12Byte vectors we must zero upper bits to get correct dot product
// We do not assume upper bits are zero.
// This inserts the zero register into the 4-byte element at index 3.
GetEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpReg, REG_ZR, 3);
}

// Vector add horizontal
if (varTypeIsFloating(baseType))
{
if (baseType == TYP_FLOAT)
{
if (opt == INS_OPTS_4S)
{
// Four-element arrangement: first add adjacent pairs of the products
// in place, then fall through to the two-element reduction below.
GetEmitter()->emitIns_R_R_R(INS_faddp, EA_16BYTE, tmpReg, tmpReg, tmpReg, INS_OPTS_4S);
}
// Final pairwise add of the two remaining elements into targetReg.
GetEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg, INS_OPTS_2S);
}
else
{
// Non-float floating base type: a single pairwise add over the 2D arrangement.
GetEmitter()->emitIns_R_R(INS_faddp, EA_16BYTE, targetReg, tmpReg, INS_OPTS_2D);
}
}
else
{
// Integer element types: pick the unsigned or signed across-lanes add
// according to the signedness of the element type.
ins = varTypeIsUnsigned(baseType) ? INS_uaddlv : INS_saddlv;

GetEmitter()->emitIns_R_R(ins, attr, tmpReg, tmpReg, opt);

// Mov to integer register
// Unsigned types and elements narrower than 4 bytes use a plain mov of
// element 0; all other integer types use smov at the actual type size.
if (varTypeIsUnsigned(baseType) || (genTypeSize(baseType) < 4))
{
GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(baseType), targetReg, tmpReg, 0);
}
else
{
GetEmitter()->emitIns_R_R_I(INS_smov, emitActualTypeSize(baseType), targetReg, tmpReg, 0);
}
}

genProduceReg(simdNode);
}

//------------------------------------------------------------------------------------
// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
//
Expand Down
14 changes: 10 additions & 4 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,6 @@ class Compiler

#ifdef FEATURE_SIMD
GenTree* gtNewSIMDVectorZero(var_types simdType, var_types baseType, unsigned size);
GenTree* gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size);
#endif

GenTree* gtNewBlkOpNode(GenTree* dst, GenTree* srcOrFillVal, bool isVolatile, bool isCopyBlock);
Expand Down Expand Up @@ -2630,6 +2629,9 @@ class Compiler
var_types baseType,
unsigned size);

GenTreeHWIntrinsic* gtNewSimdCreateBroadcastNode(
var_types type, GenTree* op1, var_types baseType, unsigned size, bool isSimdAsHWIntrinsic);

GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type,
NamedIntrinsic hwIntrinsicID,
var_types baseType,
Expand Down Expand Up @@ -3751,7 +3753,7 @@ class Compiler
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);
GenTree* newobjThis);

protected:
bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa);
Expand All @@ -3761,7 +3763,8 @@ class Compiler
CORINFO_SIG_INFO* sig,
var_types retType,
var_types baseType,
unsigned simdSize);
unsigned simdSize,
GenTree* newobjThis);

GenTree* impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd,
var_types retType,
Expand All @@ -3779,7 +3782,10 @@ class Compiler
var_types retType,
unsigned simdSize);

GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false);
GenTree* getArgForHWIntrinsic(var_types argType,
CORINFO_CLASS_HANDLE argClass,
bool expectAddr = false,
GenTree* newobjThis = nullptr);
GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
GenTree* addRangeCheckIfNeeded(
NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound);
Expand Down
73 changes: 37 additions & 36 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6058,40 +6058,6 @@ GenTree* Compiler::gtNewSIMDVectorZero(var_types simdType, var_types baseType, u
initVal->gtType = baseType;
return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
}

//---------------------------------------------------------------------
// gtNewSIMDVectorOne: create a GT_SIMD node for Vector<T>.One
//
// Arguments:
//    simdType - simd vector type
//    baseType - element type of vector
//    size     - size of vector in bytes
//
// Return Value:
//    A SIMDIntrinsicInit node whose operand is a constant "one" node. Both
//    the constant's type and the node's base type are genActualType(baseType).
//
GenTree* Compiler::gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size)
{
    GenTree* oneNode;

    if (!varTypeIsSmallInt(baseType))
    {
        oneNode = gtNewOneConNode(baseType);
    }
    else
    {
        // Small integer element types use an int constant with a 1 in every
        // element-sized slot: 1-byte elements get 0x01010101, any other small
        // int size gets 0x00010001.
        const int packedOnes = (genTypeSize(baseType) == 1) ? 0x01010101 : 0x00010001;
        oneNode              = gtNewIconNode(packedOnes);
    }

    // The constant and the init node both carry the actual (widened) type.
    const var_types actualType = genActualType(baseType);
    oneNode->gtType            = actualType;

    return gtNewSIMDNode(simdType, oneNode, nullptr, SIMDIntrinsicInit, actualType, size);
}
#endif // FEATURE_SIMD

GenTreeCall* Compiler::gtNewIndCallNode(GenTree* addr, var_types type, GenTreeCall::Use* args, IL_OFFSETX ilOffset)
Expand Down Expand Up @@ -18463,11 +18429,9 @@ bool GenTree::isCommutativeSIMDIntrinsic()
assert(gtOper == GT_SIMD);
switch (AsSIMD()->gtSIMDIntrinsicID)
{
case SIMDIntrinsicAdd:
case SIMDIntrinsicBitwiseAnd:
case SIMDIntrinsicBitwiseOr:
case SIMDIntrinsicEqual:
case SIMDIntrinsicMul:
return true;
default:
return false;
Expand Down Expand Up @@ -18630,6 +18594,43 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3, op4), hwIntrinsicID, baseType, size);
}

//------------------------------------------------------------------------
// gtNewSimdCreateBroadcastNode: create a Vector*.Create hardware intrinsic
//    node that takes a single operand.
//
// Arguments:
//    type                - type of the node to create
//    op1                 - the operand passed to the Create intrinsic
//    baseType            - element type of the vector
//    size                - size of the vector in bytes
//    isSimdAsHWIntrinsic - true to build the node with gtNewSimdAsHWIntrinsicNode,
//                          false to build it with gtNewSimdHWIntrinsicNode
//
// Return Value:
//    The new node. The intrinsic is NI_Vector128_Create unless the size picks
//    another width: NI_Vector256_Create for 32 bytes on xarch, or
//    NI_Vector64_Create for 8 bytes on Arm64.
//
GenTreeHWIntrinsic* Compiler::gtNewSimdCreateBroadcastNode(
    var_types type, GenTree* op1, var_types baseType, unsigned size, bool isSimdAsHWIntrinsic)
{
#if defined(TARGET_XARCH)
#if defined(TARGET_X86)
    if (varTypeIsLong(baseType) && !op1->IsIntegralConst())
    {
        // TODO-XARCH-CQ: It may be beneficial to emit the movq
        // instruction, which takes a 64-bit memory address and
        // works on 32-bit x86 systems.
        unreached();
    }
#endif // TARGET_X86

    const NamedIntrinsic createId = (size == 32) ? NI_Vector256_Create : NI_Vector128_Create;
#elif defined(TARGET_ARM64)
    const NamedIntrinsic createId = (size == 8) ? NI_Vector64_Create : NI_Vector128_Create;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

    return isSimdAsHWIntrinsic ? gtNewSimdAsHWIntrinsicNode(type, op1, createId, baseType, size)
                               : gtNewSimdHWIntrinsicNode(type, op1, createId, baseType, size);
}

GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID)
{
SetOpLclRelatedToSIMDIntrinsic(op1);
Expand Down
29 changes: 25 additions & 4 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,14 +487,19 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op)
// Arguments:
// argType -- the required type of argument
// argClass -- the class handle of argType
// expectAddr -- if true indicates we are expecting type stack entry to be a TYP_BYREF.
// expectAddr -- if true indicates we are expecting type stack entry to be a TYP_BYREF.
// newobjThis -- For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object.
//
// Return Value:
// the validated argument
//
GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr)
GenTree* Compiler::getArgForHWIntrinsic(var_types argType,
CORINFO_CLASS_HANDLE argClass,
bool expectAddr,
GenTree* newobjThis)
{
GenTree* arg = nullptr;

if (varTypeIsStruct(argType))
{
if (!varTypeIsSIMD(argType))
Expand All @@ -504,16 +509,32 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE
argType = getSIMDTypeForSize(argSizeBytes);
}
assert(varTypeIsSIMD(argType));
arg = impSIMDPopStack(argType, expectAddr);
assert(varTypeIsSIMD(arg->TypeGet()));

if (newobjThis == nullptr)
{
arg = impSIMDPopStack(argType, expectAddr);
assert(varTypeIsSIMD(arg->TypeGet()));
}
else
{
assert((newobjThis->gtOper == GT_ADDR) && (newobjThis->AsOp()->gtOp1->gtOper == GT_LCL_VAR));
arg = newobjThis;

// push newobj result on type stack
unsigned tmp = arg->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum();
impPushOnStack(gtNewLclvNode(tmp, lvaGetRealType(tmp)), verMakeTypeInfo(argClass).NormaliseForStack());
}
}
else
{
assert(varTypeIsArithmetic(argType));

arg = impPopStack().val;
assert(varTypeIsArithmetic(arg->TypeGet()));

assert(genActualType(arg->gtType) == genActualType(argType));
}

return arg;
}

Expand Down
Loading