Mark and expose additional Vector functions as Intrinsic (#77562)
* Mark and expose additional Vector functions as Intrinsic

* Applying formatting patch

* Ensure Vector64.CreateScalarUnsafe for long/ulong/double is handled

* Ensure we use the right Count

* Ensure small types don't sign extend up to int (see the sketch after the change summary below)

* Ensure reflection invocation is correct for Vector<T> bitwise ops

* Adding tests and fixing a couple small typing issues

* Fixing the name of the One property

* Assert GT_RSH for simd isn't TYP_LONG on xarch

* Ensure the correct operation is picked for vector ShiftRightArithmetic

* Fixing StoreAlignedNonTemporal on Arm64
tannergooding authored Nov 2, 2022
1 parent a27ecc5 commit 13d4e6d
Showing 27 changed files with 3,985 additions and 1,445 deletions.
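
Background for the "small types don't sign extend up to int" bullet: when a byte or short lane is widened to a 32-bit value, going through the signed lane type replicates the sign bit, while going through the unsigned lane type zero-fills. A minimal standalone C++ sketch of the difference (plain scalar code, not JIT code):

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    // A byte lane holding 0x80 (128 as unsigned, -128 as signed).
    uint8_t lane = 0x80;

    // Widening through the signed type sign-extends: 0x80 -> 0xFFFFFF80 (-128).
    int viaSigned = static_cast<int8_t>(lane);

    // Widening through the unsigned type zero-extends: 0x80 -> 0x00000080 (128).
    int viaUnsigned = lane;

    printf("sign-extended: %d (0x%08X)\n", viaSigned, (unsigned)viaSigned);
    printf("zero-extended: %d (0x%08X)\n", viaUnsigned, (unsigned)viaUnsigned);
    return 0;
}
```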
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
@@ -2350,7 +2350,7 @@ class Compiler

GenTree* gtNewZeroConNode(var_types type);

GenTree* gtNewOneConNode(var_types type);
GenTree* gtNewOneConNode(var_types type, var_types simdBaseType = TYP_UNDEF);

GenTreeLclVar* gtNewStoreLclVar(unsigned dstLclNum, GenTree* src);

129 changes: 125 additions & 4 deletions src/coreclr/jit/gentree.cpp
@@ -7189,9 +7189,11 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
case TYP_SIMD12:
case TYP_SIMD16:
case TYP_SIMD32:
{
zero = gtNewVconNode(type);
zero->AsVecCon()->gtSimd32Val = {};
break;
}
#endif // FEATURE_SIMD

default:
@@ -7201,9 +7203,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
return zero;
}

GenTree* Compiler::gtNewOneConNode(var_types type)
GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = TYP_UNDEF */)
{
GenTree* one;

switch (type)
{
case TYP_INT:
@@ -7221,6 +7224,88 @@ GenTree* Compiler::gtNewOneConNode(var_types type)
one = gtNewDconNode(1.0, type);
break;

#ifdef FEATURE_SIMD
case TYP_SIMD8:
case TYP_SIMD12:
case TYP_SIMD16:
case TYP_SIMD32:
{
GenTreeVecCon* vecCon = gtNewVconNode(type);

unsigned simdSize = genTypeSize(type);
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);

switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u8[index] = 1;
}
break;
}

case TYP_SHORT:
case TYP_USHORT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u16[index] = 1;
}
break;
}

case TYP_INT:
case TYP_UINT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u32[index] = 1;
}
break;
}

case TYP_LONG:
case TYP_ULONG:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u64[index] = 1;
}
break;
}

case TYP_FLOAT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.f32[index] = 1.0f;
}
break;
}

case TYP_DOUBLE:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.f64[index] = 1.0;
}
break;
}

default:
{
unreached();
}
}

one = vecCon;
break;
}
#endif // FEATURE_SIMD

default:
unreached();
}
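
The new SIMD arm above writes the literal 1 (or 1.0f/1.0) into every lane, where the lane count is simdSize divided by the base-type size. A rough standalone C++ model of that computation, with hypothetical names (SimdVal, OneConstant) standing in for the JIT's GenTreeVecCon payload:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical 32-byte payload standing in for GenTreeVecCon's gtSimd32Val.
union SimdVal
{
    uint8_t u8[32];
    float   f32[8];
    double  f64[4];
};

// Rough model of the new SIMD arm: every lane of the constant becomes 1,
// and the lane count comes from the vector size divided by the lane size.
static SimdVal OneConstant(unsigned simdSize, unsigned laneBytes, bool isFloat)
{
    SimdVal val = {};
    unsigned simdLength = simdSize / laneBytes; // e.g. 16-byte vector, 4-byte lanes -> 4

    for (unsigned i = 0; i < simdLength; i++)
    {
        if (isFloat && (laneBytes == 4))
        {
            val.f32[i] = 1.0f;
        }
        else if (isFloat && (laneBytes == 8))
        {
            val.f64[i] = 1.0;
        }
        else
        {
            // Integer 1 in a little-endian lane: low byte is 1, the rest stay 0.
            val.u8[i * laneBytes] = 1;
        }
    }
    return val;
}

int main()
{
    SimdVal v = OneConstant(16, 4, /* isFloat */ false); // like Vector128<int>.One
    for (unsigned i = 0; i < 16; i++)
    {
        printf("%02X ", (unsigned)v.u8[i]);
    }
    printf("\n");
    return 0;
}
```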
@@ -19224,6 +19309,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
// TODO-XARCH-CQ: We could support division by constant for integral types
assert(varTypeIsFloating(simdBaseType));

if (varTypeIsArithmetic(op2))
{
op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}

if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
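
The varTypeIsArithmetic(op2) check above handles the case where the divisor arrives as a scalar: it is splatted across every lane via gtNewSimdCreateBroadcastNode so the division proceeds lane-wise. A small scalar C++ sketch of that broadcast-then-divide shape (illustrative only, not JIT code):

```cpp
#include <cstdio>

int main()
{
    float vec[4] = {2.0f, 4.0f, 6.0f, 8.0f};
    float scalar = 2.0f;

    float divisor[4];
    for (int i = 0; i < 4; i++)
        divisor[i] = scalar; // broadcast: every lane holds the scalar

    for (int i = 0; i < 4; i++)
        vec[i] /= divisor[i]; // now an ordinary lane-wise divide

    for (int i = 0; i < 4; i++)
        printf("%g ", vec[i]); // prints: 1 2 3 4
    printf("\n");
    return 0;
}
```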
@@ -19244,9 +19334,22 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
case GT_RSH:
case GT_RSZ:
{
// float and double don't have actual instructions for shifting
// so we'll just use the equivalent integer instruction instead.

if (simdBaseType == TYP_FLOAT)
{
simdBaseJitType = CORINFO_TYPE_INT;
simdBaseType = TYP_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseJitType = CORINFO_TYPE_LONG;
simdBaseType = TYP_LONG;
}

assert(!varTypeIsByte(simdBaseType));
assert(!varTypeIsFloating(simdBaseType));
assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));
assert((op != GT_RSH) || (!varTypeIsUnsigned(simdBaseType) && !varTypeIsLong(simdBaseType)));

// "over shifting" is platform specific behavior. We will match the C# behavior
// this requires we mask with (sizeof(T) * 8) - 1 which ensures the shift cannot
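
Two things happen in this hunk: float/double shifts are retyped to the same-width integer base type (a shift is a pure bit operation, so the integer instruction produces the right bits), and the shift amount is masked with (sizeof(T) * 8) - 1 so hardware over-shift behavior can't leak through, matching C#'s semantics. A scalar C++ sketch of the masking, with the hypothetical helper name ShiftLeftLikeCSharp:

```cpp
#include <cstdint>
#include <cstdio>

// C# defines x << n as x << (n & (bits - 1)); C++ makes over-shifting UB.
// Masking the amount keeps the shift within the lane width.
static uint32_t ShiftLeftLikeCSharp(uint32_t value, int amount)
{
    int mask = (sizeof(uint32_t) * 8) - 1; // 31 for a 32-bit lane
    return value << (amount & mask);
}

int main()
{
    printf("%u\n", ShiftLeftLikeCSharp(1, 33)); // 33 & 31 == 1, so prints 2
    return 0;
}
```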
@@ -19558,6 +19661,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
// TODO-AARCH-CQ: We could support division by constant for integral types
assert(varTypeIsFloating(simdBaseType));

if (varTypeIsArithmetic(op2))
{
op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}

if ((simdSize == 8) && (simdBaseType == TYP_DOUBLE))
{
intrinsic = NI_AdvSimd_DivideScalar;
@@ -19573,9 +19681,22 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
case GT_RSH:
case GT_RSZ:
{
assert(!varTypeIsFloating(simdBaseType));
assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));

// float and double don't have actual instructions for shifting
// so we'll just use the equivalent integer instruction instead.

if (simdBaseType == TYP_FLOAT)
{
simdBaseJitType = CORINFO_TYPE_INT;
simdBaseType = TYP_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseJitType = CORINFO_TYPE_LONG;
simdBaseType = TYP_LONG;
}

// "over shifting" is platform specific behavior. We will match the C# behavior
// this requires we mask with (sizeof(T) * 8) - 1 which ensures the shift cannot
// exceed the number of bits available in `T`. This is roughly equivalent to
26 changes: 22 additions & 4 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -545,8 +545,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_Create:
case NI_Vector64_CreateScalarUnsafe:
{
if (genTypeSize(simdBaseType) == 8)
{
intrinsic = NI_Vector64_Create;
}
FALLTHROUGH;
}

case NI_Vector64_Create:
case NI_Vector128_Create:
case NI_Vector128_CreateScalarUnsafe:
{
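
The remap above works because a Vector64 of an 8-byte element type (long/ulong/double) has exactly one lane, so writing only lane 0 already produces the whole vector: CreateScalarUnsafe and Create coincide. A tiny C++ sketch of the lane-count arithmetic:

```cpp
#include <cstdio>
#include <initializer_list>

int main()
{
    unsigned simdSize = 8; // Vector64 is 8 bytes wide

    for (unsigned laneBytes : {1u, 2u, 4u, 8u})
    {
        unsigned laneCount = simdSize / laneBytes;
        printf("element size %u -> %u lane(s)%s\n", laneBytes, laneCount,
               (laneCount == 1) ? " (CreateScalarUnsafe == Create)" : "");
    }
    return 0;
}
```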
@@ -1041,6 +1049,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_get_One:
case NI_Vector128_get_One:
{
assert(sig->numArgs == 0);
retNode = gtNewOneConNode(retType, simdBaseType);
break;
}

case NI_Vector64_get_Zero:
case NI_Vector128_get_Zero:
{
@@ -1544,11 +1560,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_ShiftRightArithmetic:
{
assert(sig->numArgs == 2);
genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH;

op2 = impPopStack().val;
op1 = impSIMDPopStack(retType);

retNode = gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);
break;
}
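
The importer now picks GT_RSZ (logical shift) for unsigned base types because an arithmetic right shift only differs from a logical one when there is a sign bit to replicate. A scalar C++ demonstration:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    int32_t  s = -16;          // 0xFFFFFFF0
    uint32_t u = 0xFFFFFFF0u;

    printf("signed   >> 2 : %d\n", s >> 2);              // arithmetic: -4, sign kept
    printf("unsigned >> 2 : %u\n", u >> 2);              // logical: 0x3FFFFFFC
    printf("signed as logical: %u\n", (uint32_t)s >> 2); // same bits as the unsigned case
    return 0;
}
```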
@@ -1743,17 +1760,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(numArgs == 3);
GenTree* indexOp = impStackTop(1).val;

if (!indexOp->OperIsConst())
{
// TODO-XARCH-CQ: We should always import these like we do with GetElement
// TODO-ARM64-CQ: We should always import these like we do with GetElement
// If index is not constant use software fallback.
return nullptr;
}

ssize_t imm8 = indexOp->AsIntCon()->IconValue();
ssize_t count = simdSize / genTypeSize(simdBaseType);

if (imm8 >= count || imm8 < 0)
if ((imm8 >= count) || (imm8 < 0))
{
// Using software fallback if index is out of range (throw exception)
return nullptr;
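
The guard above only imports Insert as an intrinsic when the index is a compile-time constant within [0, count); everything else returns nullptr so the managed software fallback (which performs the range check and throws) runs instead. A small C++ model with the hypothetical helper CanImportAsIntrinsic:

```cpp
#include <cstdio>

// The lane index must be a constant in [0, count), where
// count = simdSize / genTypeSize(simdBaseType).
static bool CanImportAsIntrinsic(long imm8, unsigned simdSize, unsigned laneBytes)
{
    long count = (long)(simdSize / laneBytes);
    return !((imm8 >= count) || (imm8 < 0)); // same shape as the check above
}

int main()
{
    printf("%d\n", CanImportAsIntrinsic(3, 16, 4));  // 1: lane 3 of 4 is valid
    printf("%d\n", CanImportAsIntrinsic(4, 16, 4));  // 0: out of range -> fallback
    printf("%d\n", CanImportAsIntrinsic(-1, 16, 4)); // 0: negative -> fallback
    return 0;
}
```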