Skip to content

Commit

Permalink
Moving CreateScalarUnsafe, ToScalar, Vector128.ToVector256Unsafe, and…
Browse files Browse the repository at this point in the history
… Vector256.GetLower to be intrinsics (dotnet/coreclr#21351)

* Moving CreateScalarUnsafe, ToScalar, Vector128.ToVector256Unsafe, and Vector256.GetLower to be intrinsics

* Adding containment support to the helper intrinsics


Commit migrated from dotnet/coreclr@1c18b32
  • Loading branch information
tannergooding authored Dec 7, 2018
1 parent 9862599 commit 44b8f2a
Show file tree
Hide file tree
Showing 9 changed files with 349 additions and 25 deletions.
5 changes: 4 additions & 1 deletion src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3411,7 +3411,10 @@ class Compiler
NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method);

#ifdef FEATURE_HW_INTRINSICS
GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig);
GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
Expand Down
106 changes: 94 additions & 12 deletions src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1254,29 +1254,111 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;

assert(node->gtGetOp1() == nullptr);
assert(compiler->compSupports(InstructionSet_SSE));
assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE));

GenTree* op1 = node->gtGetOp1();
regNumber op1Reg = REG_NA;

if (op1 != nullptr)
{
assert(!op1->OperIsList());
op1Reg = op1->gtRegNum;
genConsumeOperands(node);
}

assert(node->gtGetOp2() == nullptr);
assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);

emitter* emit = getEmitter();
emitAttr attr = EA_ATTR(node->gtSIMDSize);
emitter* emit = getEmitter();
emitAttr attr = EA_ATTR(node->gtSIMDSize);
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);

switch (intrinsicId)
{
case NI_Base_Vector128_Zero:
case NI_Base_Vector128_CreateScalarUnsafe:
case NI_Base_Vector256_CreateScalarUnsafe:
{
// When SSE2 is supported, we generate pxor for integral types otherwise just use xorps
instruction ins =
(compiler->compSupports(InstructionSet_SSE2) && varTypeIsIntegral(baseType)) ? INS_pxor : INS_xorps;
emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
if (varTypeIsIntegral(baseType))
{
genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType));
}
else
{
assert(varTypeIsFloating(baseType));

attr = emitTypeSize(baseType);

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
genHWIntrinsic_R_RM(node, ins, attr);
}
else if (targetReg != op1Reg)
{
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
}
break;
}

case NI_Base_Vector128_ToScalar:
case NI_Base_Vector256_ToScalar:
{
assert(varTypeIsFloating(baseType));

attr = emitTypeSize(TYP_SIMD16);

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
genHWIntrinsic_R_RM(node, ins, attr);
}
else if (targetReg != op1Reg)
{
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
break;
}

case NI_Base_Vector128_ToVector256:
{
// ToVector256 has zero-extend semantics in order to ensure it is deterministic.
// We always emit a move to the target register, even when op1Reg == targetReg,
// in order to ensure that bits MAXVL-1:128 are zeroed.

attr = emitTypeSize(TYP_SIMD16);

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
genHWIntrinsic_R_RM(node, ins, attr);
}
else
{
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
break;
}

case NI_Base_Vector128_ToVector256Unsafe:
case NI_Base_Vector256_GetLower:
{
if (op1->isContained() || op1->isUsedFromSpillTemp())
{
genHWIntrinsic_R_RM(node, ins, attr);
}
else if (targetReg != op1Reg)
{
// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
break;
}

case NI_Base_Vector128_Zero:
case NI_Base_Vector256_Zero:
{
// When AVX2 is supported, we generate pxor for integral types otherwise just use xorps
instruction ins =
(compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType)) ? INS_pxor : INS_xorps;
assert(op1 == nullptr);
emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
break;
}
Expand Down
11 changes: 9 additions & 2 deletions src/coreclr/src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ HARDWARE_INTRINSIC(Base_Vector128_AsSingle, "AsSingle",
HARDWARE_INTRINSIC(Base_Vector128_AsUInt16, "AsUInt16", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_AsUInt32, "AsUInt32", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_AsUInt64, "AsUInt64", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_Zero, "get_Zero", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_ToScalar, "ToScalar", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_ToVector256, "ToVector256", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe, "ToVector256Unsafe", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector128_Zero, "get_Zero", Base, -1, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_As, "As`1", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_AsByte, "AsByte", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_AsDouble, "AsDouble", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
Expand All @@ -50,7 +54,10 @@ HARDWARE_INTRINSIC(Base_Vector256_AsSingle, "AsSingle",
HARDWARE_INTRINSIC(Base_Vector256_AsUInt16, "AsUInt16", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_AsUInt32, "AsUInt32", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_AsUInt64, "AsUInt64", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_Zero, "get_Zero", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_GetLower, "GetLower", Base, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_ToScalar, "ToScalar", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Base_Vector256_Zero, "get_Zero", Base, -1, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
Expand Down
Loading

0 comments on commit 44b8f2a

Please sign in to comment.