Skip to content

Commit

Permalink
Implement Narrow and Widen using SIMDAsHWIntrinsic (#60094)
Browse files Browse the repository at this point in the history
* Moving Narrow to be implemented using SIMDAsHWIntrinsic

* Moving Widen to be implemented using SIMDAsHWIntrinsic

* Fix some handling of Narrow/Widen hwintrinsics

* Ensure that Vector.Widen is still treated as an intrinsic

* Fixing NI_VectorT128_WidenUpper on ARM64 to actually call gtNewSimdWidenUpper
  • Loading branch information
tannergooding committed Nov 12, 2021
1 parent 99dd33b commit 5fa6dd3
Show file tree
Hide file tree
Showing 22 changed files with 1,730 additions and 1,066 deletions.
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -968,9 +968,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
regNumber targetReg);
void genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode);
void genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode);
void genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode);
void genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg);
void genSIMDIntrinsicWiden(GenTreeSIMD* simdNode);
void genSIMDIntrinsic(GenTreeSIMD* simdNode);

// TYP_SIMD12 (i.e. Vector3 of size 12 bytes) is not a hardware-supported size and requires
Expand Down
138 changes: 0 additions & 138 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3898,15 +3898,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicUnOp(simdNode);
break;

case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
genSIMDIntrinsicWiden(simdNode);
break;

case SIMDIntrinsicNarrow:
genSIMDIntrinsicNarrow(simdNode);
break;

case SIMDIntrinsicSub:
case SIMDIntrinsicBitwiseAnd:
case SIMDIntrinsicBitwiseOr:
Expand Down Expand Up @@ -3995,20 +3986,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_fcmeq;
break;
case SIMDIntrinsicNarrow:
// Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
// Return lower bytes instruction here
result = INS_fcvtn;
break;
case SIMDIntrinsicSub:
result = INS_fsub;
break;
case SIMDIntrinsicWidenLo:
result = INS_fcvtl;
break;
case SIMDIntrinsicWidenHi:
result = INS_fcvtl2;
break;
default:
assert(!"Unsupported SIMD intrinsic");
unreached();
Expand Down Expand Up @@ -4036,20 +4016,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_cmeq;
break;
case SIMDIntrinsicNarrow:
// Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
// Return lower bytes instruction here
result = INS_xtn;
break;
case SIMDIntrinsicSub:
result = INS_sub;
break;
case SIMDIntrinsicWidenLo:
result = isUnsigned ? INS_uxtl : INS_sxtl;
break;
case SIMDIntrinsicWidenHi:
result = isUnsigned ? INS_uxtl2 : INS_sxtl2;
break;
default:
assert(!"Unsupported SIMD intrinsic");
unreached();
Expand Down Expand Up @@ -4228,113 +4197,6 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations
//
// Arguments:
//    simdNode - The GT_SIMD node; its intrinsic id must be SIMDIntrinsicWidenLo
//               or SIMDIntrinsicWidenHi
//
// Notes:
//    The Widen intrinsics are broken into separate intrinsics for the two results,
//    so each node handled here emits exactly one instruction. The opcode is chosen
//    by getOpForSIMDIntrinsic from the intrinsic id and the SIMD base type.
//
void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
{
    const bool isWidenHi = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi);
    assert(isWidenHi || (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo));

    GenTree*  op1       = simdNode->gtGetOp1();
    var_types baseType  = simdNode->GetSimdBaseType();
    regNumber targetReg = simdNode->GetRegNum();
    assert(targetReg != REG_NA);

    // Operands must be consumed before their registers are queried.
    genConsumeOperands(simdNode);
    regNumber op1Reg = op1->GetRegNum();

    instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);

    // The "Hi" form is emitted with a 16-byte source attribute, the "Lo" form
    // with an 8-byte one; the arrangement option is derived from that same size.
    emitAttr attr = isWidenHi ? EA_16BYTE : EA_8BYTE;
    insOpts  opt  = genGetSimdInsOpt(attr, baseType);

    GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);

    genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations
//
// Arguments:
//    simdNode - The GT_SIMD node; its intrinsic id must be SIMDIntrinsicNarrow
//               and its SIMD size must be 16 bytes
//
// Notes:
//    This intrinsic takes two arguments. The first operand is narrowed to produce the
//    lower elements of the results, and the second operand produces the high elements.
//    Two instructions are emitted: the base narrowing instruction (INS_fcvtn or INS_xtn)
//    for op1, followed by its "2" variant (INS_fcvtn2 or INS_xtn2) for op2.
//
void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
{
    assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow);

    GenTree*  op1       = simdNode->gtGetOp1();
    GenTree*  op2       = simdNode->gtGetOp2();
    var_types baseType  = simdNode->GetSimdBaseType();
    regNumber targetReg = simdNode->GetRegNum();
    assert(targetReg != REG_NA);

    // Operands must be consumed before their registers are queried.
    genConsumeOperands(simdNode);
    regNumber op1Reg = op1->GetRegNum();
    regNumber op2Reg = op2->GetRegNum();

    assert(genIsValidFloatReg(op1Reg));
    assert(genIsValidFloatReg(op2Reg));
    assert(genIsValidFloatReg(targetReg));
    // op2Reg is read by the second instruction, after the first instruction has
    // already written targetReg, so the two must not be the same register.
    assert(op2Reg != targetReg);
    assert(simdNode->GetSimdSize() == 16);

    instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
    assert((ins == INS_fcvtn) || (ins == INS_xtn));

    // Pair the lower-half instruction with its upper-half ("2") counterpart.
    instruction ins2 = (ins == INS_fcvtn) ? INS_fcvtn2 : INS_xtn2;

    insOpts opt  = INS_OPTS_NONE;
    insOpts opt2 = INS_OPTS_NONE;

    // This is not the same as genGetSimdInsOpt()
    // Basetype is the source operand type
    // However encoding is based on the destination operand type which is 1/2 the basetype.
    switch (baseType)
    {
        case TYP_ULONG:
        case TYP_LONG:
        case TYP_DOUBLE:
            opt  = INS_OPTS_2S;
            opt2 = INS_OPTS_4S;
            break;
        case TYP_UINT:
        case TYP_INT:
            opt  = INS_OPTS_4H;
            opt2 = INS_OPTS_8H;
            break;
        case TYP_USHORT:
        case TYP_SHORT:
            opt  = INS_OPTS_8B;
            opt2 = INS_OPTS_16B;
            break;
        default:
            assert(!"Unsupported narrowing element type");
            unreached();
    }

    // First narrow op1 into targetReg, then narrow op2 into targetReg with the "2" variant.
    GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg, opt);
    GetEmitter()->emitIns_R_R(ins2, EA_16BYTE, targetReg, op2Reg, opt2);

    genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
// add, sub, mul, bit-wise And, AndNot and Or.
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3283,6 +3283,13 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdNarrowNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdSqrtNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

Expand All @@ -3293,6 +3300,12 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWidenLowerNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWidenUpperNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWithElementNode(var_types type,
GenTree* op1,
GenTree* op2,
Expand Down
Loading

0 comments on commit 5fa6dd3

Please sign in to comment.