Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Narrow and Widen using SIMDAsHWIntrinsic #60094

Merged
merged 5 commits into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
regNumber targetReg);
void genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode);
void genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode);
void genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode);
void genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, regNumber tgtReg);
void genSIMDIntrinsicWiden(GenTreeSIMD* simdNode);
void genSIMDIntrinsic(GenTreeSIMD* simdNode);

// TYP_SIMD12 (i.e. Vector3 of size 12 bytes) is not a hardware-supported size and requires
Expand Down
138 changes: 0 additions & 138 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3894,15 +3894,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
genSIMDIntrinsicUnOp(simdNode);
break;

case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
genSIMDIntrinsicWiden(simdNode);
break;

case SIMDIntrinsicNarrow:
genSIMDIntrinsicNarrow(simdNode);
break;

case SIMDIntrinsicSub:
case SIMDIntrinsicBitwiseAnd:
case SIMDIntrinsicBitwiseOr:
Expand Down Expand Up @@ -3991,20 +3982,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_fcmeq;
break;
case SIMDIntrinsicNarrow:
// Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
// Return lower bytes instruction here
result = INS_fcvtn;
break;
case SIMDIntrinsicSub:
result = INS_fsub;
break;
case SIMDIntrinsicWidenLo:
result = INS_fcvtl;
break;
case SIMDIntrinsicWidenHi:
result = INS_fcvtl2;
break;
default:
assert(!"Unsupported SIMD intrinsic");
unreached();
Expand Down Expand Up @@ -4032,20 +4012,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicEqual:
result = INS_cmeq;
break;
case SIMDIntrinsicNarrow:
// Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
// Return lower bytes instruction here
result = INS_xtn;
break;
case SIMDIntrinsicSub:
result = INS_sub;
break;
case SIMDIntrinsicWidenLo:
result = isUnsigned ? INS_uxtl : INS_sxtl;
break;
case SIMDIntrinsicWidenHi:
result = isUnsigned ? INS_uxtl2 : INS_sxtl2;
break;
default:
assert(!"Unsupported SIMD intrinsic");
unreached();
Expand Down Expand Up @@ -4224,113 +4193,6 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations
//
// Arguments:
//    simdNode - The GT_SIMD node; its intrinsic ID must be either
//               SIMDIntrinsicWidenLo or SIMDIntrinsicWidenHi.
//
// Notes:
//    The Widen intrinsics are broken into separate intrinsics for the two results.
//    Each one is emitted as a single two-register instruction, chosen by
//    getOpForSIMDIntrinsic from the intrinsic ID and the SIMD base type, that reads
//    the op1 register and writes the node's target register.
//
void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
{
    assert((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo) ||
           (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi));

    GenTree*  op1       = simdNode->gtGetOp1();
    var_types baseType  = simdNode->GetSimdBaseType();
    regNumber targetReg = simdNode->GetRegNum();
    assert(targetReg != REG_NA);

    // Operands must be consumed before their assigned registers are queried.
    genConsumeOperands(simdNode);
    regNumber op1Reg = op1->GetRegNum();

    instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);

    // WidenHi is emitted with a 16-byte attribute and WidenLo with an 8-byte attribute;
    // the arrangement option is derived from that attribute and the base type.
    emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE;
    insOpts  opt  = genGetSimdInsOpt(attr, baseType);

    GetEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);

    genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations
//
// Arguments:
//    simdNode - The GT_SIMD node; its intrinsic ID must be SIMDIntrinsicNarrow
//               and its SIMD size must be 16 bytes.
//
// Notes:
//    This intrinsic takes two arguments. The first operand is narrowed to produce the
//    lower elements of the results, and the second operand produces the high elements.
//    Two instructions are emitted: the base narrowing instruction (INS_fcvtn or INS_xtn)
//    for op1, followed by its "2" variant (INS_fcvtn2 or INS_xtn2) for op2.
//
void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
{
    assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow);

    GenTree*  op1       = simdNode->gtGetOp1();
    GenTree*  op2       = simdNode->gtGetOp2();
    var_types baseType  = simdNode->GetSimdBaseType();
    regNumber targetReg = simdNode->GetRegNum();
    assert(targetReg != REG_NA);

    // Operands must be consumed before their assigned registers are queried.
    genConsumeOperands(simdNode);
    regNumber op1Reg = op1->GetRegNum();
    regNumber op2Reg = op2->GetRegNum();

    assert(genIsValidFloatReg(op1Reg));
    assert(genIsValidFloatReg(op2Reg));
    assert(genIsValidFloatReg(targetReg));
    // The first instruction below writes targetReg before the second one reads op2Reg,
    // so op2 must not share a register with the target.
    assert(op2Reg != targetReg);
    assert(simdNode->GetSimdSize() == 16);

    instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
    assert((ins == INS_fcvtn) || (ins == INS_xtn));

    instruction ins2 = (ins == INS_fcvtn) ? INS_fcvtn2 : INS_xtn2;

    insOpts opt  = INS_OPTS_NONE;
    insOpts opt2 = INS_OPTS_NONE;

    // This is not the same as genGetSimdInsOpt()
    // Basetype is the source operand type
    // However encoding is based on the destination operand type which is 1/2 the basetype.
    switch (baseType)
    {
        case TYP_ULONG:
        case TYP_LONG:
        case TYP_DOUBLE:
            opt  = INS_OPTS_2S;
            opt2 = INS_OPTS_4S;
            break;
        case TYP_UINT:
        case TYP_INT:
            opt  = INS_OPTS_4H;
            opt2 = INS_OPTS_8H;
            break;
        case TYP_USHORT:
        case TYP_SHORT:
            opt  = INS_OPTS_8B;
            opt2 = INS_OPTS_16B;
            break;
        default:
            assert(!"Unsupported narrowing element type");
            unreached();
    }

    // Narrow op1 into targetReg first, then narrow op2 into the same targetReg
    // using the "2" instruction variant.
    GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg, opt);
    GetEmitter()->emitIns_R_R(ins2, EA_16BYTE, targetReg, op2Reg, opt2);

    genProduceReg(simdNode);
}

//--------------------------------------------------------------------------------
// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
// add, sub, mul, bit-wise And, AndNot and Or.
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3262,6 +3262,13 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdNarrowNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdSqrtNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

Expand All @@ -3272,6 +3279,12 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWidenLowerNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWidenUpperNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWithElementNode(var_types type,
GenTree* op1,
GenTree* op2,
Expand Down
Loading