Add lowering for As*() and dependent methods. (#83861)
* Add lowering for As*() and dependent methods.

This includes the following:
As(), AsByte(), AsDouble(), AsInt16(), AsInt32(), AsInt64(), AsNInt(), AsNUInt(), AsSByte(), AsSingle(), AsUInt16(), AsUInt32(), AsUInt64(), AsVector(), AsVector512(), GetLower(), ToVector512() (a usage sketch follows after this list)

* Handling xmm <-> zmm conversions (a conversion sketch follows after this list).

* Addressing review comments.
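
For context on what these methods do: the Vector512 As*() family reinterprets the same 512 bits under a different element type, so the JIT can lower each call to at most a register move. A minimal usage sketch, assuming the public System.Runtime.Intrinsics surface shipped with .NET 8 rather than anything added by this commit:

using System;
using System.Runtime.Intrinsics;

// One 512-bit value, broadcast from a float.
Vector512<float> v = Vector512.Create(1.0f);

// Reinterpret the same 64 bytes under different element types; no data
// conversion happens, only the element view changes.
Vector512<int>    asInt32 = v.AsInt32();
Vector512<byte>   asBytes = v.AsByte();
Vector512<double> asF64   = v.AsDouble();

// The generic form covers arbitrary element-type pairs.
Vector512<uint> asUInt32 = v.As<float, uint>();

// 1.0f is 0x3F800000, so the reinterpretation is directly visible.
Console.WriteLine($"{asInt32[0]:X8}");           // 3F800000
Console.WriteLine(asBytes[3]);                   // 63 (0x3F, high byte of the first float)
Console.WriteLine(asUInt32[0] == 0x3F800000u);   // True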
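
The xmm <-> zmm bullet refers to moving data between the narrower vector registers and the 512-bit zmm registers. On the managed side this corresponds to the widening and narrowing helpers; a short sketch, again assuming the .NET 8 public API and not code introduced here:

using System;
using System.Runtime.Intrinsics;

Vector256<int> lower = Vector256.Create(42);

// Widening: ToVector512 zeroes the upper 256 bits so the result is deterministic;
// ToVector512Unsafe leaves them undefined, which is cheaper when the caller
// overwrites them anyway.
Vector512<int> widened       = lower.ToVector512();
Vector512<int> widenedUnsafe = lower.ToVector512Unsafe();

// Narrowing: GetLower reads back the low 256 bits.
Vector256<int> narrowed = widened.GetLower();

Console.WriteLine(narrowed == lower);                   // True
Console.WriteLine(widenedUnsafe.GetLower() == lower);   // True (the low half is always defined)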
DeepakRajendrakumaran authored Mar 28, 2023
1 parent fd157a5 commit e467e59
Showing 5 changed files with 141 additions and 7 deletions.
13 changes: 13 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
@@ -1482,6 +1482,19 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_VectorT256_AsVectorUInt32:
case NI_VectorT256_AsVectorUInt64:
case NI_VectorT256_op_UnaryPlus:
case NI_Vector512_As:
case NI_Vector512_AsByte:
case NI_Vector512_AsDouble:
case NI_Vector512_AsInt16:
case NI_Vector512_AsInt32:
case NI_Vector512_AsInt64:
case NI_Vector512_AsNInt:
case NI_Vector512_AsNUInt:
case NI_Vector512_AsSByte:
case NI_Vector512_AsSingle:
case NI_Vector512_AsUInt16:
case NI_Vector512_AsUInt32:
case NI_Vector512_AsUInt64:
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
case NI_SRCS_UNSAFE_As:
34 changes: 30 additions & 4 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -1102,12 +1102,21 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
}

case NI_Vector128_ToVector256:
case NI_Vector128_ToVector512:
case NI_Vector256_ToVector512:
{
// ToVector256 has zero-extend semantics in order to ensure it is deterministic
// We always emit a move to the target register, even when op1Reg == targetReg,
// in order to ensure that Bits MAXVL-1:128 are zeroed.

- attr = emitTypeSize(TYP_SIMD16);
+ if (intrinsicId == NI_Vector256_ToVector512)
+ {
+ attr = emitTypeSize(TYP_SIMD32);
+ }
+ else
+ {
+ attr = emitTypeSize(TYP_SIMD16);
+ }

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
@@ -1124,15 +1133,24 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
case NI_Vector128_ToVector256Unsafe:
case NI_Vector256_ToVector512Unsafe:
case NI_Vector256_GetLower:
case NI_Vector512_GetLower:
case NI_Vector512_GetLower128:
{
if (op1->isContained() || op1->isUsedFromSpillTemp())
{
// We want to always emit the EA_16BYTE version here.
//
// For ToVector256Unsafe the upper bits don't matter and for GetLower we
// only actually need the lower 16-bytes, so we can just be "more efficient"

- genHWIntrinsic_R_RM(node, ins, EA_16BYTE, targetReg, op1);
+ if ((intrinsicId == NI_Vector512_GetLower) || (intrinsicId == NI_Vector256_ToVector512Unsafe))
+ {
+ attr = emitTypeSize(TYP_SIMD32);
+ }
+ else
+ {
+ attr = emitTypeSize(TYP_SIMD16);
+ }
+ genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
}
else
{
@@ -1143,7 +1161,15 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
// so the upper bits aren't impactful either allowing the same.

// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
- emit->emitIns_Mov(INS_movaps, EA_32BYTE, targetReg, op1Reg, /* canSkip */ true);
+ if ((intrinsicId == NI_Vector128_ToVector256Unsafe) || (intrinsicId == NI_Vector256_GetLower))
+ {
+ attr = emitTypeSize(TYP_SIMD32);
+ }
+ else
+ {
+ attr = emitTypeSize(TYP_SIMD64);
+ }
+ emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
}
break;
}
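
The zero-extension guarantee described in the codegen comments above is observable from managed code: widening with ToVector512 always leaves zeros in the upper half, even when the source already sits in the destination register. A small illustration, under the same .NET 8 API assumptions as the sketches above:

using System;
using System.Runtime.Intrinsics;

Vector256<int> src = Vector256.Create(-1);   // all bits set in the low 256 bits

// The upper 256 bits of the widened value are guaranteed to be zero.
Vector512<int> widened = src.ToVector512();
Console.WriteLine(widened.GetUpper() == Vector256<int>.Zero);   // True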
(diff for the remaining 3 changed files not shown)
