Skip to content

Commit

Permalink
Arm AdvSimd: Rename StoreVectorMxN to Store (#103689)
Browse files Browse the repository at this point in the history
* Arm AdvSimd: Rename StoreVectorMxN to Store

* Restore HW_Flag_InvalidNodeId flag for AdvSimd.Store node

* Rename StoreVectorMxN to Store for Mono

* Incorporate review comments and remove InvalidNodeId flag

---------

Co-authored-by: fanyang-mono <yangfan@microsoft.com>
  • Loading branch information
SwapnilGaikwad and fanyang-mono authored Jun 20, 2024
1 parent 117cfcc commit ab013a3
Show file tree
Hide file tree
Showing 12 changed files with 311 additions and 354 deletions.
107 changes: 51 additions & 56 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1909,22 +1909,63 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
{
assert(retType == TYP_VOID);
assert(sig->numArgs == 2);
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = impPopStack().val;

var_types simdType = getSIMDTypeForSize(simdSize);
if (op2->TypeGet() == TYP_STRUCT)
{
info.compNeedsConsecutiveRegisters = true;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);

op2 = impSIMDPopStack();
op1 = impPopStack().val;
if (!op2->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorN"));

if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
{
// If what we have is a BYREF, that's what we really want, so throw away the cast.
op1 = op1->gtGetOp1();
impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
op2 = gtNewLclvNode(tmp, argType);
}
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
}
else
{
if (op2->TypeGet() == TYP_SIMD16)
{
// Update the simdSize explicitly as Vector128 variant of Store() is present in AdvSimd instead of
// AdvSimd.Arm64.
simdSize = 16;
}

var_types simdType = getSIMDTypeForSize(simdSize);
op1 = impPopStack().val;

if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF))
{
// If what we have is a BYREF, that's what we really want, so throw away the cast.
op1 = op1->gtGetOp1();
}

retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
retNode = gtNewSimdStoreNode(op1, op2, simdBaseJitType, simdSize);
}
break;
}

Expand Down Expand Up @@ -2080,52 +2121,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = impPopStack().val;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

assert(op2->TypeGet() == TYP_STRUCT);
if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

if (!op2->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorNx2 temp tree"));

impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
op2 = gtNewLclvNode(tmp, argType);
}
op2 = gtConvertTableOpToFieldList(op2, fieldCount);

info.compNeedsConsecutiveRegisters = true;
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
Expand Down
44 changes: 6 additions & 38 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,40 +1280,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
unsigned regCount = 0;

assert(intrin.op2->OperIsFieldList());

GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op2Reg = firstField->GetRegNum();

#ifdef DEBUG
regNumber argReg = op2Reg;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;

GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = getNextSIMDRegWithWraparound(argReg);
}
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st2 && regCount == 2) ||
(ins == INS_st1_3regs && regCount == 3) || (ins == INS_st3 && regCount == 3) ||
(ins == INS_st1_4regs && regCount == 4) || (ins == INS_st4 && regCount == 4));
#endif

GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
break;
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
case NI_AdvSimd_StoreVectorAndZip:
case NI_AdvSimd_Arm64_StoreVectorAndZip:
{
Expand All @@ -1336,24 +1304,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
#endif
}

bool isSequentialStore = (intrin.id == NI_AdvSimd_Arm64_Store || intrin.id == NI_AdvSimd_Store);
switch (regCount)
{
case 2:
ins = INS_st2;
ins = isSequentialStore ? INS_st1_2regs : INS_st2;
break;

case 3:
ins = INS_st3;
ins = isSequentialStore ? INS_st1_3regs : INS_st3;
break;

case 4:
ins = INS_st4;
ins = isSequentialStore ? INS_st1_4regs : INS_st4;
break;

default:
unreached();
}

GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
break;
}
Expand Down
9 changes: 2 additions & 7 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,12 +464,9 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar,
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, true, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AdvSimd, Store, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVectorAndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, true, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
Expand Down Expand Up @@ -663,9 +660,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal,
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVectorAndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3, 16, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4, 16, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Store, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, true, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, true, {INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1}, HW_Category_SIMD, HW_Flag_NoFlag)
Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1725,14 +1725,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_AdvSimd_Store:
case NI_AdvSimd_Arm64_Store:
case NI_AdvSimd_StoreVectorAndZip:
case NI_AdvSimd_Arm64_StoreVectorAndZip:
case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
assert(intrin.op1 != nullptr);
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
Expand Down
Loading

0 comments on commit ab013a3

Please sign in to comment.