Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64/Sve: Add FFR register liveness tracking #105348

Merged
merged 14 commits into from
Jul 26, 2024
14 changes: 14 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2954,6 +2954,20 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
}
#endif // FEATURE_SIMD

#ifdef FEATURE_MASKED_HW_INTRINSICS
if ((compiler->lvaFfrRegister == lclNode->GetLclNum()) && (targetReg == REG_NA))
{
assert(targetType == TYP_MASK);
// We are about to store the FFR on stack. So first read the FFR register and
// store it in op1Reg. After that, op1Reg will be stored on the stack.
assert(varDsc->lvImplicitlyReferenced == 1);

regNumber op1Reg = data->GetRegNum();
assert(op1Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_rdffr, EA_SCALABLE, op1Reg);
}
#endif

kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
genConsumeRegs(data);

regNumber dataReg = REG_NA;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,10 @@ class Compiler
#endif // defined(FEATURE_SIMD)

unsigned lvaGSSecurityCookie; // LclVar number
#ifdef TARGET_ARM64
unsigned lvaFfrRegister; // LclVar number
unsigned getFFRegisterVarNum();
#endif
bool lvaTempsHaveLargerOffsetThanVars();

// Returns "true" iff local variable "lclNum" is in SSA form.
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7448,7 +7448,11 @@ GenTreeIntCon* Compiler::gtNewFalse()
// return a new node representing the value in a physical register
GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type)
{
#ifdef TARGET_ARM64
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE) || (reg == REG_FFR));
#else
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE));
#endif
GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type);
return result;
}
Expand Down Expand Up @@ -11554,6 +11558,12 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons
{
ilName = "GsCookie";
}
#ifdef TARGET_ARM64
else if (lclNum == lvaFfrRegister)
{
ilName = "FFReg";
}
#endif
else if (lclNum == lvaRetAddrVar)
{
ilName = "ReturnAddress";
Expand Down
29 changes: 10 additions & 19 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2297,10 +2297,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
FALLTHROUGH;
}
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateMaskForFirstActiveElement:
case NI_Sve_CreateMaskForNextActiveElement:
case NI_Sve_GetActiveElementCount:
Expand All @@ -2310,30 +2315,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
// HWInstrinsic requires a mask for op2
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2));
break;
FALLTHROUGH;
}

default:
break;
}

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));
break;
}

default:
break;
}

// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
Expand Down
55 changes: 55 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

emitAttr emitSize;
insOpts opt;
bool unspilledFfr = false;

if (HWIntrinsicInfo::SIMDScalar(intrin.id))
{
Expand All @@ -318,6 +319,29 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
emitSize = EA_SCALABLE;
opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType));

bool readsFfr = false;
switch (intrin.id)
{
case NI_Sve_GetFfrByte:
case NI_Sve_GetFfrInt16:
case NI_Sve_GetFfrInt32:
case NI_Sve_GetFfrInt64:
case NI_Sve_GetFfrSByte:
case NI_Sve_GetFfrUInt16:
case NI_Sve_GetFfrUInt32:
case NI_Sve_GetFfrUInt64:
case NI_Sve_LoadVectorFirstFaulting:
readsFfr = true;
break;
default:
break;
}

if (readsFfr && (intrin.op1 != nullptr) && ((intrin.op1->gtFlags & GTF_SPILLED) != 0))
{
unspilledFfr = true;
}
}
else if (intrin.category == HW_Category_Special)
{
Expand Down Expand Up @@ -2366,6 +2390,37 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_LoadVectorFirstFaulting:
{
insScalableOpts sopt = (opt == INS_OPTS_SCALABLE_B) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt, sopt);
break;
}

case NI_Sve_GetFfrByte:
case NI_Sve_GetFfrInt16:
case NI_Sve_GetFfrInt32:
case NI_Sve_GetFfrInt64:
case NI_Sve_GetFfrSByte:
case NI_Sve_GetFfrUInt16:
case NI_Sve_GetFfrUInt32:
case NI_Sve_GetFfrUInt64:
{
if (unspilledFfr)
{
// We have unspilled the FFR in op1Reg. Restore it back in FFR register.
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op1Reg, opt);
Copy link
Member

@jakobbotsch jakobbotsch Jul 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the codegen for these intrinsics just emit mov targetReg, op1Reg and skip wrffr + rdffr entirely?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but I will have to double check if that is fine to do, because sometimes, we do want to make sure ffr is up-to-date. I will leave this as a future optimization.

}

GetEmitter()->emitIns_R(ins, emitSize, targetReg, INS_OPTS_SCALABLE_B);
break;
}
case NI_Sve_SetFfr:
{
assert(targetReg == REG_NA);
GetEmitter()->emitIns_R(ins, emitSize, op1Reg, opt);
break;
}
case NI_Sve_ConditionalExtractAfterLastActiveElementScalar:
case NI_Sve_ConditionalExtractLastActiveElementScalar:
{
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend,
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, -1, false, {INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, -1, false, {INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, true, {INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -142,6 +150,7 @@ HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64,
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, false, {INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down Expand Up @@ -237,6 +246,7 @@ HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing,
HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, true, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffectMask|HW_Flag_SpecialCodeGen)
jakobbotsch marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ void Compiler::lvaInit()
#endif // JIT32_GCENCODER
lvaNewObjArrayArgs = BAD_VAR_NUM;
lvaGSSecurityCookie = BAD_VAR_NUM;
#ifdef TARGET_ARM64
lvaFfrRegister = BAD_VAR_NUM;
#endif
#ifdef TARGET_X86
lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
#endif // TARGET_X86
Expand Down
Loading
Loading