Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64-SVE: Add checks for MOVPRFX sequencing (#105514) #106184

Merged
merged 21 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7095,8 +7095,28 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
*instrCount = 0;
jitstd::list<RichIPMapping>::iterator nextMapping = emitComp->genRichIPmappings.begin();
#endif
#if defined(DEBUG) && defined(TARGET_ARM64)
instrDesc* prevId = nullptr;
#endif // DEBUG && TARGET_ARM64
a74nh marked this conversation as resolved.
Show resolved Hide resolved

for (insGroup* ig = emitIGlist; ig != nullptr; ig = ig->igNext)
{

#if defined(DEBUG) && defined(TARGET_ARM64)
instrDesc* currId = emitFirstInstrDesc(ig->igData);
for (unsigned cnt = ig->igInsCnt; cnt > 0; cnt--)
a74nh marked this conversation as resolved.
Show resolved Hide resolved
{
emitInsPairSanityCheck(prevId, currId);
prevId = currId;
emitAdvanceInstrDesc(&currId, emitSizeOfInsDsc(currId));
}
// Final instruction can't be a movprfx
if (ig->igNext == nullptr)
{
assert(prevId->idIns() != INS_sve_movprfx);
}
#endif // DEBUG && TARGET_ARM64
a74nh marked this conversation as resolved.
Show resolved Hide resolved

assert(!(ig->igFlags & IGF_PLACEHOLDER)); // There better not be any placeholder groups left

/* Is this the first cold block? */
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -3128,6 +3128,10 @@ class emitter
#ifndef TARGET_LOONGARCH64
void emitInsSanityCheck(instrDesc* id);
#endif // TARGET_LOONGARCH64

#ifdef TARGET_ARM64
void emitInsPairSanityCheck(instrDesc* prevId, instrDesc* id);
#endif
#endif // DEBUG

#ifdef TARGET_ARMARCH
Expand Down
208 changes: 208 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18432,4 +18432,212 @@ void emitter::getInsSveExecutionCharacteristics(instrDesc* id, insExecutionChara
}
#endif // defined(DEBUG) || defined(LATE_DISASM)

#ifdef DEBUG
/*****************************************************************************
*
* Sanity check that two instructions are valid when placed next to each other
a74nh marked this conversation as resolved.
Show resolved Hide resolved
*/

//------------------------------------------------------------------------
// emitInsPairSanityCheck: Validate that two adjacent instructions form a legal
// pair. Currently this only enforces the architectural restrictions on the
// instruction that immediately follows an SVE MOVPRFX. Quoted sections in the
// body are taken from the Arm Architecture Reference Manual.
//
// Arguments:
//    firstId  - the earlier instruction (may be nullptr at a sequence start)
//    secondId - the instruction immediately following firstId
//
void emitter::emitInsPairSanityCheck(instrDesc* firstId, instrDesc* secondId)
{
    // Nothing to check at the start of a sequence, or with no successor.
    if (firstId == nullptr || secondId == nullptr)
    {
        return;
    }

    // Currently only concerned with instructions that follow movprfx
    if (firstId->idIns() != INS_sve_movprfx)
    {
        return;
    }

    // Determine which form of movprfx this is: IF_SVE_AH_3A is the predicated
    // form (MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T>); IF_SVE_BI_2A is the
    // unpredicated form (MOVPRFX <Zd>, <Zn>).
    bool movprfxIsPredicated = false;
    if (firstId->idInsFmt() == IF_SVE_AH_3A)
    {
        movprfxIsPredicated = true;
    }
    else
    {
        // Unpredicated version
        assert(firstId->idInsFmt() == IF_SVE_BI_2A);
    }

    // Quoted sections are taken from the Arm manual.

    // "It is required that the prefixed instruction at PC+4 must be an SVE destructive binary or ternary
    // instruction encoding, or a unary operation with merging predication, but excluding other MOVPRFX instructions."
    // "The prefixed instruction must not use the destination register in any other operand position, even if
    // they have different names but refer to the same architectural register state."
    // "A predicated MOVPRFX cannot be used with an unpredicated instruction."
    switch (secondId->idInsFmt())
    {
        // Unpredicated formats: only legal after an unpredicated movprfx.
        case IF_SVE_BN_1A: // <Zdn>.D{, <pattern>{, MUL #<imm>}}
        case IF_SVE_BP_1A: // <Zdn>.D{, <pattern>{, MUL #<imm>}}
        case IF_SVE_CC_2A: // <Zdn>.<T>, <V><m>
        case IF_SVE_CD_2A: // <Zdn>.<T>, <R><m>
        case IF_SVE_DN_2A: // <Zdn>.<T>, <Pm>.<T>
        case IF_SVE_DP_2A: // <Zdn>.<T>, <Pm>.<T>
        // Tied registers
        case IF_SVE_BS_1A: // <Zdn>.<T>, <Zdn>.<T>, #<const>
        case IF_SVE_EC_1A: // <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
        case IF_SVE_ED_1A: // <Zdn>.<T>, <Zdn>.<T>, #<imm>
        case IF_SVE_EE_1A: // <Zdn>.<T>, <Zdn>.<T>, #<imm>
            assert(!movprfxIsPredicated);
            break;

        // Predicated formats with no additional register restrictions.
        case IF_SVE_BU_2A:   // <Zd>.<T>, <Pg>/M, #<const>
        case IF_SVE_BV_2A_A: // <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
        case IF_SVE_BV_2A_J: // <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
        case IF_SVE_BV_2B:   // <Zd>.<T>, <Pg>/M, #0.0
        case IF_SVE_CQ_3A:   // <Zd>.<T>, <Pg>/M, <R><n|SP>
        // Tied registers
        case IF_SVE_AM_2A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
        case IF_SVE_HM_2A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
            break;

        // Unpredicated formats where the destination must not also be the
        // second source operand.
        case IF_SVE_FU_2A: // <Zda>.<T>, <Zn>.<T>, #<const>
        // Tied registers
        case IF_SVE_AW_2A: // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
        case IF_SVE_BY_2A: // <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
        case IF_SVE_FV_2A: // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
        case IF_SVE_HN_2A: // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm>
            assert(!movprfxIsPredicated);
            assert(secondId->idReg1() != secondId->idReg2());
            break;

        // Predicated formats where the destination must not also be the
        // (non-predicate) source operand.
        case IF_SVE_AP_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_AQ_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_CP_3A: // <Zd>.<T>, <Pg>/M, <V><n>
        case IF_SVE_CT_3A: // <Zd>.Q, <Pg>/M, <Zn>.Q
        case IF_SVE_CU_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_ES_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_EQ_3A: // <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
        case IF_SVE_HO_3A: // <Zd>.H, <Pg>/M, <Zn>.S
        case IF_SVE_HO_3B: // <Zd>.D, <Pg>/M, <Zn>.S
        case IF_SVE_HO_3C: // <Zd>.S, <Pg>/M, <Zn>.D
        case IF_SVE_HP_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_HQ_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_HR_3A: // <Zd>.<T>, <Pg>/M, <Zn>.<T>
        case IF_SVE_HS_3A: // <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>
        case IF_SVE_HP_3B: // <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>
        // Tied registers
        case IF_SVE_AA_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
        case IF_SVE_AB_3B: // <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D
        case IF_SVE_AC_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
        case IF_SVE_AO_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
        case IF_SVE_CM_3A: // <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
        case IF_SVE_GP_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const>
        case IF_SVE_GR_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
        case IF_SVE_HL_3A: // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
        case IF_SVE_HL_3B: // <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
            assert(secondId->idReg1() != secondId->idReg3());
            break;

        // Unpredicated formats where the destination must not appear as
        // either source operand.
        case IF_SVE_EF_3A:   // <Zda>.S, <Zn>.H, <Zm>.H
        case IF_SVE_EG_3A:   // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_EH_3A:   // <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
        case IF_SVE_EI_3A:   // <Zda>.S, <Zn>.B, <Zm>.B
        case IF_SVE_EJ_3A:   // <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
        case IF_SVE_EK_3A:   // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
        case IF_SVE_EL_3A:   // <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
        case IF_SVE_EM_3A:   // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
        case IF_SVE_EW_3A:   // <Zda>.D, <Zn>.D, <Zm>.D
        case IF_SVE_EW_3B:   // <Zdn>.D, <Zm>.D, <Za>.D
        case IF_SVE_EY_3A:   // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_EY_3B:   // <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_EZ_3A:   // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_FA_3A:   // <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
        case IF_SVE_FA_3B:   // <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
        case IF_SVE_FB_3A:   // <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
        case IF_SVE_FB_3B:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
        case IF_SVE_FC_3A:   // <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
        case IF_SVE_FC_3B:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
        case IF_SVE_FF_3A:   // <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_FF_3B:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
        case IF_SVE_FF_3C:   // <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
        case IF_SVE_FG_3A:   // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_FG_3B:   // <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
        case IF_SVE_FJ_3A:   // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_FJ_3B:   // <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
        case IF_SVE_FK_3A:   // <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_FK_3B:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
        case IF_SVE_FK_3C:   // <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
        case IF_SVE_FO_3A:   // <Zda>.S, <Zn>.B, <Zm>.B
        case IF_SVE_FW_3A:   // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
        case IF_SVE_FY_3A:   // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
        case IF_SVE_GM_3A:   // <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_GN_3A:   // <Zda>.H, <Zn>.B, <Zm>.B
        case IF_SVE_GO_3A:   // <Zda>.S, <Zn>.B, <Zm>.B
        case IF_SVE_GU_3A:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
        case IF_SVE_GU_3B:   // <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
        case IF_SVE_GU_3C:   // <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_GV_3A:   // <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
        case IF_SVE_GW_3B:   // <Zd>.H, <Zn>.H, <Zm>.H
        case IF_SVE_GY_3A:   // <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_GY_3B:   // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_GY_3B_D: // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_GZ_3A:   // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
        case IF_SVE_HA_3A:   // <Zda>.S, <Zn>.H, <Zm>.H
        case IF_SVE_HA_3A_E: // <Zda>.H, <Zn>.B, <Zm>.B
        case IF_SVE_HA_3A_F: // <Zda>.S, <Zn>.B, <Zm>.B
        case IF_SVE_HB_3A:   // <Zda>.S, <Zn>.H, <Zm>.H
        case IF_SVE_HC_3A:   // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
        case IF_SVE_HD_3A:   // <Zda>.S, <Zn>.H, <Zm>.H
        case IF_SVE_HD_3A_A: // <Zda>.D, <Zn>.D, <Zm>.D
        // Tied registers
        case IF_SVE_AV_3A: // <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
            assert(!movprfxIsPredicated);
            assert(secondId->idReg1() != secondId->idReg2());
            assert(secondId->idReg1() != secondId->idReg3());
            break;

        // Predicated formats where the destination must not appear as either
        // vector source operand.
        case IF_SVE_AR_4A: // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
        case IF_SVE_AS_4A: // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
        case IF_SVE_GT_4A: // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const>
        case IF_SVE_HU_4A: // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
        case IF_SVE_HU_4B: // <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H
        case IF_SVE_HV_4A: // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
            assert(secondId->idReg1() != secondId->idReg3());
            assert(secondId->idReg1() != secondId->idReg4());
            break;

        case IF_SVE_AT_3A: // <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
            // Only a subset of this group is valid
            switch (secondId->idIns())
            {
                case INS_sve_sclamp:
                case INS_sve_uclamp:
                case INS_sve_eorbt:
                case INS_sve_eortb:
                case INS_sve_fclamp:
                    break;
                default:
                    assert(!"Got unexpected instruction within group IF_SVE_AT_3A after MOVPRFX");
            }
            assert(!movprfxIsPredicated);
            assert(secondId->idReg1() != secondId->idReg2());
            assert(secondId->idReg1() != secondId->idReg3());
            break;

        default:
            assert(!"Got unexpected instruction format after MOVPRFX");
            break;
    }

    // "The prefixed instruction must specify the same destination vector as the MOVPRFX instruction."
    assert(firstId->idReg1() == secondId->idReg1());

    if (movprfxIsPredicated)
    {
        // "The prefixed instruction must specify the same predicate register"
        assert(isPredicateRegister(firstId->idReg2()));
        assert(isPredicateRegister(secondId->idReg2()));
        assert(firstId->idReg2() == secondId->idReg2());

        // "predicated using the same governing predicate register and source element size as this instruction."
        // Note: the unpredicated MOVPRFX (MOVPRFX <Zd>, <Zn>) carries no element size, so there is
        // nothing to compare in the unpredicated case.
        assert(firstId->idInsOpt() == secondId->idInsOpt());
    }
}
#endif // DEBUG

#endif // TARGET_ARM64
29 changes: 20 additions & 9 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
if (intrin.op3->isContained())
{
assert(intrin.op3->IsVectorZero());

if (intrin.op1->isContained() || intrin.op1->IsMaskAllBitsSet())
{
// We already skip importing ConditionalSelect if op1 == trueAll, however
Expand All @@ -659,6 +660,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
assert(!HWIntrinsicInfo::IsZeroingMaskedOperation(intrinEmbMask.id));

// If falseValue is zero, just zero out those lanes of targetReg using `movprfx`
// and /Z
GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, targetReg,
Expand All @@ -680,6 +683,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
falseReg, opt);
break;
}
else if (HWIntrinsicInfo::IsZeroingMaskedOperation(intrinEmbMask.id))
{
// At this point, target != embMaskOp1Reg != falseReg, so just go ahead
// and move the falseReg unpredicated into targetReg.
// Cannot use movprfx for zeroing mask operations.
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, falseReg,
/* canSkip */ true);
}
else
{
// At this point, target != embMaskOp1Reg != falseReg, so just go ahead
Expand Down Expand Up @@ -2408,17 +2419,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(isRMW);

if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
}

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);

for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);

GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
}

const int elementIndex = helper.ImmValue();
const int byteIndex = genTypeSize(intrin.baseType) * elementIndex;

Expand Down Expand Up @@ -2560,7 +2571,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);

for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd(), (targetReg != op1Reg) ? 2 : 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this syntax doing? It looks like the result of the ternary operator isn't being used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That change should have been on the line above - the HWIntrinsicImmOpHelper constructor needs to take in the number of instructions that will be generated in each stage of the loop.

{
if (targetReg != op1Reg)
{
Expand Down Expand Up @@ -2610,7 +2621,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Generate the table using the combined immediate
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd(), (targetReg != op1Reg) ? 2 : 1)
{
if (targetReg != op1Reg)
{
Expand Down
Loading