Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle more than 64 registers - The finale #103387

Merged
merged 10 commits into from
Jun 14, 2024
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6254,7 +6254,7 @@ void CodeGen::genArm64EmitterUnitTestsSve()

// IF_SVE_CW_4A
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_UNPREDICATED); // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S,
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ class emitter
// x86: 38 bits
// amd64: 38 bits
// arm: 32 bits
// arm64: 44 bits
// arm64: 46 bits
// loongarch64: 28 bits
// risc-v: 28 bits

Expand Down Expand Up @@ -828,7 +828,7 @@ class emitter
// x86: 48 bits
// amd64: 48 bits
// arm: 48 bits
// arm64: 53 bits
// arm64: 55 bits
// loongarch64: 46 bits
// risc-v: 46 bits

Expand All @@ -840,7 +840,7 @@ class emitter
#if defined(TARGET_ARM)
#define ID_EXTRA_BITFIELD_BITS (16)
#elif defined(TARGET_ARM64)
#define ID_EXTRA_BITFIELD_BITS (21)
#define ID_EXTRA_BITFIELD_BITS (23)
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#define ID_EXTRA_BITFIELD_BITS (14)
#elif defined(TARGET_XARCH)
Expand Down Expand Up @@ -881,7 +881,7 @@ class emitter
// x86: 54/50 bits
// amd64: 55/50 bits
// arm: 54/50 bits
// arm64: 60/55 bits
// arm64: 62/57 bits
// loongarch64: 53/48 bits
// risc-v: 53/48 bits

Expand All @@ -897,7 +897,7 @@ class emitter
// x86: 10/14 bits
// amd64: 9/14 bits
// arm: 10/14 bits
// arm64: 4/9 bits
// arm64: 2/7 bits
// loongarch64: 11/16 bits
// risc-v: 11/16 bits

Expand Down
6 changes: 2 additions & 4 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4250,11 +4250,9 @@ void emitter::emitIns_Mov(

case INS_sve_mov:
{
// TODO-SVE: Remove check for insOptsNone() when predicate registers
// are present.
if (insOptsNone(opt) && isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
{
// assert(insOptsNone(opt));
assert(insOptsNone(opt));

opt = INS_OPTS_SCALABLE_B;
attr = EA_SCALABLE;
Expand Down
6 changes: 2 additions & 4 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3786,9 +3786,7 @@ void emitter::emitInsSve_R_R_R(instruction ins,
// MOV is an alias for CPY, and is always the preferred disassembly.
ins = INS_sve_mov;
}
// TODO-SVE: Change the below check to INS_SCALABLE_OPTS_PREDICATE_MERGE
// once predicate registers are present.
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV)
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
{
assert(isVectorRegister(reg1));
assert(isPredicateRegister(reg2));
Expand Down Expand Up @@ -5900,7 +5898,7 @@ void emitter::emitInsSve_R_R_R_R(instruction ins,
{
// mov is a preferred alias for sel
return emitInsSve_R_R_R(INS_sve_mov, attr, reg1, reg2, reg3, opt,
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV);
INS_SCALABLE_OPTS_PREDICATE_MERGE);
}

assert(insOptsScalableStandard(opt));
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ enum insScalableOpts : unsigned
INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov)
INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov)
INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV, // Use to distinguish mov (predicated) from other variants
};

// Maps directly to the pattern used in SVE instructions such as cntb.
Expand Down
10 changes: 8 additions & 2 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11510,7 +11510,13 @@ void LinearScan::dumpRegRecordTitleIfNeeded()
if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
{
lastUsedRegNumIndex = 0;
int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
int lastRegNumIndex = compiler->compFloatingPointUsed ?
#ifdef HAS_MORE_THAN_64_REGISTERS
REG_MASK_LAST
#else
REG_FP_LAST
#endif
: REG_INT_LAST;
for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
{
if (registersToDump.IsRegNumInMask((regNumber)regNumIndex))
Expand Down Expand Up @@ -12121,7 +12127,7 @@ void LinearScan::verifyFinalAllocation()

case RefTypeKill:
dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE,
currentRefPosition.registerAssignment);
currentRefPosition.getKillRegisterAssignment());
break;

case RefTypeFixedReg:
Expand Down
43 changes: 25 additions & 18 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1587,30 +1587,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id))
{
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
if (intrin.id == NI_Sve_ConditionalSelect)
if (!varTypeIsMask(intrin.op1->TypeGet()) && !HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
// If this is conditional select, make sure to check the embedded
// operation to determine the predicate mask.
assert(intrinsicTree->GetOperandCount() == 3);
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));

if (intrin.op2->OperIs(GT_HWINTRINSIC))
srcCount += BuildOperandUses(intrin.op1);
}
else
{
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
if (intrin.id == NI_Sve_ConditionalSelect)
{
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
const HWIntrinsic intrinEmb(embOp2Node);
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
// If this is conditional select, make sure to check the embedded
// operation to determine the predicate mask.
assert(intrinsicTree->GetOperandCount() == 3);
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));

if (intrin.op2->OperIs(GT_HWINTRINSIC))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
const HWIntrinsic intrinEmb(embOp2Node);
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}
}
}
}
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
{
predMask = RBM_LOWMASK.GetPredicateRegSet();
}

srcCount += BuildOperandUses(intrin.op1, predMask);
srcCount += BuildOperandUses(intrin.op1, predMask);
}
}
else if (intrinsicTree->OperIsMemoryLoadOrStore())
{
Expand Down
41 changes: 22 additions & 19 deletions src/coreclr/jit/registerarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,31 @@ REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")

// TODO-SVE: Fix once we add predicate registers
REGALIAS(P0, V0)
REGALIAS(P1, V1)
REGALIAS(P2, V2)
REGALIAS(P3, V3)
REGALIAS(P4, V4)
REGALIAS(P5, V5)
REGALIAS(P6, V6)
REGALIAS(P7, V7)
REGALIAS(P8, V8)
REGALIAS(P9, V9)
REGALIAS(P10, V10)
REGALIAS(P11, V11)
REGALIAS(P12, V12)
REGALIAS(P13, V13)
REGALIAS(P14, V14)
REGALIAS(P15, V15)
// Register-number base for the predicate registers: the REGDEF entries for
// P0..P15 below use (index + PBASE) as their register number.
#define PBASE 64
// Mask bit for the predicate register with index x (PMASK(0) is bit 0, i.e. the
// bit position is the predicate index itself, without PBASE added).
// The argument is parenthesized so that expression arguments such as
// PMASK(a | b) or PMASK(cond ? 1 : 2) shift by the whole expression instead of
// being split by operator precedence.
#define PMASK(x) (1ULL << (x))

/*
REGDEF(name, rnum, mask, xname, wname) */
REGDEF(P0, 0+PBASE, PMASK(0), "p0" , "na")
REGDEF(P1, 1+PBASE, PMASK(1), "p1" , "na")
REGDEF(P2, 2+PBASE, PMASK(2), "p2" , "na")
REGDEF(P3, 3+PBASE, PMASK(3), "p3" , "na")
REGDEF(P4, 4+PBASE, PMASK(4), "p4" , "na")
REGDEF(P5, 5+PBASE, PMASK(5), "p5" , "na")
REGDEF(P6, 6+PBASE, PMASK(6), "p6" , "na")
REGDEF(P7, 7+PBASE, PMASK(7), "p7" , "na")
REGDEF(P8, 8+PBASE, PMASK(8), "p8" , "na")
REGDEF(P9, 9+PBASE, PMASK(9), "p9" , "na")
REGDEF(P10, 10+PBASE, PMASK(10), "p10", "na")
REGDEF(P11, 11+PBASE, PMASK(11), "p11", "na")
REGDEF(P12, 12+PBASE, PMASK(12), "p12", "na")
REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na")
REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na")
REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na")


// The registers with values 64 (NBASE) and above are not real register numbers
#define NBASE 64
// The registers with values 80 (NBASE) and above are not real register numbers
#define NBASE 80

REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
// This must be last!
Expand Down
11 changes: 2 additions & 9 deletions src/coreclr/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ typedef uint64_t regMaskSmall;
#endif

#ifdef TARGET_ARM64
// #define HAS_MORE_THAN_64_REGISTERS 1
#define HAS_MORE_THAN_64_REGISTERS 1
#endif // TARGET_ARM64

// TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit)
Expand Down Expand Up @@ -267,14 +267,7 @@ struct regMaskTP
static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
if (reg < 64)
{
return regMaskTP(mask, RBM_NONE);
}
else
{
return regMaskTP(RBM_NONE, mask);
}
return (reg < 64) ? regMaskTP(mask, RBM_NONE) : regMaskTP(RBM_NONE, mask);
#else
return regMaskTP(mask, RBM_NONE);
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/targetarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);

#define REGNUM_BITS 6 // number of bits in a REG_*
#define REGNUM_BITS 7 // number of bits in a REG_*
#define REGSIZE_BYTES 8 // number of bytes in one general purpose register
#define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
#define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
{
regMaskTP regBit = genRegMask(regNum);

if ((regMask & regBit) != 0)
if ((regMask & regBit).IsNonEmpty())
{
// We have a register to display. It gets displayed now if:
// 1. This is the first register to display of a new range of registers (possibly because
Expand Down
Loading