Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Arm64 encoding for IF_SVE_BL_1A #97223

Merged
merged 1 commit into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5138,6 +5138,22 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15,
INS_OPTS_SCALABLE_D); // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>

// IF_SVE_BL_1A
theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2,
1); // CNTB <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cntd, EA_8BYTE, REG_R30, SVE_PATTERN_VL1,
16); // CNTD <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R12, SVE_PATTERN_VL7,
5); // CNTH <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cntw, EA_8BYTE, REG_R23, SVE_PATTERN_VL256,
7); // CNTW <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R21, SVE_PATTERN_MUL4,
8); // CNTB <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cntd, EA_8BYTE, REG_R15, SVE_PATTERN_MUL3,
10); // CNTD <Xd>{, <pattern>{, MUL #<imm>}}
theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R5, SVE_PATTERN_ALL,
13); // CNTH <Xd>{, <pattern>{, MUL #<imm>}}

// IF_SVE_CL_3A
theEmitter->emitIns_R_R_R(INS_sve_compact, EA_SCALABLE, REG_V16, REG_P7, REG_V13,
INS_OPTS_SCALABLE_S); // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T>
Expand Down
32 changes: 24 additions & 8 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -971,19 +971,25 @@ class emitter
iiaEncodedInstrCount = (count << iaut_SHIFT) | iaut_INST_COUNT;
}

#ifdef TARGET_ARMARCH

#ifdef TARGET_ARM
struct
{
#ifdef TARGET_ARM64
// For 64-bit architecture this 32-bit structure can pack with these unsigned bit fields
regNumber _idReg3 : REGNUM_BITS;
regNumber _idReg4 : REGNUM_BITS;
};
#elif defined(TARGET_ARM64)
struct
{
// This 32-bit structure can pack with these unsigned bit fields
emitLclVarAddr iiaLclVar;
unsigned _idRegBit : 1; // Reg3 is scaled by idOpSize bits
GCtype _idGCref2 : 2;
#endif
regNumber _idReg3 : REGNUM_BITS;
regNumber _idReg4 : REGNUM_BITS;
regNumber _idReg3 : REGNUM_BITS;
regNumber _idReg4 : REGNUM_BITS;
};

insSvePattern _idSvePattern;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks OK to me, but I'm wondering why not just have something like this after the end of the struct definition?

#ifdef TARGET_ARM64
  insSvePattern _idSvePattern;
#endif

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to switch to that.

My reasoning was that, when you look at the whole file, we have an #ifdef for each target. Many of them just contain the same thing (_idReg3 and _idReg4). The nesting of Arm32/Arm64 is a little messy to read and doesn't fit the style of the rest of the file. Splitting it made it simpler.


#elif defined(TARGET_XARCH)
struct
{
Expand Down Expand Up @@ -1155,7 +1161,7 @@ class emitter
_idCodeSize = sz;
}
#elif defined(TARGET_RISCV64)
unsigned idCodeSize() const
unsigned idCodeSize() const
{
return _idCodeSize;
}
Expand Down Expand Up @@ -1433,6 +1439,16 @@ class emitter
assert(!idIsSmallDsc());
idAddr()->_idRegBit = val ? 1 : 0;
}
// Returns the SVE pattern stored in this descriptor's address union.
// Only valid for descriptors that are not small descriptors.
insSvePattern idSvePattern() const
{
    assert(!idIsSmallDsc());
    const insSvePattern storedPattern = idAddr()->_idSvePattern;
    return storedPattern;
}
// Stores the given SVE pattern in this descriptor's address union.
// Only valid for descriptors that are not small descriptors.
void idSvePattern(insSvePattern idSvePattern)
{
    assert(!idIsSmallDsc());
    auto* const addr    = idAddr();
    addr->_idSvePattern = idSvePattern;
}
#endif // TARGET_ARM64

#endif // TARGET_ARMARCH
Expand Down
124 changes: 124 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,14 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isValidScalarDatasize(elemsize));
break;

case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
elemsize = id->idOpSize();
assert(id->idInsOpt() == INS_OPTS_NONE);
assert(isGeneralRegister(id->idReg1()));
assert(elemsize == EA_8BYTE);
assert(isValidUimm4From1(emitGetInsSC(id)));
break;

// Scalable, 4 regs, to predicate register.
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
elemsize = id->idOpSize();
Expand Down Expand Up @@ -1938,6 +1946,19 @@ static const char * const pnRegNames[] =
"pn10", "pn11", "pn12", "pn13", "pn14",
"pn15"
};

// Display names for SVE patterns, indexed directly by the insSvePattern value.
// Values that have no corresponding insSvePattern enumerator are shown as "invalid".
static const char * const svePatternNames[] =
{
    "pow2",    //  0
    "vl1",     //  1
    "vl2",     //  2
    "vl3",     //  3
    "vl4",     //  4
    "vl5",     //  5
    "vl6",     //  6
    "vl7",     //  7
    "vl8",     //  8
    "vl16",    //  9
    "vl32",    // 10
    "vl64",    // 11
    "vl128",   // 12
    "vl256",   // 13
    "invalid", "invalid", "invalid", "invalid", "invalid", // 14-18
    "invalid", "invalid", "invalid", "invalid", "invalid", // 19-23
    "invalid", "invalid", "invalid", "invalid", "invalid", // 24-28
    "mul4",    // 29
    "mul3",    // 30
    "all"      // 31
};

// clang-format on

//------------------------------------------------------------------------
Expand Down Expand Up @@ -11360,6 +11381,49 @@ void emitter::emitIns_R_I_FLAGS_COND(
appendToCurIG(id);
}

/*****************************************************************************
 *
 *  Add an instruction referencing a register, a SVE Pattern and an immediate.
 *
 *  Arguments:
 *     ins     - the instruction to emit; one of INS_sve_cntb, INS_sve_cntd,
 *               INS_sve_cnth or INS_sve_cntw (anything else is unreached)
 *     attr    - the emit attribute; its size must be EA_8BYTE
 *     reg1    - the destination register; must be a general register
 *     pattern - the SVE pattern stored on the instruction descriptor
 *     imm     - the immediate; must satisfy isValidUimm4From1
 */

void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm)
{
    emitAttr  size = EA_SIZE(attr);
    insFormat fmt  = IF_NONE;

    /* Figure out the encoding format of the instruction */
    switch (ins)
    {
        case INS_sve_cntb:
        case INS_sve_cntd:
        case INS_sve_cnth:
        case INS_sve_cntw:
            assert(isGeneralRegister(reg1));
            assert(size == EA_8BYTE);
            assert(isValidUimm4From1(imm));
            fmt = IF_SVE_BL_1A;
            break;

        default:
            unreached();
            break;

    } // end switch (ins)
    assert(fmt != IF_NONE);

    // The immediate is carried as the descriptor's constant; the pattern is stored separately.
    instrDesc* id = emitNewInstrCns(attr, imm);

    id->idIns(ins);
    id->idInsFmt(fmt);

    id->idReg1(reg1);
    id->idSvePattern(pattern);

    dispIns(id);
    appendToCurIG(id);
}

/*****************************************************************************
*
* Add a memory barrier instruction with a 'barrier' immediate
Expand Down Expand Up @@ -14538,6 +14602,17 @@ void emitter::emitIns_Call(EmitCallType callType,
return (code_t)imm << 14;
}

/*****************************************************************************
 *
 *  Returns the encoding for an immediate in the range that isValidUimm4From1
 *  accepts. The value is stored biased by one (imm - 1) and placed at bit
 *  locations '19-16'.
 */

/*static*/ emitter::code_t emitter::insEncodeUimm4From1_19_to_16(ssize_t imm)
{
    assert(isValidUimm4From1(imm));

    const code_t biasedImm = (code_t)(imm - 1);
    return biasedImm << 16;
}

/*****************************************************************************
*
* Returns the encoding to select the <R> 4/8-byte width specifier <R>
Expand All @@ -14554,6 +14629,15 @@ void emitter::emitIns_Call(EmitCallType callType,
return 0;
}

/*****************************************************************************
 *
 *  Returns the encoding to select an insSvePattern.
 *  The pattern value is placed in the five-bit 'ppppp' field starting at bit 5.
 */
/*static*/ emitter::code_t emitter::insEncodeSvePattern(insSvePattern pattern)
{
    // A value that does not fit in five bits would spill into the neighboring encoding fields.
    assert((unsigned)pattern < 32);
    return (code_t)((unsigned)pattern << 5);
}

BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id)
{
instruction ins = id->idIns();
Expand Down Expand Up @@ -16621,6 +16705,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

// Immediate and pattern to general purpose.
case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
imm = emitGetInsSC(id);
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_Rd(id->idReg1()); // ddddd
code |= insEncodeSvePattern(id->idSvePattern()); // ppppp
code |= insEncodeUimm4From1_19_to_16(imm); // iiii
dst += emitOutput_Instr(dst, code);
break;

// Scalable to general register.
case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
Expand Down Expand Up @@ -17848,6 +17942,20 @@ void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, boo
printf("]");
}

/*****************************************************************************
 *
 *  Display an insSvePattern
 *
 *  Arguments:
 *     pattern  - the pattern to print; used directly as an index into svePatternNames
 *     addComma - when true, a comma separator is emitted after the pattern name
 */
void emitter::emitDispSvePattern(insSvePattern pattern, bool addComma)
{
    // Guard the table lookup: the pattern value is the array index.
    assert((size_t)pattern < (sizeof(svePatternNames) / sizeof(svePatternNames[0])));
    printf("%s", svePatternNames[pattern]);

    if (addComma)
    {
        emitDispComma();
    }
}

/*****************************************************************************
*
* Display (optionally) the instruction encoding in hex
Expand Down Expand Up @@ -19270,6 +19378,17 @@ void emitter::emitDispInsHelp(
emitDispReg(id->idReg3(), size, false); // mmmmm
break;

case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
imm = emitGetInsSC(id);
emitDispReg(id->idReg1(), size, true); // ddddd
emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp
if (imm > 1)
{
printf("mul ");
emitDispImm(emitGetInsSC(id), false, false); // iiii
}
break;

// <Zd>.<T>, <Zn>.<T>, <Zm>.D
case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
Expand Down Expand Up @@ -21946,6 +22065,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_8C;
break;

case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_3C;
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr);
void emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm);
void emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm);
void emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size);
void emitDispSvePattern(insSvePattern pattern, bool addComma);

/************************************************************************/
/* Private members that deal with target-dependent instr. descriptors */
Expand Down Expand Up @@ -526,6 +527,9 @@ static code_t insEncodeSimm5_20_to_16(ssize_t imm);
// Returns the encoding for the immediate value as 7-bits at bit locations '20-14'.
static code_t insEncodeUimm7_20_to_14(ssize_t imm);

// Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'.
static code_t insEncodeUimm4From1_19_to_16(ssize_t imm);

// Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate.
// This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'.
static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm);
Expand All @@ -534,6 +538,9 @@ static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift
// for an Arm64 Sve instruction.
static code_t insEncodeSveElemsize_R_22(emitAttr size);

// Returns the encoding to select an insSvePattern
static code_t insEncodeSvePattern(insSvePattern pattern);

// Returns true if 'reg' represents an integer register.
static bool isIntegerRegister(regNumber reg)
{
Expand Down Expand Up @@ -582,6 +589,12 @@ static bool isValidSimm4_MultipleOf32(ssize_t value)
return (-256 <= value) && (value <= 224) && (value % 32 == 0);
};

// Returns true if 'value' lies in the inclusive range [1, 16], i.e. it can be held by a
// 4-bit unsigned immediate whose encoding starts from 1 (such as for CNTB).
static bool isValidUimm4From1(ssize_t value)
{
    if (value < 1)
    {
        return false;
    }

    return value <= 16;
};

// Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP).
static bool isValidUimm5(ssize_t value)
{
Expand Down Expand Up @@ -1174,6 +1187,8 @@ void emitIns_R_R_FLAGS_COND(

void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond);

void emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm);

void emitIns_BARR(instruction ins, insBarrier barrier);

void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs);
Expand Down
22 changes: 22 additions & 0 deletions src/coreclr/jit/instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,28 @@ enum insScalableOpts : unsigned
INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr)
};

// Maps directly to the pattern used in SVE instructions such as cntb.
// The enumerator values are used as-is when encoding the instruction's pattern field,
// so they must not be renumbered. Values 14 through 28 have no enumerator.
enum insSvePattern : unsigned
{
SVE_PATTERN_POW2 = 0, // The largest power of 2.
SVE_PATTERN_VL1 = 1, // 1 element.
SVE_PATTERN_VL2 = 2, // 2 elements.
SVE_PATTERN_VL3 = 3, // 3 elements.
SVE_PATTERN_VL4 = 4, // 4 elements.
SVE_PATTERN_VL5 = 5, // 5 elements.
SVE_PATTERN_VL6 = 6, // 6 elements.
SVE_PATTERN_VL7 = 7, // 7 elements.
SVE_PATTERN_VL8 = 8, // 8 elements.
SVE_PATTERN_VL16 = 9, // 16 elements.
SVE_PATTERN_VL32 = 10, // 32 elements.
SVE_PATTERN_VL64 = 11, // 64 elements.
SVE_PATTERN_VL128 = 12, // 128 elements.
SVE_PATTERN_VL256 = 13, // 256 elements.
SVE_PATTERN_MUL4 = 29, // The largest multiple of 4.
SVE_PATTERN_MUL3 = 30, // The largest multiple of 3.
SVE_PATTERN_ALL = 31 // All available (implicitly a multiple of two).
};

enum insCond : unsigned
{
INS_COND_EQ,
Expand Down
Loading