From 1563f8c0795c727665de1b64376ba87f497ba855 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 19 Jan 2024 10:29:35 +0000 Subject: [PATCH] Add Arm64 encoding for IF_SVE_BL_1A --- src/coreclr/jit/codegenarm64test.cpp | 16 ++++ src/coreclr/jit/emit.h | 32 +++++-- src/coreclr/jit/emitarm64.cpp | 124 +++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 15 ++++ src/coreclr/jit/instr.h | 22 +++++ 5 files changed, 201 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 03edaae736690..ba53a3631958c 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5138,6 +5138,22 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15, INS_OPTS_SCALABLE_D); // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // IF_SVE_BL_1A + theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2, + 1); // CNTB <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cntd, EA_8BYTE, REG_R30, SVE_PATTERN_VL1, + 16); // CNTD <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R12, SVE_PATTERN_VL7, + 5); // CNTH <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cntw, EA_8BYTE, REG_R23, SVE_PATTERN_VL256, + 7); // CNTW <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R21, SVE_PATTERN_MUL4, + 8); // CNTB <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cntd, EA_8BYTE, REG_R15, SVE_PATTERN_MUL3, + 10); // CNTD <Xd>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R5, SVE_PATTERN_ALL, + 13); // CNTH <Xd>{, <pattern>{, MUL #<imm>}} + // IF_SVE_CL_3A theEmitter->emitIns_R_R_R(INS_sve_compact, EA_SCALABLE, REG_V16, REG_P7, REG_V13, INS_OPTS_SCALABLE_S); // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T>
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 51ed6b72a0c52..134574154dcff 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -971,19 +971,25 @@ class emitter iiaEncodedInstrCount = (count << iaut_SHIFT) | iaut_INST_COUNT; } -#ifdef TARGET_ARMARCH - +#ifdef TARGET_ARM struct { -#ifdef TARGET_ARM64 - // For 64-bit architecture this 32-bit structure can pack with these unsigned bit fields + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; + }; +#elif defined(TARGET_ARM64) + struct + { + // This 32-bit structure can pack with these unsigned bit fields emitLclVarAddr iiaLclVar; unsigned _idRegBit : 1; // Reg3 is scaled by idOpSize bits GCtype _idGCref2 : 2; -#endif - regNumber _idReg3 : REGNUM_BITS; - regNumber _idReg4 : REGNUM_BITS; + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; }; + + insSvePattern _idSvePattern; // SVE pattern operand; read and written through idSvePattern() + #elif defined(TARGET_XARCH) struct { @@ -1155,7 +1161,7 @@ class emitter _idCodeSize = sz; } #elif defined(TARGET_RISCV64) - unsigned idCodeSize() const + unsigned idCodeSize() const { return _idCodeSize; } @@ -1433,6 +1439,16 @@ class emitter assert(!idIsSmallDsc()); idAddr()->_idRegBit = val ?
1 : 0; } + insSvePattern idSvePattern() const + { + assert(!idIsSmallDsc()); + return (idAddr()->_idSvePattern); + } + void idSvePattern(insSvePattern idSvePattern) + { + assert(!idIsSmallDsc()); + idAddr()->_idSvePattern = idSvePattern; + } #endif // TARGET_ARM64 #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5c9fb990bb859..50b129bad6e4b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1081,6 +1081,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidScalarDatasize(elemsize)); break; + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_NONE); + assert(isGeneralRegister(id->idReg1())); + assert(elemsize == EA_8BYTE); + assert(isValidUimm4From1(emitGetInsSC(id))); + break; + // Scalable, 4 regs, to predicate register. case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); @@ -1938,6 +1946,19 @@ static const char * const pnRegNames[] = "pn10", "pn11", "pn12", "pn13", "pn14", "pn15" }; + +static const char * const svePatternNames[] = // Indexed by insSvePattern value (0-31); values with no named pattern print as "invalid". +{ + "pow2", "vl1", "vl2", "vl3", + "vl4", "vl5", "vl6", "vl7", + "vl8", "vl16", "vl32", "vl64", + "vl128", "vl256", "invalid", "invalid", + "invalid", "invalid", "invalid", "invalid", + "invalid", "invalid", "invalid", "invalid", + "invalid", "invalid", "invalid", "invalid", + "invalid", "mul4", "mul3", "all" +}; + // clang-format on //------------------------------------------------------------------------ @@ -11360,6 +11381,49 @@ void emitter::emitIns_R_I_FLAGS_COND( appendToCurIG(id); } +/***************************************************************************** + * + * Add an instruction referencing a register, an SVE pattern and an immediate.
+ */ + +void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm) +{ + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_sve_cntb: + case INS_sve_cntd: + case INS_sve_cnth: + case INS_sve_cntw: + assert(isGeneralRegister(reg1)); + assert(size == EA_8BYTE); + assert(isValidUimm4From1(imm)); + fmt = IF_SVE_BL_1A; + break; + + default: + unreached(); + break; + + } // end switch (ins) + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + + id->idReg1(reg1); + id->idSvePattern(pattern); + + dispIns(id); + appendToCurIG(id); +} + /***************************************************************************** * * Add a memory barrier instruction with a 'barrier' immediate @@ -14538,6 +14602,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 14; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 4-bits starting from 1 (stored as 'imm - 1'), at bit locations '19-16'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm4From1_19_to_16(ssize_t imm) +{ + assert(isValidUimm4From1(imm)); + return (code_t)(imm - 1) << 16; +} + /***************************************************************************** * * Returns the encoding to select the 4/8-byte width specifier @@ -14554,6 +14629,15 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select an insSvePattern, placed at bit locations '9-5' + */ +/*static*/ emitter::code_t emitter::insEncodeSvePattern(insSvePattern pattern) +{ + return (code_t)((unsigned)pattern << 5); +} + BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) { instruction ins = id->idIns(); @@ -16621,6 +16705,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + // Immediate and pattern to general purpose. + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeSvePattern(id->idSvePattern()); // ppppp + code |= insEncodeUimm4From1_19_to_16(imm); // iiii + dst += emitOutput_Instr(dst, code); + break; + // Scalable to general register. case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_CS_3A: // ........xx......
...gggnnnnnddddd -- SVE extract element to general register @@ -17848,6 +17942,20 @@ void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, boo printf("]"); } +/***************************************************************************** + * + * Display an insSvePattern by name, followed by a comma when 'addComma' is true + */ +void emitter::emitDispSvePattern(insSvePattern pattern, bool addComma) +{ + printf("%s", svePatternNames[pattern]); + + if (addComma) + { + emitDispComma(); + } +} + /***************************************************************************** * * Display (optionally) the instruction encoding in hex @@ -19270,6 +19378,17 @@ void emitter::emitDispInsHelp( emitDispReg(id->idReg3(), size, false); // mmmmm break; + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + imm = emitGetInsSC(id); + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(emitGetInsSC(id), false, false); // iiii + } + break; + // <Zd>.<T>, <Zn>.<T>, <Zm>.D case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -21946,6 +22065,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_8C; break; + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_3C; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 04f8322a06fc9..afde40e5c14f9 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -65,6 +65,7 @@ void emitDispShiftedReg(regNumber reg, insOpts
opt, ssize_t imm, emitAttr attr); void emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm); void emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm); void emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size); +void emitDispSvePattern(insSvePattern pattern, bool addComma); /************************************************************************/ /* Private members that deal with target-dependent instr. descriptors */ @@ -526,6 +527,9 @@ static code_t insEncodeSimm5_20_to_16(ssize_t imm); // Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. static code_t insEncodeUimm7_20_to_14(ssize_t imm); +// Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'. +static code_t insEncodeUimm4From1_19_to_16(ssize_t imm); + // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. // This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm); @@ -534,6 +538,9 @@ static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift // for an Arm64 Sve instruction. static code_t insEncodeSveElemsize_R_22(emitAttr size); +// Returns the encoding to select an insSvePattern, placed at bit locations '9-5'. +static code_t insEncodeSvePattern(insSvePattern pattern); + // Returns true if 'reg' represents an integer register. static bool isIntegerRegister(regNumber reg) { @@ -582,6 +589,12 @@ static bool isValidSimm4_MultipleOf32(ssize_t value) return (-256 <= value) && (value <= 224) && (value % 32 == 0); }; +// Returns true if 'value' is a legal unsigned immediate 4 bit encoding, starting from 1 (such as for CNTB). +static bool isValidUimm4From1(ssize_t value) +{ + return (1 <= value) && (value <= 16); +}; + // Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP).
static bool isValidUimm5(ssize_t value) { @@ -1174,6 +1187,8 @@ void emitIns_R_R_FLAGS_COND( void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond); +void emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm); + void emitIns_BARR(instruction ins, insBarrier barrier); void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 5fd9dd456d65c..8d2b16a50265d 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -322,6 +322,28 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) }; +// Maps directly to the pattern used in SVE instructions such as cntb. +enum insSvePattern : unsigned +{ + SVE_PATTERN_POW2 = 0, // The largest power of 2. + SVE_PATTERN_VL1 = 1, // 1 element. + SVE_PATTERN_VL2 = 2, // 2 elements. + SVE_PATTERN_VL3 = 3, // 3 elements. + SVE_PATTERN_VL4 = 4, // 4 elements. + SVE_PATTERN_VL5 = 5, // 5 elements. + SVE_PATTERN_VL6 = 6, // 6 elements. + SVE_PATTERN_VL7 = 7, // 7 elements. + SVE_PATTERN_VL8 = 8, // 8 elements. + SVE_PATTERN_VL16 = 9, // 16 elements. + SVE_PATTERN_VL32 = 10, // 32 elements. + SVE_PATTERN_VL64 = 11, // 64 elements. + SVE_PATTERN_VL128 = 12, // 128 elements. + SVE_PATTERN_VL256 = 13, // 256 elements. + SVE_PATTERN_MUL4 = 29, // The largest multiple of 4. + SVE_PATTERN_MUL3 = 30, // The largest multiple of 3. + SVE_PATTERN_ALL = 31 // All available (implicitly a multiple of two). +}; + enum insCond : unsigned { INS_COND_EQ,