diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2d59d3e461541..00962318ff222 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5338,6 +5338,110 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_fsqrt, EA_SCALABLE, REG_V6, REG_P6, REG_V6, INS_OPTS_SCALABLE_S); // FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // IF_SVE_DT_3A + theEmitter->emitIns_R_R_R(INS_sve_whilege, EA_4BYTE, REG_P0, REG_R0, REG_R1, + INS_OPTS_SCALABLE_B); // WHILEGE <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilege, EA_8BYTE, REG_P1, REG_R2, REG_R3, + INS_OPTS_SCALABLE_B); // WHILEGE <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilegt, EA_4BYTE, REG_P2, REG_R4, REG_R5, + INS_OPTS_SCALABLE_B); // WHILEGT <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilegt, EA_8BYTE, REG_P3, REG_R6, REG_R7, + INS_OPTS_SCALABLE_B); // WHILEGT <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilehi, EA_4BYTE, REG_P4, REG_R8, REG_R9, + INS_OPTS_SCALABLE_H); // WHILEHI <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilehi, EA_8BYTE, REG_P5, REG_R10, REG_R11, + INS_OPTS_SCALABLE_H); // WHILEHI <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilehs, EA_4BYTE, REG_P6, REG_R12, REG_R13, + INS_OPTS_SCALABLE_H); // WHILEHS <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilehs, EA_8BYTE, REG_P7, REG_R14, REG_R15, + INS_OPTS_SCALABLE_H); // WHILEHS <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilele, EA_4BYTE, REG_P8, REG_R0, REG_R1, + INS_OPTS_SCALABLE_S); // WHILELE <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilele, EA_8BYTE, REG_P9, REG_R2, REG_R3, + INS_OPTS_SCALABLE_S); // WHILELE <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilelo, EA_4BYTE, REG_P10, REG_R4, REG_R5, + INS_OPTS_SCALABLE_S); // WHILELO <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilelo, EA_8BYTE, REG_P11, REG_R6, REG_R7, + INS_OPTS_SCALABLE_S); // WHILELO <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilels, EA_4BYTE, REG_P12, REG_R8, REG_R9, + INS_OPTS_SCALABLE_D); // WHILELS <Pd>.<T>, <R><n>, <R><m> + 
theEmitter->emitIns_R_R_R(INS_sve_whilels, EA_8BYTE, REG_P13, REG_R10, REG_R11, + INS_OPTS_SCALABLE_D); // WHILELS <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_4BYTE, REG_P14, REG_R12, REG_R13, + INS_OPTS_SCALABLE_D); // WHILELT <Pd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_8BYTE, REG_P15, REG_R14, REG_R15, + INS_OPTS_SCALABLE_D); // WHILELT <Pd>.<T>, <R><n>, <R><m> + + // IF_SVE_DU_3A + theEmitter->emitIns_R_R_R(INS_sve_whilerw, EA_8BYTE, REG_P0, REG_R0, REG_R1, + INS_OPTS_SCALABLE_B); // WHILERW <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilerw, EA_8BYTE, REG_P1, REG_R2, REG_R3, + INS_OPTS_SCALABLE_H); // WHILERW <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilerw, EA_8BYTE, REG_P2, REG_R4, REG_R5, + INS_OPTS_SCALABLE_S); // WHILERW <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilerw, EA_8BYTE, REG_P3, REG_R6, REG_R7, + INS_OPTS_SCALABLE_D); // WHILERW <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilewr, EA_8BYTE, REG_P4, REG_R8, REG_R9, + INS_OPTS_SCALABLE_B); // WHILEWR <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilewr, EA_8BYTE, REG_P5, REG_R10, REG_R11, + INS_OPTS_SCALABLE_H); // WHILEWR <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilewr, EA_8BYTE, REG_P6, REG_R12, REG_R13, + INS_OPTS_SCALABLE_S); // WHILEWR <Pd>.<T>, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilewr, EA_8BYTE, REG_P7, REG_R14, REG_R15, + INS_OPTS_SCALABLE_D); // WHILEWR <Pd>.<T>, <Xn>, <Xm> + + // IF_SVE_DX_3A + theEmitter->emitIns_R_R_R(INS_sve_whilege, EA_8BYTE, REG_P0, REG_R0, REG_R1, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILEGE {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilegt, EA_8BYTE, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILEGT {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilehi, EA_8BYTE, REG_P2, REG_R4, REG_R5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILEHI {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilehs, EA_8BYTE, REG_P3, REG_R6, REG_R7, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // 
WHILEHS {., .}, , + theEmitter->emitIns_R_R_R(INS_sve_whilele, EA_8BYTE, REG_P4, REG_R8, REG_R9, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILELE {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilelo, EA_8BYTE, REG_P5, REG_R10, REG_R11, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILELO {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilels, EA_8BYTE, REG_P6, REG_R12, REG_R13, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILELS {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_8BYTE, REG_P7, REG_R14, REG_R15, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR); // WHILELT {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + + // IF_SVE_DY_3A + theEmitter->emitIns_R_R_R(INS_sve_whilege, EA_8BYTE, REG_P8, REG_R0, REG_R1, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_VL_2X); // WHILEGE <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilege, EA_8BYTE, REG_P9, REG_R2, REG_R3, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_VL_4X); // WHILEGE <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilegt, EA_8BYTE, REG_P10, REG_R4, REG_R5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_VL_2X); // WHILEGT <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilegt, EA_8BYTE, REG_P11, REG_R6, REG_R7, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_VL_4X); // WHILEGT <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilehi, EA_8BYTE, REG_P12, REG_R8, REG_R9, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_VL_2X); // WHILEHI <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilehi, EA_8BYTE, REG_P13, REG_R10, REG_R11, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_VL_4X); // WHILEHI <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilehs, EA_8BYTE, REG_P14, REG_R12, REG_R13, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_VL_2X); // WHILEHS <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilehs, EA_8BYTE, REG_P15, REG_R14, REG_R15, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_VL_4X); // WHILEHS <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilele, EA_8BYTE, REG_P8, REG_R0, REG_R1, INS_OPTS_SCALABLE_B, + 
INS_SCALABLE_OPTS_VL_2X); // WHILELE <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilele, EA_8BYTE, REG_P9, REG_R2, REG_R3, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_VL_4X); // WHILELE <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilelo, EA_8BYTE, REG_P10, REG_R4, REG_R5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_VL_2X); // WHILELO <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilelo, EA_8BYTE, REG_P11, REG_R6, REG_R7, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_VL_4X); // WHILELO <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilels, EA_8BYTE, REG_P12, REG_R8, REG_R9, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_VL_2X); // WHILELS <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilels, EA_8BYTE, REG_P13, REG_R10, REG_R11, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_VL_4X); // WHILELS <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_8BYTE, REG_P14, REG_R12, REG_R13, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_VL_2X); // WHILELT <PNd>.<T>, <Xn>, <Xm>, <vl> + theEmitter->emitIns_R_R_R(INS_sve_whilelt, EA_8BYTE, REG_P15, REG_R14, REG_R15, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_VL_4X); // WHILELT <PNd>.<T>, <Xn>, <Xm>, <vl> + // IF_SVE_IH_3A theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V5, REG_P3, REG_R4, 0, INS_OPTS_SCALABLE_D); // LD1D {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}] diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e45e1c5da5505..f896e5258741b 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1419,6 +1419,16 @@ class emitter assert(!idIsSmallDsc()); idAddr()->_idRegBit = val ? 1 : 0; } + bool idVectorLength4x() const + { + assert(!idIsSmallDsc()); + return (idAddr()->_idRegBit == 1); // true when the instruction was marked with the 4x vector length specifier + } + void idVectorLength4x(bool val) + { + assert(!idIsSmallDsc()); + idAddr()->_idRegBit = val ? 
1 : 0; + } #endif // TARGET_ARM64 #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f0e6f1d27da34..feb0ddc39d31e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1260,7 +1260,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // mmmmm -#ifdef DEBUG if (id->idInsOpt() == INS_OPTS_SCALABLE_S) { assert(id->idIns() == INS_sve_sm4e); @@ -1269,7 +1268,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) { assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); } -#endif // DEBUG assert(isScalableVectorSize(elemsize)); break; @@ -1280,6 +1278,37 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + assert(id->idOpSize() == EA_8BYTE); // only EA_8BYTE operands are accepted for this format + + FALLTHROUGH; // the remaining checks are shared with IF_SVE_DT_3A + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isValidGeneralDatasize(id->idOpSize())); // X + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate + // pair) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); // DDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + assert(insOptsScalableStandard(id->idInsOpt())); // L + assert(isHighPredicateRegister(id->idReg1())); // DDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -1319,7 +1348,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isGeneralRegister(id->idReg3())); // nnnnn assert(isScalableVectorSize(elemsize)); -#ifdef DEBUG switch (id->idIns()) { case INS_sve_ld2b: @@ -1379,7 +1407,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidSimm4(emitGetInsSC(id))); // iiii break; } -#endif // DEBUG break; default: @@ -8053,10 +8080,11 @@ void emitter::emitIns_R_R_R(instruction ins, insOpts opt /* = INS_OPTS_NONE */, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool pmerge = false; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool pmerge = false; + bool vectorLength4x = false; // becomes true only for IF_SVE_DY_3A when sopt is INS_SCALABLE_OPTS_VL_4X /* Figure out the encoding format of the instruction */ switch (ins) @@ -9250,6 +9278,53 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_HR_3A; break; + case 
INS_sve_whilege: + case INS_sve_whilegt: + case INS_sve_whilelt: + case INS_sve_whilele: + case INS_sve_whilehs: + case INS_sve_whilehi: + case INS_sve_whilelo: + case INS_sve_whilels: + assert(isGeneralRegister(reg2)); // nnnnn + assert(isGeneralRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + assert(insOptsScalableStandard(opt)); + + if (insScalableOptsNone(sopt)) + { + assert(isPredicateRegister(reg1)); // DDDD + assert(isValidGeneralDatasize(size)); // X + fmt = IF_SVE_DT_3A; + } + else if (insScalableOptsWithPredicatePair(sopt)) + { + assert(isLowPredicateRegister(reg1)); // DDD + assert(size == EA_8BYTE); + fmt = IF_SVE_DX_3A; + } + else + { + assert(insScalableOptsWithVectorLength(sopt)); // l + assert(isHighPredicateRegister(reg1)); // DDD + assert(size == EA_8BYTE); + vectorLength4x = (sopt == INS_SCALABLE_OPTS_VL_4X); // INS_SCALABLE_OPTS_VL_2X leaves the flag clear + fmt = IF_SVE_DY_3A; + } + break; + + case INS_sve_whilewr: + case INS_sve_whilerw: + assert(insOptsScalableStandard(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isGeneralRegister(reg2)); // nnnnn + assert(size == EA_8BYTE); + assert(isGeneralRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_DU_3A; + break; + default: unreached(); break; @@ -9272,6 +9347,10 @@ void emitter::emitIns_R_R_R(instruction ins, { id->idPredicateReg2Merge(pmerge); } + else if (vectorLength4x) + { + id->idVectorLength4x(vectorLength4x); + } dispIns(id); appendToCurIG(id); @@ -12605,8 +12684,8 @@ void emitter::emitIns_Call(EmitCallType callType, { assert(isPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; - assert((ureg >= 0) && (ureg <= 15)); - return ureg << 0; + assert((ureg >= 8) && (ureg <= 15)); + return (ureg - 8) << 0; // P8..P15 are encoded as 0..7 } /***************************************************************************** @@ -12912,6 +12991,24 @@ void emitter::emitIns_Call(EmitCallType 
callType, } } +/***************************************************************************** + * + * Returns the encoding to set the vector length specifier (vl) for an Arm64 SVE instruction: bit 13 when id->idVectorLength4x() is true, otherwise 0 + */ + +/*static*/ emitter::code_t emitter::insEncodeVectorLengthSpecifier(instrDesc* id) +{ + assert(id != nullptr); + assert(insOptsScalableStandard(id->idInsOpt())); + + if (id->idVectorLength4x()) + { + return 0x2000; // set the bit at location 13 + } + + return 0; // not marked 4x: bit 13 stays clear +} + /***************************************************************************** * * Returns the encoding to select 'index' for an Arm64 vector elem instruction @@ -16260,6 +16357,46 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD + code |= insEncodeReg_R_9_to_5(id->idReg2()); // nnnnn + code |= (id->idOpSize() == EA_8BYTE) ? (1 << 12) : 0; // X + code |= insEncodeReg_R_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate + // pair) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_P_3_to_1(id->idReg1()); // DDD + code |= insEncodeReg_R_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeVectorLengthSpecifier(id); // l + code |= insEncodeReg_P_2_to_0(id->idReg1()); // DDD + code |= insEncodeReg_R_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx (SVE element-size helper, same as the DT/DX/DU cases) + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD + code |= insEncodeReg_R_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -16990,6 +17127,39 @@ void emitter::emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpt emitDispPredicateReg(reg, ptype, opt, addComma); } +//------------------------------------------------------------------------ +// emitDispLowPredicateRegPair: Display a pair of low predicate registers +// +void emitter::emitDispLowPredicateRegPair(regNumber reg, insOpts opt) +{ + assert(isLowPredicateRegister(reg)); + + printf("{ "); + const unsigned 
baseRegNum = ((unsigned)reg - REG_PREDICATE_FIRST) & 0x7; + const unsigned regNum = (baseRegNum * 2) + REG_PREDICATE_FIRST; + emitDispPredicateReg((regNumber)regNum, PREDICATE_SIZED, opt, true); + emitDispPredicateReg((regNumber)(regNum + 1), PREDICATE_SIZED, opt, false); + printf(" }, "); +} + +//------------------------------------------------------------------------ +// emitDispVectorLengthSpecifier: Display the vector length specifier ("vlx4" when the instruction is marked 4x, otherwise "vlx2") +// +void emitter::emitDispVectorLengthSpecifier(instrDesc* id) +{ + assert(id != nullptr); + assert(insOptsScalableStandard(id->idInsOpt())); + + if (id->idVectorLength4x()) + { + printf("vlx4"); + } + else + { + printf("vlx2"); + } +} + //------------------------------------------------------------------------ // emitDispArrangement: Display a SIMD vector arrangement suffix // @@ -17040,7 +17210,7 @@ void emitter::emitDispArrangement(insOpts opt) break; default: - assert(!"Invalid insOpt for vector register"); + assert(!"Invalid SVE insOpt"); } printf("."); printf(str); @@ -18834,6 +19004,32 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd break; + // <Pd>.<T>, <R><n>, <R><m> + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + // <Pd>.<T>, <Xn>, <Xm> + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm + break; + + // {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm> + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate + // pair) + emitDispLowPredicateRegPair(id->idReg1(), id->idInsOpt()); + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm + break; + + // <PNd>.<T>, <Xn>, <Xm>, <vl> + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDD + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), true); // mmmmm + emitDispVectorLengthSpecifier(id); + break; + // { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}] // Some of these formats may allow changing the element size instead of using 'D' for all instructions. case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -21484,13 +21680,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations + case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate + // pair) + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; break; case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 12650ee664c07..de7b53805e286 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -56,6 +56,8 @@ void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elem void emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); void emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); +void emitDispLowPredicateRegPair(regNumber reg, insOpts opt); +void emitDispVectorLengthSpecifier(instrDesc* id); void emitDispArrangement(insOpts opt); void emitDispElemsize(emitAttr elemsize); void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr); @@ -418,6 +420,9 @@ static code_t insEncodeDatasizeBF(code_t code, emitAttr size); // Returns the encoding to select the vectorsize for SIMD Arm64 instructions static code_t insEncodeVectorsize(emitAttr size); +// Returns the encoding to set the vector length specifier (vl) for an Arm64 SVE instruction (non-zero only when the instrDesc is marked 4x) +static code_t insEncodeVectorLengthSpecifier(instrDesc* id); + // Returns the encoding to select 'index' for an Arm64 vector elem instruction static code_t insEncodeVectorIndex(emitAttr elemsize, ssize_t index); @@ -859,12 +864,17 @@ inline static bool isFloatReg(regNumber reg) inline static bool isPredicateRegister(regNumber reg) { - return (reg >= 
REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LAST); + return (reg >= REG_PREDICATE_FIRST) && (reg <= REG_PREDICATE_LAST); } inline static bool isLowPredicateRegister(regNumber reg) { - return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LOW_LAST); + return (reg >= REG_PREDICATE_FIRST) && (reg <= REG_PREDICATE_LOW_LAST); +} + +inline static bool isHighPredicateRegister(regNumber reg) +{ + return (reg >= REG_PREDICATE_HIGH_FIRST) && (reg <= REG_PREDICATE_HIGH_LAST); } inline static bool insOptsNone(insOpts opt) @@ -1009,9 +1019,22 @@ inline static bool insOptsScalableWide(insOpts opt) inline static bool insScalableOptsNone(insScalableOpts sopt) { + // `sopt` is used for instructions with no extra encoding variants. return sopt == INS_SCALABLE_OPTS_NONE; } +inline static bool insScalableOptsWithPredicatePair(insScalableOpts sopt) +{ + // `sopt` denotes the instruction's predicate register should be encoded as a {<Pd1>.<T>, <Pd2>.<T>} pair. + return sopt == INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR; +} + +inline static bool insScalableOptsWithVectorLength(insScalableOpts sopt) +{ + // `sopt` is any of the scalable types that are valid for use with instructions with a vector length specifier (vl). 
+ return ((sopt == INS_SCALABLE_OPTS_VL_2X) || (sopt == INS_SCALABLE_OPTS_VL_4X)); +} + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index e24d25cd5f606..a906ce2a5e440 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -276,7 +276,7 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_D, INS_OPTS_SCALABLE_Q, - INS_OPTS_MSL, // Vector Immediate (shifting ones variant) + INS_OPTS_MSL, // Vector Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 INS_OPTS_D_TO_4BYTE, // Double to INT32 @@ -297,22 +297,25 @@ enum insOpts : unsigned INS_OPTS_H_TO_D, // Half to Double INS_OPTS_S_TO_H, // Single to Half - INS_OPTS_D_TO_H // Double to Half + INS_OPTS_D_TO_H, // Double to Half #if FEATURE_LOOP_ALIGN - , INS_OPTS_ALIGN // Align instruction + INS_OPTS_ALIGN // Align instruction #endif }; // When a single instruction has different encodings variants, this is used -// to distinguish those that can't be determined soley by register usage. +// to distinguish those that can't be determined solely by register usage. 
enum insScalableOpts : unsigned { - INS_SCALABLE_OPTS_NONE, // No Variants exist - - INS_SCALABLE_OPTS_WIDE, // Variants with wide elements (eg asr) - INS_SCALABLE_OPTS_WITH_SIMD_SCALAR, // Variants with a NEON SIMD register (eg clasta) - INS_SCALABLE_OPTS_PREDICATE_MERGE, // Variants with a Pg/M predicate (eg brka) + INS_SCALABLE_OPTS_NONE, // No Variants exist + + INS_SCALABLE_OPTS_WIDE, // Variants with wide elements (eg asr) + INS_SCALABLE_OPTS_WITH_SIMD_SCALAR, // Variants with a NEON SIMD register (eg clasta) + INS_SCALABLE_OPTS_PREDICATE_MERGE, // Variants with a Pg/M predicate (eg brka) + INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {<Pd1>.<T>, <Pd2>.<T>} predicate pair (eg whilege) + INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) + INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) }; enum insCond : unsigned diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 961862e5184d7..3646ecb4407bf 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -53,6 +53,10 @@ #define REG_PREDICATE_FIRST REG_P0 #define REG_PREDICATE_LAST REG_P15 #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers. + #define REG_PREDICATE_HIGH_FIRST REG_P8 // Similarly, some instructions can only use the second half of the predicate registers. + #define REG_PREDICATE_HIGH_LAST REG_P15 + + static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST); #define REGNUM_BITS 6 // number of bits in a REG_* #define REGSIZE_BYTES 8 // number of bytes in one general purpose register