diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 45afd2629a368..03edaae736690 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5092,6 +5092,52 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_msb, EA_SCALABLE, REG_V8, REG_P7, REG_V12, REG_V22, INS_OPTS_SCALABLE_D); // MSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // IF_SVE_AT_3A + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V0, REG_V0, REG_V0, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_UNPREDICATED); // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V3, REG_V31, REG_V12, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_UNPREDICATED); // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V7, REG_V0, REG_V31, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_UNPREDICATED); // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V19, REG_V7, REG_V13, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_UNPREDICATED); // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V23, REG_V28, REG_V29, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_UNPREDICATED); // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V31, REG_V31, REG_V31, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_UNPREDICATED); // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + + // IF_SVE_BA_3A + theEmitter->emitIns_R_R_R(INS_sve_index, EA_4BYTE, REG_V24, REG_ZR, REG_R9, + INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, <R><n>, <R><m> + theEmitter->emitIns_R_R_R(INS_sve_index, EA_8BYTE, REG_V12, REG_R15, REG_R0, + INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, <R><n>, <R><m> + + // IF_SVE_BD_3A + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_V0, REG_V31, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_UNPREDICATED); // MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V0, REG_V31, REG_V5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_UNPREDICATED); // SMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> 
+ theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_UNPREDICATED); // UMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + + // IF_SVE_BE_3A + theEmitter->emitIns_R_R_R(INS_sve_sqdmulh, EA_SCALABLE, REG_V7, REG_V28, REG_V0, + INS_OPTS_SCALABLE_B); // SQDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sqrdmulh, EA_SCALABLE, REG_V23, REG_V3, REG_V31, + INS_OPTS_SCALABLE_H); // SQRDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + + // IF_SVE_BG_3A + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V9, REG_V31, REG_V2, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_V0, REG_V12, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D + + // IF_SVE_BK_3A + theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15, + INS_OPTS_SCALABLE_D); // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // IF_SVE_CL_3A theEmitter->emitIns_R_R_R(INS_sve_compact, EA_SCALABLE, REG_V16, REG_P7, REG_V13, INS_OPTS_SCALABLE_S); // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> 
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ce5a2820e0e43..1877f0a9036e9 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1055,6 +1055,32 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + // Scalable, unpredicated + case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + // Scalable, no predicates. General purpose source registers + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(elemsize)); + break; + // Scalable, 4 regs, to predicate register. 
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); @@ -8860,11 +8886,20 @@ void emitter::emitIns_R_R_R(instruction ins, case INS_sve_sub: case INS_sve_subr: assert(isVectorRegister(reg1)); - assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); - assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_AB_3A; + if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + { + assert(isVectorRegister(reg2)); + assert(ins != INS_sve_subr); + fmt = IF_SVE_AT_3A; + } + else + { + assert(isLowPredicateRegister(reg2)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_AB_3A; + } break; case INS_sve_sdiv: @@ -8897,11 +8932,19 @@ void emitter::emitIns_R_R_R(instruction ins, case INS_sve_smulh: case INS_sve_umulh: assert(isVectorRegister(reg1)); - assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); - assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_AE_3A; + if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + { + assert(isVectorRegister(reg2)); + fmt = IF_SVE_BD_3A; + } + else + { + assert(insScalableOptsNone(sopt)); + assert(isLowPredicateRegister(reg2)); + fmt = IF_SVE_AE_3A; + } break; case INS_sve_andv: @@ -8999,15 +9042,22 @@ void emitter::emitIns_R_R_R(instruction ins, case INS_sve_lsl: case INS_sve_lsr: assert(isVectorRegister(reg1)); - assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); if (sopt == INS_SCALABLE_OPTS_WIDE) { + assert(isLowPredicateRegister(reg2)); assert(insOptsScalableWide(opt)); fmt = IF_SVE_AO_3A; } + else if (sopt == INS_SCALABLE_OPTS_UNPREDICATED_WIDE) + { + assert(isVectorRegister(reg2)); + assert(insOptsScalableWide(opt)); + fmt = IF_SVE_BG_3A; + } else { + assert(isLowPredicateRegister(reg2)); assert(insScalableOptsNone(sopt)); assert(insOptsScalableStandard(opt)); fmt = IF_SVE_AN_3A; @@ -9077,6 +9127,37 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_AQ_3A; break; + 
case INS_sve_index: + assert(isValidScalarDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegisterOrZR(reg3)); + assert(insOptsScalableStandard(opt)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_BA_3A; + break; + + case INS_sve_sqdmulh: + case INS_sve_sqrdmulh: + assert(isScalableVectorSize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableStandard(opt)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_BE_3A; + break; + + case INS_sve_ftssel: + assert(isScalableVectorSize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_BK_3A; + break; + case INS_sve_compact: assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); @@ -9248,10 +9329,27 @@ void emitter::emitIns_R_R_R(instruction ins, case INS_sve_sqadd: case INS_sve_sqsub: - case INS_sve_sqsubr: - case INS_sve_suqadd: case INS_sve_uqadd: case INS_sve_uqsub: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableStandard(opt)); + assert(isScalableVectorSize(size)); + if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + { + assert(isVectorRegister(reg2)); + fmt = IF_SVE_AT_3A; + } + else + { + assert(insScalableOptsNone(sopt)); + assert(isLowPredicateRegister(reg2)); + fmt = IF_SVE_ET_3A; + } + break; + + case INS_sve_sqsubr: + case INS_sve_suqadd: case INS_sve_uqsubr: case INS_sve_usqadd: assert(isVectorRegister(reg1)); @@ -9259,6 +9357,7 @@ void emitter::emitIns_R_R_R(instruction ins, assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); assert(insScalableOptsNone(sopt)); + assert(isScalableVectorSize(size)); fmt = IF_SVE_ET_3A; break; @@ -16477,6 +16576,32 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 
emitOutput_Instr(dst, code); break; + // Scalable, 3 regs, no predicates + case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + // Scalable, 3 regs, no predicates. General purpose source registers + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + // Scalable to general register. case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register @@ -19107,6 +19232,32 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg4(), id->idInsOpt(), false); break; + // <Zd>.<T>, <Zn>.<T>, <Zm>.<T> 
+ case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // <Zd>.<T>, <R><n>, <R><m> + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), size, true); // nnnnn + emitDispReg(id->idReg3(), size, false); // mmmmm + break; + + // <Zd>.<T>, <Zn>.<T>, <Zm>.D + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + // <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> case IF_SVE_CM_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally broadcast element to vector emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -21758,10 +21909,34 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // (predicated) case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand // (predicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; + case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_8C; + break; + + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + // Conditional extract operations, SIMD&FP scalar and vector forms case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements case IF_SVE_CM_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally broadcast element to vector diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index a667cf938b01c..5fd9dd456d65c 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -316,6 +316,10 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {<Pd1>.<T>, <Pd2>.<T>} predicate pair (eg whilege) INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) + + // Removable once REG_V0 and REG_P0 are distinct + INS_SCALABLE_OPTS_UNPREDICATED, // Variants without a predicate (eg add) + INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) }; enum insCond : unsigned