From 35562ee5ac02c68d42d5b77fb0af09123d79c3ba Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Sun, 11 Feb 2024 02:05:55 -0500 Subject: [PATCH] JIT ARM64-SVE: Add FK_3{A,B,C}, EJ_3A, EK_3A, EY_3B, EW_3{A,B} (#98187) --- src/coreclr/jit/codegenarm64test.cpp | 112 +++++-- src/coreclr/jit/emitarm64.cpp | 441 ++++++++++++++++++--------- src/coreclr/jit/emitarm64.h | 11 + 3 files changed, 406 insertions(+), 158 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 619047a20102d..6688ad3814171 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5916,6 +5916,42 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V7, REG_V8, REG_V3, 3, INS_OPTS_SCALABLE_H); // UDOT .S, .H, .H[] + // IF_SVE_EJ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 0, + INS_OPTS_SCALABLE_S); // CDOT ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 90, + INS_OPTS_SCALABLE_S); // CDOT ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 180, + INS_OPTS_SCALABLE_D); // CDOT ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 270, + INS_OPTS_SCALABLE_D); // CDOT ., ., ., + + // IF_SVE_EK_3A + theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 0, + INS_OPTS_SCALABLE_B); // CMLA ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 90, + INS_OPTS_SCALABLE_H); // CMLA ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 180, + INS_OPTS_SCALABLE_S); // CMLA ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 270, + INS_OPTS_SCALABLE_D); // CMLA ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V12, REG_V13, REG_V14, 0, + INS_OPTS_SCALABLE_B); // SQRDCMLAH ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V15, REG_V16, REG_V17, 90, + INS_OPTS_SCALABLE_H); // SQRDCMLAH ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V18, REG_V19, REG_V20, 180, + INS_OPTS_SCALABLE_S); // SQRDCMLAH ., ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V21, REG_V22, REG_V23, 270, + INS_OPTS_SCALABLE_D); // SQRDCMLAH ., ., ., + +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + // IF_SVE_EW_3A + theEmitter->emitIns_R_R_R(INS_sve_mlapt, EA_SCALABLE, REG_V0, REG_V1, REG_V2); // MLAPT .D, .D, .D + + // IF_SVE_EW_3B + theEmitter->emitIns_R_R_R(INS_sve_madpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5); // MADPT .D, .D, .D +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + // IF_SVE_EY_3A theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V9, REG_V10, REG_V4, 0, INS_OPTS_SCALABLE_B); // SDOT .S, .B, .B[] @@ -5926,6 +5962,16 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V15, REG_V16, REG_V7, 3, INS_OPTS_SCALABLE_B); // UDOT .S, .B, .B[] + // IF_SVE_EY_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0, + 0); // SDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, + 1); // SDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, + 0); // UDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, + 1); // UDOT .D, .H, .H[] + // IF_SVE_EZ_3A theEmitter->emitIns_R_R_R_I(INS_sve_sudot, EA_SCALABLE, REG_V17, REG_V18, REG_V0, 0, INS_OPTS_SCALABLE_B); // SUDOT .S, .B, .B[] @@ -6138,6 +6184,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_mls, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, INS_OPTS_SCALABLE_D); // MLS .D, .D, .D[] + // IF_SVE_FK_3A + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // SQRDMLAH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // SQRDMLAH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // SQRDMLSH .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7, + INS_OPTS_SCALABLE_H); // SQRDMLSH .H, .H, .H[] + + // IF_SVE_FK_3B + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0, + INS_OPTS_SCALABLE_S); // SQRDMLAH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V10, REG_V11, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SQRDMLAH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V12, REG_V13, REG_V4, 2, + INS_OPTS_SCALABLE_S); // SQRDMLSH .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V14, REG_V15, REG_V6, 3, + INS_OPTS_SCALABLE_S); // SQRDMLSH .S, .S, .S[] + + // IF_SVE_FK_3C + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0, + INS_OPTS_SCALABLE_D); // SQRDMLAH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1, + INS_OPTS_SCALABLE_D); // SQRDMLAH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0, + INS_OPTS_SCALABLE_D); // SQRDMLSH .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, + INS_OPTS_SCALABLE_D); // SQRDMLSH .D, .D, .D[] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # @@ -6169,61 +6245,61 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_FA_3A theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0, INS_OPTS_SCALABLE_B); // CDOT .S, .B, .B[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90, INS_OPTS_SCALABLE_B); // CDOT .S, .B, .B[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180, INS_OPTS_SCALABLE_B); // CDOT .S, .B, .B[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270, INS_OPTS_SCALABLE_B); // CDOT .S, .B, .B[], // IF_SVE_FA_3B theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0, INS_OPTS_SCALABLE_H); // CDOT .D, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90, INS_OPTS_SCALABLE_H); // CDOT .D, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180, INS_OPTS_SCALABLE_H); // CDOT .D, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270, INS_OPTS_SCALABLE_H); // CDOT .D, .H, .H[], // IF_SVE_FB_3A theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0, INS_OPTS_SCALABLE_H); // CMLA .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90, INS_OPTS_SCALABLE_H); // CMLA .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180, INS_OPTS_SCALABLE_H); // CMLA .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270, INS_OPTS_SCALABLE_H); // CMLA .H, .H, .H[], // IF_SVE_FB_3B theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0, INS_OPTS_SCALABLE_S); // CMLA .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90, INS_OPTS_SCALABLE_S); // CMLA .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180, INS_OPTS_SCALABLE_S); // CMLA .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270, INS_OPTS_SCALABLE_S); // CMLA .S, .S, .S[], // IF_SVE_FC_3A theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0, INS_OPTS_SCALABLE_H); // SQRDCMLAH .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90, INS_OPTS_SCALABLE_H); // SQRDCMLAH .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180, INS_OPTS_SCALABLE_H); // SQRDCMLAH .H, .H, .H[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270, INS_OPTS_SCALABLE_H); // SQRDCMLAH .H, .H, .H[], // IF_SVE_FC_3B theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0, INS_OPTS_SCALABLE_S); // SQRDCMLAH .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90, INS_OPTS_SCALABLE_S); // SQRDCMLAH .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180, INS_OPTS_SCALABLE_S); // SQRDCMLAH .S, .S, .S[], - theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3, + theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270, INS_OPTS_SCALABLE_S); // SQRDCMLAH .S, .S, .S[], // IF_SVE_IH_3A diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 296623036d8df..f460833a28e31 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1157,6 +1157,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm7(emitGetInsSC(id))); // iiiii break; + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm + assert(isVectorRegister(id->idReg3())); // mmmmm/aaaaa + break; + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) @@ -1166,6 +1174,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1183,6 +1192,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1196,24 +1206,24 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); - assert(isValidUimm2(emitGetInsSC(id))); // ii + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmmm + assert(isValidUimm2(emitGetInsSC(id))); // ii break; + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); - assert(isValidImm1(emitGetInsSC(id))); // i + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmmm + assert(isValidImm1(emitGetInsSC(id))); // i break; case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations @@ -1661,6 +1671,19 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx break; + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + assert(insOptsScalableWords(id->idInsOpt())); + + FALLTHROUGH; + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm2(emitGetInsSC(id))); // rr + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) @@ -1676,11 +1699,10 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); - assert(isValidUimm3(emitGetInsSC(id))); // i rr + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmm + assert(isValidUimm3(emitGetInsSC(id))); // i rr break; case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -10871,6 +10893,30 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_CZ_4A; break; + case INS_sve_mlapt: + unreached(); // TODO-SVE: Not yet supported. + assert(insOptsNone(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + fmt = IF_SVE_EW_3A; + + // opt is set only for convenience in emitDispInsHelp + opt = INS_OPTS_SCALABLE_D; + break; + + case INS_sve_madpt: + unreached(); // TODO-SVE: Not yet supported. + assert(insOptsNone(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // mmmmm + assert(isVectorRegister(reg3)); // aaaaa + fmt = IF_SVE_EW_3B; + + // opt is set only for convenience in emitDispInsHelp + opt = INS_OPTS_SCALABLE_D; + break; + case INS_sve_fcmeq: case INS_sve_fcmge: case INS_sve_fcmgt: @@ -11146,22 +11192,29 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_sdot: case INS_sve_udot: - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - assert(isValidUimm2(imm)); // ii + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_B) { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii fmt = IF_SVE_EY_3A; } - else + else if (opt == INS_OPTS_SCALABLE_H) { - assert(opt == INS_OPTS_SCALABLE_H); + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii fmt = IF_SVE_EG_3A; } + else + { + assert(insOptsNone(opt)); + assert(isValidImm1(imm)); // i + opt = INS_OPTS_SCALABLE_H; + fmt = IF_SVE_EY_3B; + } break; case INS_sve_usdot: @@ -11177,9 +11230,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_mul: assert(insOptsScalableAtLeastHalf(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm switch (opt) { @@ -11196,8 +11249,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_OPTS_SCALABLE_D: - assert(isValidImm1(imm)); // i - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + assert(isValidImm1(imm)); // i fmt = IF_SVE_FD_3C; break; @@ -11447,63 +11499,30 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_cdot: - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - if (opt == INS_OPTS_SCALABLE_B) - { - assert(isValidUimm4(imm)); // ii rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - fmt = IF_SVE_FA_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_H); - assert(isValidUimm3(imm)); // i rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); - fmt = IF_SVE_FA_3B; - } - break; + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidRot(imm)); // rr + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx - case INS_sve_cmla: - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - if (opt == INS_OPTS_SCALABLE_H) - { - assert(isValidUimm4(imm)); // ii rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - fmt = IF_SVE_FB_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert(isValidUimm3(imm)); // i rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); - fmt = IF_SVE_FB_3B; - } + // Convert rot to bitwise representation + imm /= 90; + fmt = IF_SVE_EJ_3A; break; + case INS_sve_cmla: case INS_sve_sqrdcmlah: assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - if (opt == INS_OPTS_SCALABLE_H) - { - assert(isValidUimm4(imm)); // ii rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - fmt = IF_SVE_FC_3A; - } - else - { - assert(opt == INS_OPTS_SCALABLE_S); - assert(isValidUimm3(imm)); // i rr - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); - fmt = IF_SVE_FC_3B; - } + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidRot(imm)); // rr + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + + // Convert rot to bitwise representation + imm /= 90; + fmt = IF_SVE_EK_3A; break; case INS_sve_ld1d: @@ -12031,9 +12050,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_fmla: case INS_sve_fmls: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_S) { @@ -12044,8 +12063,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_D); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidImm1(imm)); // i + assert(isValidImm1(imm)); // i fmt = IF_SVE_GU_3B; } break; @@ -12062,9 +12080,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_fmul: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_S) { @@ -12075,8 +12093,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_D); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidImm1(imm)); // i + assert(isValidImm1(imm)); // i fmt = IF_SVE_GX_3B; } break; @@ -12104,9 +12121,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_mla: case INS_sve_mls: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12123,8 +12140,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_D); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidImm1(imm)); // i + assert(isValidImm1(imm)); // i fmt = IF_SVE_FF_3C; } break; @@ -12133,9 +12149,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_smullt: case INS_sve_umullb: case INS_sve_umullt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12146,8 +12162,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i + assert(isValidUimm2(imm)); // i i fmt = IF_SVE_FE_3B; } break; @@ -12160,9 +12175,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_smlslt: case INS_sve_umlslb: case INS_sve_umlslt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12173,17 +12188,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i + assert(isValidUimm2(imm)); // i i fmt = IF_SVE_FG_3B; } break; case INS_sve_sqdmullb: case INS_sve_sqdmullt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12194,17 +12208,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // i i + assert(isValidUimm2(imm)); // i i fmt = IF_SVE_FH_3B; } break; case INS_sve_sqdmulh: case INS_sve_sqrdmulh: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12221,8 +12234,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_D); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidImm1(imm)); // i + assert(isValidImm1(imm)); // i fmt = IF_SVE_FI_3C; } break; @@ -12231,9 +12243,9 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_sqdmlalt: case INS_sve_sqdmlslb: case INS_sve_sqdmlslt: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm if (opt == INS_OPTS_SCALABLE_H) { @@ -12244,12 +12256,37 @@ void emitter::emitIns_R_R_R_I(instruction ins, else { assert(opt == INS_OPTS_SCALABLE_S); - assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm - assert(isValidUimm2(imm)); // ii + assert(isValidUimm2(imm)); // ii fmt = IF_SVE_FJ_3B; } break; + case INS_sve_sqrdmlah: + case INS_sve_sqrdmlsh: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // i ii + fmt = IF_SVE_FK_3A; + } + else if (opt == INS_OPTS_SCALABLE_S) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FK_3B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FK_3C; + } + break; + case INS_sve_fcadd: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); @@ -12433,49 +12470,95 @@ void emitter::emitIns_R_R_R_I_I(instruction ins, ssize_t imm2, insOpts opt) { + insFormat fmt = IF_NONE; + ssize_t imm; + switch (ins) { case INS_sve_cdot: - { + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + assert(isValidRot(imm2)); // rr + // Convert imm2 from rotation value (0-270) to bitwise representation (0-3) + imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2); + if (opt == INS_OPTS_SCALABLE_B) { - assert(isValidUimm2(imm1)); // ii + assert(isValidUimm2(imm1)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FA_3A; } else { assert(opt == INS_OPTS_SCALABLE_H); assert(isValidImm1(imm1)); // i + fmt = IF_SVE_FA_3B; } - - assert(isValidUimm2(imm2)); // rr - const ssize_t imm = (imm1 << 2) | imm2; - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt); break; - } case INS_sve_cmla: - case INS_sve_sqrdcmlah: - { + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + assert(isValidRot(imm2)); // rr + // Convert imm2 from rotation value (0-270) to bitwise representation (0-3) + imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2); + if (opt == INS_OPTS_SCALABLE_H) { - assert(isValidUimm2(imm1)); // ii + assert(isValidUimm2(imm1)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FB_3A; } else { assert(opt == INS_OPTS_SCALABLE_S); assert(isValidImm1(imm1)); // i + fmt = IF_SVE_FB_3B; } + break; + + case INS_sve_sqrdcmlah: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + assert(isValidRot(imm2)); // rr + // Convert imm2 from rotation value (0-270) to bitwise representation (0-3) + imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2); - assert(isValidUimm2(imm2)); // rr - const ssize_t imm = (imm1 << 2) | imm2; - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt); + if (opt == INS_OPTS_SCALABLE_H) + { + assert(isValidUimm2(imm1)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FC_3A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_S); + assert(isValidImm1(imm1)); // i + fmt = IF_SVE_FC_3B; + } break; - } default: unreached(); break; } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** @@ -21052,6 +21135,22 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg3()); // aaaaa + code |= insEncodeReg_V_20_to_16(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) @@ -21061,6 +21160,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -21074,6 +21174,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -21112,11 +21213,13 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -21401,13 +21504,24 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn - code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm code |= insEncodeUimm2_11_to_10(rot); // rr + code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm code |= insEncodeUimm2_20_to_19(index); // ii dst += emitOutput_Instr(dst, code); break; } + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeUimm2_11_to_10(emitGetInsSC(id)); // rr + code |= insEncodeReg_V_20_to_16(id->idReg3()); // mmmmm + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) @@ -24250,9 +24364,13 @@ void emitter::emitDispInsHelp( case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high // (unpredicated) case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + // .D, .D, .D + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + // .D, .D, .D + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn/mmmmm + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa break; // ., , @@ -24440,6 +24558,7 @@ void emitter::emitDispInsHelp( case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm @@ -24458,6 +24577,14 @@ void emitter::emitDispInsHelp( emitDispElementIndex(emitGetInsSC(id), false); // ii break; + // .D, .H, .H[] + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // ii + break; + // .H, .H, .H[] case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) @@ -24471,10 +24598,12 @@ void emitter::emitDispInsHelp( case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) // .H, .H, .H[] case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm @@ -24776,6 +24905,30 @@ void emitter::emitDispInsHelp( emitDispImm(emitGetInsSC(id), false); // iiiiiiii break; + // CDOT ., ., ., + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, true); // mmmmm + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr + break; + } + + // CMLA ., ., ., + // SQRDCMLAH ., ., ., + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr + break; + // CDOT .S, .B, .B[], case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) { @@ -24788,7 +24941,7 @@ void emitter::emitDispInsHelp( emitDispElementIndex(index, true); // ii // rot specifies a multiple of 90-degree rotations - emitDispImm(rot * 90, false); // rr + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr break; } @@ -24804,7 +24957,7 @@ void emitter::emitDispInsHelp( emitDispElementIndex(index, true); // i // rot specifies a multiple of 90-degree rotations - emitDispImm(rot * 90, false); // rr + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr break; } @@ -24826,7 +24979,7 @@ void emitter::emitDispInsHelp( emitDispElementIndex(index, true); // i // rot specifies a multiple of 90-degree rotations - emitDispImm(rot * 90, false); // rr + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr break; } @@ -27570,6 +27723,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; @@ -27606,6 +27762,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix result.insLatency = PERFSCORE_LATENCY_1C; // need to fix break; @@ -27687,6 +27845,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) @@ -28070,6 +28229,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_4C; break; @@ -28118,6 +28278,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6868f27f5bab9..abc745e9421c5 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -782,6 +782,12 @@ static bool isValidSimm5(ssize_t value) return (-0x10LL <= value) && (value <= 0xFLL); }; +// Returns true if 'value' is a legal rotation value (such as for CDOT, CMLA). +static bool isValidRot(ssize_t value) +{ + return (value == 0) || (value == 90) || (value == 180) || (value == 270); +} + // Returns true if 'value' represents a valid 'bitmask immediate' encoding. static bool isValidImmNRS(size_t value, emitAttr size) { @@ -1023,6 +1029,11 @@ inline static bool isVectorRegister(regNumber reg) return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); } +inline static bool isLowVectorRegister(regNumber reg) +{ + return (reg >= FIRST_FP_ARGREG) && (reg <= LAST_FP_ARGREG); +} + inline static bool isFloatReg(regNumber reg) { return isVectorRegister(reg);