From 12f9f91031224a45c146812a7f4a41e8cdb87e1c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 20 Oct 2022 11:04:25 +0300 Subject: [PATCH] [mono][interp] Don't use mov.vt when storing into primitive type fields (#77221) * [mono][interp] Rename opcodes to better suggest that they are doing a conversion * [mono][interp] Don't use mov.vt when storing into primitive type fields Use the faster mov.x opcodes instead. --- src/mono/mono/mini/interp/interp.c | 13 ++++++--- src/mono/mono/mini/interp/mintops.def | 10 ++++--- src/mono/mono/mini/interp/mintops.h | 2 +- src/mono/mono/mini/interp/transform.c | 40 ++++++++++++++++++++------- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 6dc93448078ca..0efeb187b99af 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -7111,10 +7111,15 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; // which is our minimum "register" size in interp. They are only needed when // the address of the local is taken and we should try to optimize them out // because the local can't be propagated. - MINT_IN_CASE(MINT_MOV_I1) MOV(guint32, gint8); MINT_IN_BREAK; - MINT_IN_CASE(MINT_MOV_U1) MOV(guint32, guint8); MINT_IN_BREAK; - MINT_IN_CASE(MINT_MOV_I2) MOV(guint32, gint16); MINT_IN_BREAK; - MINT_IN_CASE(MINT_MOV_U2) MOV(guint32, guint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_MOV_I4_I1) MOV(gint32, gint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_MOV_I4_U1) MOV(gint32, guint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_MOV_I4_I2) MOV(gint32, gint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_MOV_I4_U2) MOV(gint32, guint16); MINT_IN_BREAK; + // These moves are used to store into the field of a local valuetype + // No sign extension is needed, we just move bytes from the execution + // stack, no additional conversion is needed. + MINT_IN_CASE(MINT_MOV_1) MOV(gint8, gint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_MOV_2) MOV(gint16, gint16); MINT_IN_BREAK; // Normal moves between locals MINT_IN_CASE(MINT_MOV_4) MOV(guint32, guint32); MINT_IN_BREAK; MINT_IN_CASE(MINT_MOV_8) MOV(guint64, guint64); MINT_IN_BREAK; diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 7c44343aa5e87..d6188995716a2 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -110,10 +110,12 @@ OPDEF(MINT_LDTSFLDA, "ldtsflda", 4, 1, 0, MintOpInt) OPDEF(MINT_MOV_SRC_OFF, "mov.src.off", 6, 1, 1, MintOpTwoShorts) OPDEF(MINT_MOV_DST_OFF, "mov.dst.off", 6, 1, 1, MintOpTwoShorts) -OPDEF(MINT_MOV_I1, "mov.i1", 3, 1, 1, MintOpNoArgs) -OPDEF(MINT_MOV_U1, "mov.u1", 3, 1, 1, MintOpNoArgs) -OPDEF(MINT_MOV_I2, "mov.i2", 3, 1, 1, MintOpNoArgs) -OPDEF(MINT_MOV_U2, "mov.u2", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_I4_I1, "mov.i4.i1", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_I4_U1, "mov.i4.u1", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_I4_I2, "mov.i4.i2", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_I4_U2, "mov.i4.u2", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_1, "mov.1", 3, 1, 1, MintOpNoArgs) +OPDEF(MINT_MOV_2, "mov.2", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_MOV_4, "mov.4", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_MOV_8, "mov.8", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_MOV_VT, "mov.vt", 4, 1, 1, MintOpShortInt) diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index 1a7d5c84e798d..f4f2f83aa8057 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -60,7 +60,7 @@ typedef enum { #define MINT_SWITCH_LEN(n) (4 + (n) * 2) #define MINT_IS_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DUMMY_USE || (op) == MINT_IL_SEQ_POINT) -#define MINT_IS_MOV(op) ((op) >= MINT_MOV_I1 && (op) <= MINT_MOV_VT) +#define MINT_IS_MOV(op) ((op) >= MINT_MOV_I4_I1 && (op) <= MINT_MOV_VT) #define MINT_IS_UNCONDITIONAL_BRANCH(op) ((op) >= MINT_BR && (op) <= MINT_CALL_HANDLER_S) #define MINT_IS_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BLT_UN_R8_S) #define MINT_IS_UNOP_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BRTRUE_R8_S) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 2516f009b9b9b..725e993d392c9 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -723,7 +723,7 @@ get_mov_for_type (int mt, gboolean needs_sext) case MINT_TYPE_I2: case MINT_TYPE_U2: if (needs_sext) - return MINT_MOV_I1 + mt; + return MINT_MOV_I4_I1 + mt; else return MINT_MOV_4; case MINT_TYPE_I4: @@ -7982,10 +7982,25 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in src_off += foff; else dest_off += foff; - if (mt == MINT_TYPE_VT || fsize) + if (mt == MINT_TYPE_VT || fsize) { + // For valuetypes or unaligned access we just use memcpy opcode = MINT_MOV_VT; - else - opcode = GINT_TO_OPCODE (get_mov_for_type (mt, TRUE)); + } else { + if (opcode == MINT_MOV_SRC_OFF) { + // Loading from field, always load full i4 + opcode = GINT_TO_OPCODE (get_mov_for_type (mt, TRUE)); + } else { + // Storing into field, copy exact size + fsize = get_mint_type_size (mt); + switch (fsize) { + case 1: opcode = MINT_MOV_1; break; + case 2: opcode = MINT_MOV_2; break; + case 4: opcode = MINT_MOV_4; break; + case 8: opcode = MINT_MOV_8; break; + default: g_assert_not_reached (); + } + } + } // Replace MINT_MOV_OFF with the real instruction ip [-1] = opcode; *ip++ = GINT_TO_UINT16 (dest_off); @@ -8666,7 +8681,7 @@ interp_cprop (TransformData *td) // We always store to the full i4, except as part of STIND opcodes. These opcodes can be // applied to a local var only if that var has LDLOCA applied to it - if ((opcode >= MINT_MOV_I1 && opcode <= MINT_MOV_U2) && !td->locals [sregs [0]].indirects) { + if ((opcode >= MINT_MOV_I4_I1 && opcode <= MINT_MOV_I4_U2) && !td->locals [sregs [0]].indirects) { ins->opcode = MINT_MOV_4; opcode = MINT_MOV_4; } @@ -8954,18 +8969,23 @@ interp_cprop (TransformData *td) // Add mov.dst.off to store directly int the local var space without use of ldloca. int foffset = ins->data [0]; guint16 vtsize = 0; - if (mt == MINT_TYPE_VT) + if (mt == MINT_TYPE_VT) { vtsize = ins->data [1]; - else - vtsize = get_mint_type_size (mt); + } +#ifdef NO_UNALIGNED_ACCESS + else { + // As with normal loads/stores we use memcpy for unaligned 8 byte accesses + if ((mt == MINT_TYPE_I8 || mt == MINT_TYPE_R8) && foffset % SIZEOF_VOID_P != 0) + vtsize = 8; + } +#endif // This stores just to part of the dest valuetype ins = interp_insert_ins (td, ins, MINT_MOV_DST_OFF); interp_ins_set_dreg (ins, local); interp_ins_set_sreg (ins, sregs [1]); ins->data [0] = foffset; - // Always use MINT_TYPE_VT so we end up doing a memmove with the type size - ins->data [1] = MINT_TYPE_VT; + ins->data [1] = mt; ins->data [2] = vtsize; interp_clear_ins (ins->prev);