Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][interp] Add vectorization for Vector4 #87822

Merged
merged 8 commits into from
Jul 5, 2023
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ typedef enum {
#define MINT_IS_SIMD_CREATE(op) ((op) >= MINT_SIMD_V128_I1_CREATE && (op) <= MINT_SIMD_V128_I8_CREATE)

// TODO Add more
#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDPTR || op == MINT_BOX)
#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX)

#define MINT_CALL_ARGS 2
#define MINT_CALL_ARGS_SREG -2
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/simd-methods.def
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SIMD_METHOD2(".ctor", ctor)
SIMD_METHOD(get_Count)
SIMD_METHOD(get_AllBitsSet)
SIMD_METHOD(get_IsHardwareAccelerated)
Expand Down
128 changes: 104 additions & 24 deletions src/mono/mono/mini/interp/transform-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,21 @@
#define MSGSTRFIELD1(line) str##line
/*
 * Table of SIMD method-name strings, built by including simd-methods.def
 * twice with different definitions of SIMD_METHOD / SIMD_METHOD2:
 *   - first pass declares one char array field per entry, sized with sizeof
 *     on the entry's string literal (so the terminating NUL is included);
 *   - second pass supplies the matching string as the initializer, in the
 *     same order.
 * NOTE(review): MSGSTRFIELD is defined outside the visible lines; presumably
 * it produces a distinct field name per __LINE__ value - confirm.
 */
static const struct msgstr_t {
// Field for an entry whose string is the stringified identifier `name`.
#define SIMD_METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
// Field for an entry whose string `str` is given explicitly, separate from the identifier `name`.
#define SIMD_METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
#include "simd-methods.def"
#undef SIMD_METHOD
#undef SIMD_METHOD2
} method_names = {
// Initializer pass: emit the string for each entry, in declaration order.
#define SIMD_METHOD(name) #name,
#define SIMD_METHOD2(str,name) str,
#include "simd-methods.def"
#undef SIMD_METHOD
#undef SIMD_METHOD2
};

/*
 * SN_<name> constants, one per entry of simd-methods.def. Each constant is the
 * byte offset (offsetof) of that entry's field inside struct msgstr_t.
 * For SIMD_METHOD2 entries the constant is named after the `name` argument,
 * not after the string.
 * NOTE(review): SIMD_METHOD / SIMD_METHOD2 are not #undef'd after the include
 * in the lines visible here - confirm whether that is intentional.
 */
enum {
#define SIMD_METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
#define SIMD_METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
#include "simd-methods.def"
};

Expand Down Expand Up @@ -91,6 +96,7 @@ static guint16 sri_vector128_t_methods [] = {
};

static guint16 sn_vector_t_methods [] = {
SN_ctor,
SN_get_AllBitsSet,
SN_get_Count,
SN_get_One,
Expand Down Expand Up @@ -157,6 +163,12 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si
for (int i = 0; i < vector_size / arg_size; i++)
data [i] = 1;
return TRUE;
} else if (atype == MONO_TYPE_R4) {
interp_add_ins (td, MINT_SIMD_V128_LDC);
float *data = (float*)&td->last_ins->data [0];
for (int i = 0; i < vector_size / arg_size; i++)
data [i] = 1.0f;
return TRUE;
}
break;
case SN_get_Zero:
Expand Down Expand Up @@ -310,6 +322,31 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho
td->ip += 5;
}

/*
 * emit_vector_create:
 *
 *   Adds a MINT_SIMD_V128_*_CREATE instruction whose sources are the top
 * `csignature->param_count` entries of the transform stack. The opcode is
 * picked from the argument count (16, 8, 4 or 2); any other count asserts.
 * The arguments are removed from the stack, handed to the instruction through
 * a -1 terminated call-args array, and the result is pushed via push_type_vt
 * using `vector_klass` / `vector_size`.
 */
static void
emit_vector_create (TransformData *td, MonoMethodSignature *csignature, MonoClass *vector_klass, int vector_size)
{
	const int arg_count = csignature->param_count;

	switch (arg_count) {
	case 16:
		interp_add_ins (td, MINT_SIMD_V128_I1_CREATE);
		break;
	case 8:
		interp_add_ins (td, MINT_SIMD_V128_I2_CREATE);
		break;
	case 4:
		interp_add_ins (td, MINT_SIMD_V128_I4_CREATE);
		break;
	case 2:
		interp_add_ins (td, MINT_SIMD_V128_I8_CREATE);
		break;
	default:
		g_assert_not_reached ();
	}

	// Too many sources for regular sregs, so route them through the call args machinery
	interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);

	// One slot per argument plus the -1 terminator
	int *arg_vars = (int*)mono_mempool_alloc (td->mempool, (arg_count + 1) * sizeof (int));

	// Drop the arguments from the stack; td->sp now points at the first of them
	td->sp -= arg_count;
	int idx = 0;
	while (idx < arg_count) {
		arg_vars [idx] = td->sp [idx].local;
		idx++;
	}
	arg_vars [arg_count] = -1;

	init_last_ins_call (td);
	td->last_ins->info.call_info->call_args = arg_vars;
	if (!td->optimized) {
		// Only recorded for unoptimized code
		td->last_ins->info.call_info->call_offset = get_tos_offset (td);
	}

	// Push the result and make its local the destination of the create instruction
	push_type_vt (td, vector_klass, vector_size);
	interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
}

static gboolean
emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
{
Expand Down Expand Up @@ -352,26 +389,7 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
else if (arg_size == 4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_CREATE;
else if (arg_size == 8) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I8_CREATE;
} else if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
int num_args = csignature->param_count;
if (num_args == 16) interp_add_ins (td, MINT_SIMD_V128_I1_CREATE);
else if (num_args == 8) interp_add_ins (td, MINT_SIMD_V128_I2_CREATE);
else if (num_args == 4) interp_add_ins (td, MINT_SIMD_V128_I4_CREATE);
else if (num_args == 2) interp_add_ins (td, MINT_SIMD_V128_I8_CREATE);
else g_assert_not_reached ();

// We use call args machinery since we have too many args
interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);
int *call_args = (int*)mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int));
td->sp -= csignature->param_count;
for (int i = 0; i < num_args; i++)
call_args [i] = td->sp [i].local;
call_args [num_args] = -1;
init_last_ins_call (td);
td->last_ins->info.call_info->call_args = call_args;
if (!td->optimized)
td->last_ins->info.call_info->call_offset = get_tos_offset (td);
push_type_vt (td, vector_klass, vector_size);
interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
emit_vector_create (td, csignature, vector_klass, vector_size);
td->ip += 5;
return TRUE;
}
Expand Down Expand Up @@ -507,7 +525,7 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur
}

static gboolean
emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod);
if (id == -1)
Expand All @@ -518,14 +536,74 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c

// First argument is always vector
MonoClass *vector_klass = cmethod->klass;
if (!m_class_is_simd_type (vector_klass))
return FALSE;

MonoTypeEnum atype;
int vector_size, arg_size, scalar_arg;
if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg))
return FALSE;

if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins))
if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
goto opcode_added;
} else if (id == SN_ctor) {
if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
emit_vector_create (td, csignature, vector_klass, vector_size);
if (!newobj) {
// If the ctor is called explicitly, then we need to store to the passed `this`
interp_emit_stobj (td, vector_klass, FALSE);
td->ip += 5;
}
return TRUE;
}
}

if (simd_opcode == -1 || simd_intrins == -1)
return FALSE;

interp_add_ins (td, simd_opcode);
td->last_ins->data [0] = simd_intrins;

opcode_added:
emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE);
return TRUE;
}

static gboolean
emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod);
if (id == -1)
return FALSE;

gint16 simd_opcode = -1;
gint16 simd_intrins = -1;

// First argument is always vector
MonoClass *vector_klass = cmethod->klass;

MonoTypeEnum atype = MONO_TYPE_R4;
int vector_size = SIZEOF_V128;
int arg_size = sizeof (float);
int scalar_arg = -1;
for (int i = 0; i < csignature->param_count; i++) {
if (csignature->params [i]->type != MONO_TYPE_GENERICINST)
scalar_arg = i;
}

if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
goto opcode_added;
} else if (id == SN_ctor) {
if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
emit_vector_create (td, csignature, vector_klass, vector_size);
if (!newobj) {
// If the ctor is called explicitly, then we need to store to the passed `this`
interp_emit_stobj (td, vector_klass, FALSE);
td->ip += 5;
}
return TRUE;
}
}

if (simd_opcode == -1 || simd_intrins == -1)
return FALSE;
Expand Down Expand Up @@ -805,7 +883,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
}

static gboolean
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
const char *class_name;
const char *class_ns;
Expand All @@ -824,7 +902,9 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS
return emit_sri_vector128_t (td, cmethod, csignature);
} else if (!strcmp (class_ns, "System.Numerics")) {
if (!strcmp (class_name, "Vector`1"))
return emit_sn_vector_t (td, cmethod, csignature);
return emit_sn_vector_t (td, cmethod, csignature, newobj);
else if (!strcmp (class_name, "Vector4"))
return emit_sn_vector4 (td, cmethod, csignature, newobj);
} else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
if (!strcmp (class_name, "PackedSimd"))
return emit_sri_packedsimd (td, cmethod, csignature);
Expand Down
35 changes: 32 additions & 3 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -1981,7 +1981,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
const char *klass_name = m_class_get_name (target_method->klass);

#ifdef INTERP_ENABLE_SIMD
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature))
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature, FALSE))
return TRUE;
#endif

Expand Down Expand Up @@ -6289,6 +6289,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
init_last_ins_call (td);
td->last_ins->info.call_info->call_offset = call_offset;
} else {
#ifdef INTERP_ENABLE_SIMD
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, m, csignature, TRUE))
break;
#endif
td->sp -= csignature->param_count;

// Move params types in temporary buffer
Expand Down Expand Up @@ -9362,7 +9366,7 @@ write_v128_element (gpointer v128_addr, LocalValue *val, int index, int el_size)
switch (el_size) {
case 1: *(gint8*)el_addr = (gint8)val->i; break;
case 2: *(gint16*)el_addr = (gint16)val->i; break;
case 4: *(gint32*)el_addr = val->i; break;
case 4: *(gint32*)el_addr = val->i; break; // this also handles r4
case 8: *(gint64*)el_addr = val->l; break;
default:
g_assert_not_reached ();
Expand All @@ -9379,7 +9383,7 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, LocalValue *l
int var = args [index];
while (var != -1) {
LocalValue *val = &local_defs [var];
if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8)
if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8 && val->type != LOCAL_VALUE_R4)
return ins;
index++;
var = args [index];
Expand Down Expand Up @@ -9654,6 +9658,11 @@ interp_cprop (TransformData *td)
} else if (MINT_IS_LDC_I8 (opcode)) {
local_defs [dreg].type = LOCAL_VALUE_I8;
local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins);
} else if (opcode == MINT_LDC_R4) {
guint32 val_u = READ32 (&ins->data [0]);
float f = *(float*)(&val_u);
local_defs [dreg].type = LOCAL_VALUE_R4;
local_defs [dreg].f = f;
} else if (ins->opcode == MINT_LDPTR) {
#if SIZEOF_VOID_P == 8
local_defs [dreg].type = LOCAL_VALUE_I8;
Expand Down Expand Up @@ -9824,6 +9833,26 @@ interp_cprop (TransformData *td)
dump_interp_inst (ins, td->data_items);
}
}
} else if (opcode == MINT_STOBJ_VT || opcode == MINT_STOBJ_VT_NOREF) {
InterpInst *ldloca = local_defs [sregs [0]].ins;
if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
int stsize = ins->data [0];
int local = ldloca->sregs [0];

if (stsize == td->locals [local].size) {
// Replace LDLOCA + STOBJ_VT with MOV_VT
local_ref_count [sregs [0]]--;
ins->opcode = MINT_MOV_VT;
sregs [0] = sregs [1];
ins->dreg = local;
needs_retry = TRUE;

if (td->verbose_level) {
g_print ("Replace ldloca/stobj_vt pair :\n\t");
dump_interp_inst (ins, td->data_items);
}
}
}
} else if (MINT_IS_STIND (opcode)) {
InterpInst *ldloca = local_defs [sregs [0]].ins;
if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
Expand Down
6 changes: 4 additions & 2 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ typedef struct
#define LOCAL_VALUE_LOCAL 1
#define LOCAL_VALUE_I4 2
#define LOCAL_VALUE_I8 3
#define LOCAL_VALUE_NON_NULL 4
#define LOCAL_VALUE_R4 4
#define LOCAL_VALUE_NON_NULL 5

// LocalValue contains data to construct an InterpInst that is equivalent with the contents
// of the stack slot / local / argument.
Expand All @@ -62,6 +63,7 @@ typedef struct {
int local;
gint32 i;
gint64 l;
float f;
};
// The instruction that writes this local.
InterpInst *ins;
Expand Down Expand Up @@ -381,6 +383,6 @@ mono_interp_print_td_code (TransformData *td);

/* Forward definitions for simd methods */
static gboolean
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature);
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj);

#endif /* __MONO_MINI_INTERP_TRANSFORM_H__ */