Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Have mono handle the vector as APIs that grow or shrink the vector type #104445

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
// Stack alignment is two interpreter stack slots; SIMD values use the same alignment.
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
#define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)
// Byte sizes of the supported vector shapes: a 128-bit vector is 16 bytes,
// a Vector2 (two floats) is 8 bytes and a Vector3 (three floats) is 12 bytes.
#define SIZEOF_V128 16
#define SIZEOF_V2 8
#define SIZEOF_V3 12

// 1 MiB interpreter stack and 8 KiB red zone.
// NOTE(review): how the red zone is consumed is not visible in this hunk - confirm before relying on it.
#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down
6 changes: 6 additions & 0 deletions src/mono/mono/mini/interp/interp-simd-intrins.def
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_BITCAST, interp_v128_bitcast, -1)
// Conversions that grow or shrink the vector between Vector2, Vector3 and a 128-bit vector.
// Each entry pairs an intrinsic id with its interp_* handler taking (res, v1).
// NOTE(review): every entry here passes -1 as the third argument; its meaning is not visible in this hunk - confirm.
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V2, interp_v128_to_v2, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V3, interp_v128_to_v3, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V128, interp_v2_to_v128, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V3, interp_v2_to_v3, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V128, interp_v3_to_v128, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V2, interp_v3_to_v2, -1)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129)
Expand Down
170 changes: 147 additions & 23 deletions src/mono/mono/mini/interp/interp-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <wasm_simd128.h>
#endif

#include <mono/utils/mono-math.h>

#ifdef INTERP_ENABLE_SIMD

gboolean interp_simd_enabled = TRUE;
Expand Down Expand Up @@ -35,6 +37,78 @@ interp_v128_bitcast (gpointer res, gpointer v1)
*(v128_i1*)res = *(v128_i1*)v1;
}

// Vector2 AsVector2(Vector128<float> v1)
// Narrowing conversion: copies the two lowest float lanes of v1 into res.
static void
interp_v128_to_v2 (gpointer res, gpointer v1)
{
	float *dst = (float*)res;
	float *src = (float*)v1;

	// Lanes are copied in ascending order, one float at a time.
	for (int lane = 0; lane < 2; lane++) {
		dst [lane] = src [lane];
	}
}

// Vector3 AsVector3(Vector128<float> v1)
// Narrowing conversion: copies the three lowest float lanes of v1 into res.
static void
interp_v128_to_v3 (gpointer res, gpointer v1)
{
	float *dst = (float*)res;
	float *src = (float*)v1;

	// Lanes are copied in ascending order, one float at a time.
	for (int lane = 0; lane < 3; lane++) {
		dst [lane] = src [lane];
	}
}

// Vector128<float> AsVector128(Vector2 v1)
// Widening conversion: copies the two float lanes of v1 into the low lanes of
// res and writes zero to the two upper lanes.
static void
interp_v2_to_v128 (gpointer res, gpointer v1)
{
	float *dst = (float*)res;
	float *src = (float*)v1;

	for (int lane = 0; lane < 4; lane++) {
		if (lane < 2) {
			dst [lane] = src [lane];
		} else {
			dst [lane] = 0;
		}
	}
}

// Vector3 AsVector3(Vector2 v1)
// Widening conversion: copies the two float lanes of v1 into res and writes
// zero to the third lane.
static void
interp_v2_to_v3 (gpointer res, gpointer v1)
{
float *res_typed = (float*)res;
float *v1_typed = (float*)v1;

res_typed [0] = v1_typed [0];
res_typed [1] = v1_typed [1];
res_typed [2] = 0;
// NOTE(review): only lanes [0..2] are written. Whether a fourth lane of a 16-byte
// destination slot needs zeroing is discussed in the review thread that follows - confirm.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be a problem here - if the v3 is in a stack local, it's 16 bytes wide and you might need to zero [3]. I'm not sure whether res can be a non-stack address though...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had code earlier that tried to always handle it as 16-bytes, but it didn't help.

Notably, I wouldn't expect the need to explicitly zero that space anyways as it would be considered padding space and should be ignored when loaded. Otherwise it would risk corrupting other operations, like Sum.

}

// Vector128<float> AsVector128(Vector3 v1)
// Widening conversion: copies the three float lanes of v1 into the low lanes
// of res and writes zero to the fourth lane.
static void
interp_v3_to_v128 (gpointer res, gpointer v1)
{
	float *dst = (float*)res;
	float *src = (float*)v1;

	for (int lane = 0; lane < 3; lane++) {
		dst [lane] = src [lane];
	}

	dst [3] = 0;
}

// Vector2 AsVector2(Vector3 v1)
// Narrowing conversion: copies the two lowest float lanes of v1 into res; the
// third lane of v1 is not read.
static void
interp_v3_to_v2 (gpointer res, gpointer v1)
{
	float *dst = (float*)res;
	float *src = (float*)v1;

	for (int lane = 0; lane < 2; lane++) {
		dst [lane] = src [lane];
	}
}

// op_Addition
static void
interp_v128_i1_op_addition (gpointer res, gpointer v1, gpointer v2)
Expand Down Expand Up @@ -103,13 +177,18 @@ interp_v128_op_bitwise_or (gpointer res, gpointer v1, gpointer v2)
// Bitwise equality of two 128-bit values.
// res: receives a gint32, 1 when both 64-bit halves of v1 and v2 are identical, 0 otherwise.
// v1, v2: pointers to the 16-byte operands, compared as two gint64 halves.
static void
interp_v128_op_bitwise_equality (gpointer res, gpointer v1, gpointer v2)
{
	gint64 *v1_typed = (gint64*)v1;
	gint64 *v2_typed = (gint64*)v2;

	bool succeeded = true;

	// Any differing half makes the vectors unequal.
	if (v1_typed [0] != v2_typed [0]) {
		succeeded = false;
	} else if (v1_typed [1] != v2_typed [1]) {
		succeeded = false;
	}

	*(gint32*)res = succeeded ? 1 : 0;
}

// op_ExclusiveOr
Expand All @@ -123,36 +202,81 @@ interp_v128_op_exclusive_or (gpointer res, gpointer v1, gpointer v2)
// Bitwise inequality of two 128-bit values.
// res: receives a gint32, 1 when either 64-bit half of v1 differs from v2, 0 otherwise.
// v1, v2: pointers to the 16-byte operands, compared as two gint64 halves.
static void
interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2)
{
	gint64 *v1_typed = (gint64*)v1;
	gint64 *v2_typed = (gint64*)v2;

	bool succeeded = false;

	// A single differing half is enough to report inequality.
	if (v1_typed [0] != v2_typed [0]) {
		succeeded = true;
	} else if (v1_typed [1] != v2_typed [1]) {
		succeeded = true;
	}

	*(gint32*)res = succeeded ? 1 : 0;
}

// Vector128<float>EqualsFloatingPoint
// Scalar helper for float lane comparison: two lanes match when they compare
// equal, or when both are NaN.
static bool
r4_float_equality(float v1, float v2)
{
	return (v1 == v2) || (mono_isnan (v1) && mono_isnan (v2));
}

// Vector128<float>.EqualsFloatingPoint
// res: receives a gint32, 1 when all four float lanes of v1 and v2 match under
//      r4_float_equality (equal, or both NaN), 0 otherwise.
// v1, v2: pointers to the operands, read as four floats each.
static void
interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2)
{
	float *v1_typed = (float*)v1;
	float *v2_typed = (float*)v2;

	bool succeeded = true;

	// Stop at the first lane that does not match.
	for (int lane = 0; lane < 4; lane++) {
		if (!r4_float_equality (v1_typed [lane], v2_typed [lane])) {
			succeeded = false;
			break;
		}
	}

	*(gint32*)res = succeeded ? 1 : 0;
}

// Scalar helper for double lane comparison: two lanes match when they compare
// equal, or when both are NaN.
static bool
r8_float_equality(double v1, double v2)
{
	return (v1 == v2) || (mono_isnan (v1) && mono_isnan (v2));
}

// Vector128<double>.EqualsFloatingPoint
// res: receives a gint32, 1 when both double lanes of v1 and v2 match under
//      r8_float_equality (equal, or both NaN), 0 otherwise.
// v1, v2: pointers to the operands, read as two doubles each.
static void
interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2)
{
	double *v1_typed = (double*)v1;
	double *v2_typed = (double*)v2;

	bool succeeded = true;

	// Stop at the first lane that does not match.
	for (int lane = 0; lane < 2; lane++) {
		if (!r8_float_equality (v1_typed [lane], v2_typed [lane])) {
			succeeded = false;
			break;
		}
	}

	*(gint32*)res = succeeded ? 1 : 0;
}

// op_Multiply
Expand Down
5 changes: 4 additions & 1 deletion src/mono/mono/mini/interp/simd-methods.def
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ SIMD_METHOD(AsUInt16)
SIMD_METHOD(AsUInt32)
SIMD_METHOD(AsUInt64)
SIMD_METHOD(AsVector)
SIMD_METHOD(AsVector4)
SIMD_METHOD(AsVector128)
SIMD_METHOD(AsVector128Unsafe)
SIMD_METHOD(AsVector2)
SIMD_METHOD(AsVector3)
SIMD_METHOD(AsVector4)
SIMD_METHOD(ConditionalSelect)
SIMD_METHOD(Create)
SIMD_METHOD(CreateScalar)
Expand Down
79 changes: 76 additions & 3 deletions src/mono/mono/mini/interp/transform-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ static guint16 sri_vector128_methods [] = {
SN_AsUInt32,
SN_AsUInt64,
SN_AsVector,
SN_AsVector4,
SN_AsVector128,
SN_AsVector128Unsafe,
SN_AsVector2,
SN_AsVector3,
SN_AsVector4,
SN_ConditionalSelect,
SN_Create,
SN_CreateScalar,
Expand Down Expand Up @@ -376,7 +379,6 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho
g_assert (allow_void);
interp_ins_set_dummy_dreg (td->last_ins, td);
} else if (ret_mt == MINT_TYPE_VT) {
// For these intrinsics, if we return a VT then it is a V128
push_type_vt (td, vector_klass, vector_size);
interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
} else {
Expand Down Expand Up @@ -470,22 +472,93 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
}
case SN_AsVector:
case SN_AsVector128:
case SN_AsVector128Unsafe:
case SN_AsVector2:
case SN_AsVector3:
case SN_AsVector4: {
if (!is_element_type_primitive (csignature->ret) || !is_element_type_primitive (csignature->params [0]))
return FALSE;

MonoClass *ret_class = mono_class_from_mono_type_internal (csignature->ret);
int ret_size = mono_class_value_size (ret_class, NULL);

if (!strcmp (m_class_get_name (ret_class), "Vector2")) {
g_assert (ret_size == 8);
} else if (!strcmp (m_class_get_name (ret_class), "Vector3")) {
g_assert (ret_size == 12);
} else {
g_assert (ret_size == 16);
}

MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]);
int arg_size = mono_class_value_size (arg_class, NULL);

if (!strcmp (m_class_get_name (arg_class), "Vector2")) {
g_assert (arg_size == 8);
} else if (!strcmp (m_class_get_name (arg_class), "Vector3")) {
g_assert (arg_size == 12);
} else {
g_assert (arg_size == 16);
}

vector_klass = ret_class;
vector_size = ret_size;

if (id == SN_AsVector2) {
g_assert (ret_size == 8);
g_assert ((arg_size == 12) || (arg_size == 16));
} else if (id == SN_AsVector3) {
g_assert (ret_size == 12);
g_assert ((arg_size == 8) || (arg_size == 16));
}

if (arg_size == ret_size) {
simd_opcode = MINT_SIMD_INTRINS_P_P;
simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST;
break;
}
return FALSE;

if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) {
return FALSE;
}

if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) {
return FALSE;
}

if (arg_size > ret_size) {
simd_opcode = MINT_SIMD_INTRINS_P_P;

if (ret_size == 8) {
if (arg_size == 16) {
simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2;
} else {
g_assert (arg_size == 12);
simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2;
}
} else {
g_assert (arg_size == 16);
g_assert (ret_size == 12);
simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3;
}
break;
} else {
simd_opcode = MINT_SIMD_INTRINS_P_P;

if (arg_size == 8) {
if (ret_size == 12) {
simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3;
} else {
g_assert (ret_size == 16);
simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128;
}
} else {
g_assert (arg_size == 12);
g_assert (ret_size == 16);
simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128;
}
break;
}
}
case SN_ConditionalSelect:
simd_opcode = MINT_SIMD_INTRINS_P_PPP;
Expand Down
9 changes: 9 additions & 0 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,10 @@ simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass)
} else {
guint32 nelems;
MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems);
if (nelems == 3) {
	// Vector3 is emitted as a 4-lane LLVM vector (3 elements + zero).
	// This has to be an assignment: `nelems == 4;` is a comparison whose
	// result is discarded, which would leave the LLVM type with 3 lanes.
	nelems = 4;
}
return LLVMVectorType (primitive_type_to_llvm_type (type), nelems);
}
g_assert_not_reached ();
Expand Down Expand Up @@ -8276,6 +8280,11 @@ MONO_RESTORE_WARNING
case OP_XCONST: {
int ecount;
MonoTypeEnum etype = mini_get_simd_type_info (ins->klass, (guint32*)&ecount);

if (ecount == 3) {
	// Vector3 constants are emitted as 4-lane vectors (3 elements + zero).
	// This has to be an assignment: `ecount == 4;` is a comparison whose
	// result is discarded, which would leave the element count at 3.
	ecount = 4;
}

LLVMTypeRef llvm_type = primitive_type_to_llvm_type (etype);
LLVMValueRef vals [64];
Expand Down
3 changes: 1 addition & 2 deletions src/mono/mono/mini/mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -4599,8 +4599,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems)
*nelems = 2;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector3")) {
// For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero).
*nelems = 4;
*nelems = 3;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) {
MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
Expand Down
Loading
Loading