From 335ffb05fbe97187452046bc6791405ea33f210c Mon Sep 17 00:00:00 2001 From: imhameed Date: Mon, 30 Mar 2020 14:01:25 -0700 Subject: [PATCH] [mono][llvm] Improve LLVM 9 compatibility. (#34182) Improve LLVM 9 compatibility. Several intrinsics were removed in LLVM 7 and above; replace them with IR sequences that generate the desired instructions. Fix some type mismatch problems with the generated IR (in Sse2.StoreAlignedNonTemporal, Vector128.CreateScalarUnsafe, and Sse2.MaskMove); these were caught by running the coreclr hardware intrinsic JIT tests. Support the two-argument forms of Sse.SqrtScalar, Sse.ReciprocalScalar, and Sse.ReciprocalSqrtScalar. llvm::linkCoreCLRGC(), in llvm/CodeGen/GCs.h, was removed and replaced with llvm::linkAllBuiltinGCs(), in llvm/CodeGen/BuiltinGCs.h. In mono_llvm_dump_value and mono_llvm_dump_module, flush the c library's stdout stream before using (and flushing) LLVM's own buffered output functions. --- src/mono/mono/mini/llvm-intrinsics.h | 39 +-- src/mono/mono/mini/llvm-jit.cpp | 19 +- src/mono/mono/mini/mini-llvm-cpp.cpp | 6 +- src/mono/mono/mini/mini-llvm.c | 277 +++++++++++++------ src/mono/mono/mini/mini-ops.h | 18 +- src/mono/mono/mini/mini.h | 9 - src/mono/mono/mini/simd-intrinsics-netcore.c | 101 ++++--- 7 files changed, 308 insertions(+), 161 deletions(-) diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index 1eccab2a0b60c..a0cc84e2cec10 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -88,17 +88,25 @@ INTRINS(SSE_PSRL_Q, x86_sse2_psrl_q) INTRINS(SSE_PSLL_W, x86_sse2_psll_w) INTRINS(SSE_PSLL_D, x86_sse2_psll_d) INTRINS(SSE_PSLL_Q, x86_sse2_psll_q) +#if LLVM_API_VERSION < 700 +// These intrinsics were removed in LLVM 7 (bcaab53d479e7005ee69e06321bbb493f9b7f5e6). 
INTRINS(SSE_SQRT_PS, x86_sse_sqrt_ps) +INTRINS(SSE_SQRT_SS, x86_sse_sqrt_ss) +INTRINS(SSE_SQRT_PD, x86_sse2_sqrt_pd) +INTRINS(SSE_SQRT_SD, x86_sse2_sqrt_sd) +INTRINS(SSE_PMULUDQ, x86_sse2_pmulu_dq) +#else +INTRINS_OVR(SSE_SQRT_PD, sqrt) +INTRINS_OVR(SSE_SQRT_PS, sqrt) +INTRINS_OVR(SSE_SQRT_SD, sqrt) +INTRINS_OVR(SSE_SQRT_SS, sqrt) +#endif INTRINS(SSE_RCP_PS, x86_sse_rcp_ps) INTRINS(SSE_RSQRT_PS, x86_sse_rsqrt_ps) -INTRINS(SSE_SQRT_SS, x86_sse_sqrt_ss) INTRINS(SSE_RCP_SS, x86_sse_rcp_ss) INTRINS(SSE_RSQRT_SS, x86_sse_rsqrt_ss) -INTRINS(SSE_SQRT_PD, x86_sse2_sqrt_pd) -INTRINS(SSE_SQRT_SD, x86_sse2_sqrt_sd) INTRINS(SSE_CVTTPD2DQ, x86_sse2_cvttpd2dq) INTRINS(SSE_CVTTPS2DQ, x86_sse2_cvttps2dq) -INTRINS(SSE_CVTDQ2PS, x86_sse2_cvtdq2ps) INTRINS(SSE_CVTPD2DQ, x86_sse2_cvtpd2dq) INTRINS(SSE_CVTPS2DQ, x86_sse2_cvtps2dq) INTRINS(SSE_CVTPD2PS, x86_sse2_cvtpd2ps) @@ -110,10 +118,6 @@ INTRINS(SSE_CVTSD2SI, x86_sse2_cvtsd2si) INTRINS(SSE_CVTTSD2SI, x86_sse2_cvttsd2si) INTRINS(SSE_CVTSD2SI64, x86_sse2_cvtsd2si64) INTRINS(SSE_CVTTSD2SI64, x86_sse2_cvttsd2si64) -INTRINS(SSE_CVTSI2SS, x86_sse_cvtsi2ss) -INTRINS(SSE_CVTSI2SS64, x86_sse_cvtsi642ss) -INTRINS(SSE_CVTSI2SD, x86_sse2_cvtsi2sd) -INTRINS(SSE_CVTSI2SD64, x86_sse2_cvtsi642sd) INTRINS(SSE_CVTSD2SS, x86_sse2_cvtsd2ss) INTRINS(SSE_CMPPD, x86_sse2_cmp_pd) INTRINS(SSE_CMPPS, x86_sse_cmp_ps) @@ -161,19 +165,10 @@ INTRINS(SSE_MINSD, x86_sse2_min_sd) INTRINS(SSE_HADDPD, x86_sse3_hadd_pd) INTRINS(SSE_HSUBPD, x86_sse3_hsub_pd) INTRINS(SSE_ADDSUBPD, x86_sse3_addsub_pd) -INTRINS(SSE_PADDSW, x86_sse2_padds_w) -INTRINS(SSE_PSUBSW, x86_sse2_psubs_w) -INTRINS(SSE_PADDUSW, x86_sse2_paddus_w) -INTRINS(SSE_PSUBUSW, x86_sse2_psubus_w) INTRINS(SSE_PMULHW, x86_sse2_pmulh_w) INTRINS(SSE_PMULHU, x86_sse2_pmulhu_w) -INTRINS(SSE_PMULUDQ, x86_sse2_pmulu_dq) INTRINS(SSE_PMULHUW, x86_sse2_pmulhu_w) INTRINS(SSE_PMADDWD, x86_sse2_pmadd_wd) -INTRINS(SSE_PADDSB, x86_sse2_padds_b) -INTRINS(SSE_PSUBSB, x86_sse2_psubs_b) -INTRINS(SSE_PADDUSB, x86_sse2_paddus_b) 
-INTRINS(SSE_PSUBUSB, x86_sse2_psubus_b) INTRINS(SSE_PSADBW, x86_sse2_psad_bw) INTRINS(SSE_PAUSE, x86_sse2_pause) INTRINS(SSE_MASKMOVDQU, x86_sse2_maskmov_dqu) @@ -208,11 +203,21 @@ INTRINS_OVR(SSE_SADD_SATI8, sadd_sat) INTRINS_OVR(SSE_UADD_SATI8, uadd_sat) INTRINS_OVR(SSE_SADD_SATI16, sadd_sat) INTRINS_OVR(SSE_UADD_SATI16, uadd_sat) + +INTRINS_OVR(SSE_SSUB_SATI8, ssub_sat) +INTRINS_OVR(SSE_USUB_SATI8, usub_sat) +INTRINS_OVR(SSE_SSUB_SATI16, ssub_sat) +INTRINS_OVR(SSE_USUB_SATI16, usub_sat) #else INTRINS(SSE_SADD_SATI8, x86_sse2_padds_b) INTRINS(SSE_UADD_SATI8, x86_sse2_paddus_b) INTRINS(SSE_SADD_SATI16, x86_sse2_padds_w) INTRINS(SSE_UADD_SATI16, x86_sse2_paddus_w) + +INTRINS(SSE_SSUB_SATI8, x86_sse2_psubs_b) +INTRINS(SSE_USUB_SATI8, x86_sse2_psubus_b) +INTRINS(SSE_SSUB_SATI16, x86_sse2_psubs_w) +INTRINS(SSE_USUB_SATI16, x86_sse2_psubus_w) #endif #endif #if defined(TARGET_WASM) && LLVM_API_VERSION >= 800 diff --git a/src/mono/mono/mini/llvm-jit.cpp b/src/mono/mono/mini/llvm-jit.cpp index 3821447a0681a..61f907e963781 100644 --- a/src/mono/mono/mini/llvm-jit.cpp +++ b/src/mono/mono/mini/llvm-jit.cpp @@ -33,7 +33,12 @@ #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Transforms/Scalar.h" + +#if LLVM_API_VERSION >= 800 +#include "llvm/CodeGen/BuiltinGCs.h" +#else #include "llvm/CodeGen/GCs.h" +#endif #include @@ -50,6 +55,17 @@ mono_llvm_set_unhandled_exception_handler (void) { } +// noop function that merely ensures that certain symbols are not eliminated +// from the resulting binary. 
+static void +link_gc () { +#if LLVM_API_VERSION >= 800 + llvm::linkAllBuiltinGCs(); +#else + llvm::linkCoreCLRGC(); // Mono uses built-in "coreclr" GCStrategy +#endif +} + template static std::vector singletonSet(T t) { std::vector Vec; @@ -304,7 +320,6 @@ class MonoLLVMJIT { initializeInstCombine(registry); initializeTarget(registry); initializeLoopIdiomRecognizeLegacyPassPass(registry); - linkCoreCLRGC(); // Mono uses built-in "coreclr" GCStrategy // FIXME: find optimal mono specific order of passes // see https://llvm.org/docs/Frontend/PerformanceTips.html#pass-ordering @@ -445,6 +460,8 @@ mono_llvm_jit_init () { if (jit != nullptr) return; + link_gc (); + mono_native_tls_alloc (&current_cfg_tls_id, NULL); InitializeNativeTarget (); diff --git a/src/mono/mono/mini/mini-llvm-cpp.cpp b/src/mono/mono/mini/mini-llvm-cpp.cpp index e3f8dc0b52275..cbe40546561f7 100644 --- a/src/mono/mono/mini/mini-llvm-cpp.cpp +++ b/src/mono/mono/mini/mini-llvm-cpp.cpp @@ -51,16 +51,18 @@ void mono_llvm_dump_value (LLVMValueRef value) { /* Same as LLVMDumpValue (), but print to stdout */ - outs () << (*unwrap (value)) << "\n"; fflush (stdout); + outs () << (*unwrap (value)) << "\n"; + outs ().flush (); } void mono_llvm_dump_module (LLVMModuleRef module) { /* Same as LLVMDumpModule (), but print to stdout */ - outs () << (*unwrap (module)); fflush (stdout); + outs () << (*unwrap (module)); + outs ().flush (); } /* Missing overload for building an alloca with an alignment */ diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 2ac794d0f5819..c3cde5fcf2964 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -601,18 +601,10 @@ type_to_llvm_type (EmitContext *ctx, MonoType *t) } } -/* - * type_is_unsigned: - * - * Return whenever T is an unsigned int type. 
- */ static gboolean -type_is_unsigned (EmitContext *ctx, MonoType *t) +primitive_type_is_unsigned (MonoTypeEnum t) { - t = mini_get_underlying_type (t); - if (t->byref) - return FALSE; - switch (t->type) { + switch (t) { case MONO_TYPE_U1: case MONO_TYPE_U2: case MONO_TYPE_CHAR: @@ -624,6 +616,20 @@ type_is_unsigned (EmitContext *ctx, MonoType *t) } } +/* + * type_is_unsigned: + * + * Return whenever T is an unsigned int type. + */ +static gboolean +type_is_unsigned (EmitContext *ctx, MonoType *t) +{ + t = mini_get_underlying_type (t); + if (t->byref) + return FALSE; + return primitive_type_is_unsigned (t->type); +} + /* * type_to_llvm_arg_type: * @@ -957,32 +963,10 @@ simd_ins_to_intrins (int opcode) case OP_PSARD: case OP_PSARD_REG: return INTRINS_SSE_PSRAI_D; - case OP_PADDB_SAT: - return INTRINS_SSE_PADDSB; - case OP_PADDW_SAT: - return INTRINS_SSE_PADDSW; - case OP_PSUBB_SAT: - return INTRINS_SSE_PSUBSB; - case OP_PSUBW_SAT: - return INTRINS_SSE_PSUBSW; - case OP_PADDB_SAT_UN: - return INTRINS_SSE_PADDUSB; - case OP_PADDW_SAT_UN: - return INTRINS_SSE_PADDUSW; - case OP_PSUBB_SAT_UN: - return INTRINS_SSE_PSUBUSB; - case OP_PSUBW_SAT_UN: - return INTRINS_SSE_PSUBUSW; - case OP_SQRTPS: - return INTRINS_SSE_SQRT_PS; - case OP_SQRTPD: - return INTRINS_SSE_SQRT_PD; case OP_RSQRTPS: return INTRINS_SSE_RSQRT_PS; case OP_RCPPS: return INTRINS_SSE_RCP_PS; - case OP_CVTDQ2PS: - return INTRINS_SSE_CVTDQ2PS; case OP_CVTPD2DQ: return INTRINS_SSE_CVTPD2DQ; case OP_CVTPS2DQ: @@ -1007,6 +991,14 @@ simd_ins_to_intrins (int opcode) return INTRINS_SSE_PMULHU; case OP_DPPS: return INTRINS_SSE_DPPS; + case OP_SSE_SQRTSS: + return INTRINS_SSE_SQRT_SS; + case OP_SSE2_SQRTSD: + return INTRINS_SSE_SQRT_SD; + case OP_SQRTPS: + return INTRINS_SSE_SQRT_PS; + case OP_SQRTPD: + return INTRINS_SSE_SQRT_PD; #endif default: g_assert_not_reached (); @@ -1040,8 +1032,6 @@ simd_op_to_llvm_type (int opcode) case OP_EXTRACT_R4: case OP_EXPAND_R4: return sse_r4_t; - case OP_CVTDQ2PS: - return 
sse_i4_t; case OP_CVTPD2DQ: case OP_CVTPD2PS: case OP_CVTTPD2DQ: @@ -6893,14 +6883,6 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case OP_HADDPS: case OP_HSUBPD: case OP_HSUBPS: - case OP_PADDB_SAT: - case OP_PADDW_SAT: - case OP_PSUBB_SAT: - case OP_PSUBW_SAT: - case OP_PADDB_SAT_UN: - case OP_PADDW_SAT_UN: - case OP_PSUBB_SAT_UN: - case OP_PSUBW_SAT_UN: case OP_PACKW: case OP_PACKD: case OP_PACKW_UN: @@ -7088,6 +7070,11 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) ctx->bblocks [bb->block_num].end_bblock = cbb; break; } + case OP_CVTDQ2PS: { + LLVMValueRef i4 = LLVMBuildBitCast (builder, lhs, sse_i4_t, ""); + values [ins->dreg] = LLVMBuildSIToFP (builder, i4, sse_r4_t, dname); + break; + } case OP_CVTDQ2PD: { LLVMValueRef indexes [16]; @@ -7098,10 +7085,10 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) values [ins->dreg] = LLVMBuildSIToFP (builder, shuffle, LLVMVectorType (LLVMDoubleType (), 2), dname); break; } - case OP_CVTSD2SD: { - LLVMValueRef rhs_elem = LLVMBuildExtractElement (builder, rhs, LLVMConstInt (LLVMInt32Type (), 0, FALSE), ""); + case OP_SSE2_CVTSS2SD: { + LLVMValueRef rhs_elem = LLVMBuildExtractElement (builder, rhs, const_int32 (0), ""); LLVMValueRef fpext = LLVMBuildFPExt (builder, rhs_elem, LLVMDoubleType (), dname); - values [ins->dreg] = LLVMBuildInsertElement (builder, lhs, fpext, LLVMConstInt (LLVMInt32Type (), 0, FALSE), ""); + values [ins->dreg] = LLVMBuildInsertElement (builder, lhs, fpext, const_int32 (0), ""); break; } case OP_CVTPS2PD: { @@ -7118,7 +7105,6 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) values [ins->dreg] = LLVMBuildFPToSI (builder, lhs, LLVMVectorType (LLVMInt32Type (), 4), dname); break; - case OP_CVTDQ2PS: case OP_CVTPD2DQ: case OP_CVTPS2DQ: case OP_CVTPD2PS: @@ -7536,10 +7522,20 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) mono_llvm_build_store (builder, val, addr, FALSE, LLVM_BARRIER_NONE); break; } - case OP_SSE_MOVSD: { - LLVMValueRef addr = convert (ctx, lhs, LLVMPointerType 
(LLVMDoubleType (), 0)); - LLVMValueRef val = mono_llvm_build_load (builder, addr, "", FALSE); - values [ins->dreg] = LLVMBuildInsertElement (builder, LLVMConstNull (type_to_sse_type (ins->inst_c1)), val, LLVMConstInt (LLVMInt32Type (), 0, FALSE), ""); + case OP_SSE2_MOVD: + case OP_SSE2_MOVQ: + case OP_SSE2_MOVUPD: { + LLVMTypeRef rty = NULL; + switch (ins->opcode) { + case OP_SSE2_MOVD: rty = sse_i4_t; break; + case OP_SSE2_MOVQ: rty = sse_i8_t; break; + case OP_SSE2_MOVUPD: rty = sse_r8_t; break; + } + LLVMTypeRef srcty = LLVMGetElementType (rty); + LLVMValueRef zero = LLVMConstNull (rty); + LLVMValueRef addr = convert (ctx, lhs, LLVMPointerType (srcty, 0)); + LLVMValueRef val = mono_llvm_build_aligned_load (builder, addr, "", FALSE, 1); + values [ins->dreg] = LLVMBuildInsertElement (builder, zero, val, const_int32 (0), dname); break; } @@ -7610,7 +7606,8 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) break; } case OP_SSE_MOVNTPS: { - LLVMValueRef store = mono_llvm_build_aligned_store (builder, rhs, lhs, FALSE, ins->inst_c0); + LLVMValueRef addr = convert (ctx, lhs, LLVMPointerType (LLVMTypeOf (rhs), 0)); + LLVMValueRef store = mono_llvm_build_aligned_store (builder, rhs, addr, FALSE, ins->inst_c0); set_nontemporal_flag (store); break; } @@ -7830,6 +7827,73 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) values [ins->dreg] = call_intrins (ctx, id, args, ""); break; } + case OP_SSE_CVTSI2SS: + case OP_SSE_CVTSI2SS64: + case OP_SSE2_CVTSI2SD: + case OP_SSE2_CVTSI2SD64: { + LLVMTypeRef ty = LLVMFloatType (); + switch (ins->opcode) { + case OP_SSE2_CVTSI2SD: + case OP_SSE2_CVTSI2SD64: + ty = LLVMDoubleType (); + break; + } + LLVMValueRef fp = LLVMBuildSIToFP (builder, rhs, ty, ""); + values [ins->dreg] = LLVMBuildInsertElement (builder, lhs, fp, const_int32 (0), dname); + break; + } + case OP_SSE2_PMULUDQ: { +#if LLVM_API_VERSION < 700 + LLVMValueRef args [] = { lhs, rhs }; + values [ins->dreg] = call_intrins (ctx, INTRINS_SSE_PMULUDQ, args, dname); 
+#else + LLVMValueRef i32_max = LLVMConstInt (LLVMInt64Type (), UINT32_MAX, FALSE); + LLVMValueRef maskvals [] = { i32_max, i32_max }; + LLVMValueRef mask = LLVMConstVector (maskvals, 2); + LLVMValueRef l = LLVMBuildAnd (builder, convert (ctx, lhs, sse_i8_t), mask, ""); + LLVMValueRef r = LLVMBuildAnd (builder, convert (ctx, rhs, sse_i8_t), mask, ""); + values [ins->dreg] = LLVMBuildNUWMul (builder, l, r, dname); +#endif + break; + } + case OP_SSE_SQRTSS: + case OP_SSE2_SQRTSD: { +#if LLVM_API_VERSION < 700 + LLVMValueRef result = call_intrins (ctx, simd_ins_to_intrins (ins->opcode), &rhs, dname); + const int maskf32[] = { 0, 5, 6, 7 }; + const int maskf64[] = { 0, 1 }; + const int *mask = NULL; + int mask_len = 0; + switch (ins->opcode) { + case OP_SSE_SQRTSS: mask = maskf32; mask_len = 4; break; + case OP_SSE2_SQRTSD: mask = maskf64; mask_len = 2; break; + default: g_assert_not_reached (); break; + } + LLVMValueRef shufmask = create_const_vector_i32 (mask, mask_len); + values [ins->dreg] = LLVMBuildShuffleVector (builder, result, lhs, shufmask, ""); +#else + LLVMValueRef upper = values [ins->sreg1]; + LLVMValueRef lower = values [ins->sreg2]; + LLVMValueRef scalar = LLVMBuildExtractElement (builder, lower, const_int32 (0), ""); + LLVMValueRef result = call_intrins (ctx, simd_ins_to_intrins (ins->opcode), &scalar, dname); + values [ins->dreg] = LLVMBuildInsertElement (builder, upper, result, const_int32 (0), ""); +#endif + break; + } + case OP_SSE_RCPSS: + case OP_SSE_RSQRTSS: { + IntrinsicId id = (IntrinsicId)0; + switch (ins->opcode) { + case OP_SSE_RCPSS: id = INTRINS_SSE_RCP_SS; break; + case OP_SSE_RSQRTSS: id = INTRINS_SSE_RSQRT_SS; break; + default: g_assert_not_reached (); break; + }; + LLVMValueRef result = call_intrins (ctx, id, &rhs, dname); + const int mask[] = { 0, 5, 6, 7 }; + LLVMValueRef shufmask = create_const_vector_i32 (mask, 4); + values [ins->dreg] = LLVMBuildShuffleVector (builder, result, lhs, shufmask, ""); + break; + } case OP_XOP: { 
IntrinsicId id = (IntrinsicId)0; switch (ins->inst_c0) { @@ -7848,11 +7912,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case SIMD_OP_SSE_SQRTPS: id = INTRINS_SSE_SQRT_PS; break; case SIMD_OP_SSE_RCPPS: id = INTRINS_SSE_RCP_PS; break; case SIMD_OP_SSE_RSQRTPS: id = INTRINS_SSE_RSQRT_PS; break; - case SIMD_OP_SSE_SQRTSS: id = INTRINS_SSE_SQRT_SS; break; - case SIMD_OP_SSE_RCPSS: id = INTRINS_SSE_RCP_SS; break; - case SIMD_OP_SSE_RSQRTSS: id = INTRINS_SSE_RSQRT_SS; break; case SIMD_OP_SSE_SQRTPD: id = INTRINS_SSE_SQRT_PD; break; - case SIMD_OP_SSE_SQRTSD: id = INTRINS_SSE_SQRT_SD; break; case SIMD_OP_SSE_LDDQU: id = INTRINS_SSE_LDU_DQ; break; default: g_assert_not_reached (); break; } @@ -7882,10 +7942,6 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) LLVMValueRef args [] = { lhs, rhs }; IntrinsicId id = (IntrinsicId)0; switch (ins->inst_c0) { - case SIMD_OP_SSE_CVTSI2SS: id = INTRINS_SSE_CVTSI2SS; break; - case SIMD_OP_SSE_CVTSI2SS64: id = INTRINS_SSE_CVTSI2SS64; break; - case SIMD_OP_SSE_CVTSI2SD: id = INTRINS_SSE_CVTSI2SD; break; - case SIMD_OP_SSE_CVTSI2SD64: id = INTRINS_SSE_CVTSI2SD64; break; case SIMD_OP_SSE_CVTSD2SS: id = INTRINS_SSE_CVTSD2SS; break; case SIMD_OP_SSE_MAXPS: id = INTRINS_SSE_MAXPS; break; case SIMD_OP_SSE_MAXSS: id = INTRINS_SSE_MAXSS; break; @@ -7898,7 +7954,6 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case SIMD_OP_SSE_PMADDWD: id = INTRINS_SSE_PMADDWD; break; case SIMD_OP_SSE_PMULHW: id = INTRINS_SSE_PMULHW; break; case SIMD_OP_SSE_PMULHUW: id = INTRINS_SSE_PMULHUW; break; - case SIMD_OP_SSE_PMULUDQ: id = INTRINS_SSE_PMULUDQ; break; case SIMD_OP_SSE_PACKSSWB: id = INTRINS_SSE_PACKSSWB; break; case SIMD_OP_SSE_PACKSSDW: id = INTRINS_SSE_PACKSSDW; break; case SIMD_OP_SSE_PSRLW_IMM: id = INTRINS_SSE_PSRLI_W; break; @@ -7917,10 +7972,6 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case SIMD_OP_SSE_PSRAD_IMM: id = INTRINS_SSE_PSRAI_D; break; case SIMD_OP_SSE_PSRAW: id = INTRINS_SSE_PSRA_W; break; case 
SIMD_OP_SSE_PSRAD: id = INTRINS_SSE_PSRA_D; break; - case SIMD_OP_SSE_PSUBSB: id = INTRINS_SSE_PSUBSB; break; - case SIMD_OP_SSE_PSUBSW: id = INTRINS_SSE_PSUBSW; break; - case SIMD_OP_SSE_PSUBUSB: id = INTRINS_SSE_PSUBUSB; break; - case SIMD_OP_SSE_PSUBUSW: id = INTRINS_SSE_PSUBUSW; break; case SIMD_OP_SSE_PSADBW: id = INTRINS_SSE_PSADBW; break; case SIMD_OP_SSE_ADDSUBPS: id = INTRINS_SSE_ADDSUBPS; break; case SIMD_OP_SSE_ADDSUBPD: id = INTRINS_SSE_ADDSUBPD; break; @@ -7945,33 +7996,63 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) break; } - case OP_XOP_X_X_X_I: { - LLVMValueRef args[] = { lhs, rhs, values [ins->sreg3] }; - IntrinsicId id = (IntrinsicId)0; - switch (ins->inst_c0) { - case SIMD_OP_SSE_MASKMOVDQU: id = INTRINS_SSE_MASKMOVDQU; break; - default: g_assert_not_reached (); break; - } - values [ins->dreg] = call_intrins (ctx, id, args, ""); + case OP_SSE2_MASKMOVDQU: { + LLVMTypeRef i8ptr = LLVMPointerType (LLVMInt8Type (), 0); + LLVMValueRef dstaddr = convert (ctx, values [ins->sreg3], i8ptr); + LLVMValueRef src = convert (ctx, lhs, sse_i1_t); + LLVMValueRef mask = convert (ctx, rhs, sse_i1_t); + LLVMValueRef args[] = { src, mask, dstaddr }; + call_intrins (ctx, INTRINS_SSE_MASKMOVDQU, args, ""); break; } - case OP_SSE2_ADDS: { + case OP_PADDB_SAT: + case OP_PADDW_SAT: + case OP_PSUBB_SAT: + case OP_PSUBW_SAT: + case OP_PADDB_SAT_UN: + case OP_PADDW_SAT_UN: + case OP_PSUBB_SAT_UN: + case OP_PSUBW_SAT_UN: + case OP_SSE2_ADDS: + case OP_SSE2_SUBS: { IntrinsicId id = (IntrinsicId)0; - switch (ins->inst_c1) { - case MONO_TYPE_I1: id = INTRINS_SSE_SADD_SATI8; break; - case MONO_TYPE_U1: id = INTRINS_SSE_UADD_SATI8; break; - case MONO_TYPE_I2: id = INTRINS_SSE_SADD_SATI16; break; - case MONO_TYPE_U2: id = INTRINS_SSE_UADD_SATI16; break; - default: g_assert_not_reached (); break; + int type = 0; + gboolean is_add = TRUE; + switch (ins->opcode) { + case OP_PADDB_SAT: type = MONO_TYPE_I1; break; + case OP_PADDW_SAT: type = MONO_TYPE_I2; break; + case 
OP_PSUBB_SAT: type = MONO_TYPE_I1; is_add = FALSE; break; + case OP_PSUBW_SAT: type = MONO_TYPE_I2; is_add = FALSE; break; + case OP_PADDB_SAT_UN: type = MONO_TYPE_U1; break; + case OP_PADDW_SAT_UN: type = MONO_TYPE_U2; break; + case OP_PSUBB_SAT_UN: type = MONO_TYPE_U1; is_add = FALSE; break; + case OP_PSUBW_SAT_UN: type = MONO_TYPE_U2; is_add = FALSE; break; + case OP_SSE2_ADDS: type = ins->inst_c1; break; + case OP_SSE2_SUBS: type = ins->inst_c1; is_add = FALSE; break; + default: g_assert_not_reached (); + } + if (is_add) { + switch (type) { + case MONO_TYPE_I1: id = INTRINS_SSE_SADD_SATI8; break; + case MONO_TYPE_U1: id = INTRINS_SSE_UADD_SATI8; break; + case MONO_TYPE_I2: id = INTRINS_SSE_SADD_SATI16; break; + case MONO_TYPE_U2: id = INTRINS_SSE_UADD_SATI16; break; + default: g_assert_not_reached (); break; + } + } else { + switch (type) { + case MONO_TYPE_I1: id = INTRINS_SSE_SSUB_SATI8; break; + case MONO_TYPE_U1: id = INTRINS_SSE_USUB_SATI8; break; + case MONO_TYPE_I2: id = INTRINS_SSE_SSUB_SATI16; break; + case MONO_TYPE_U2: id = INTRINS_SSE_USUB_SATI16; break; + default: g_assert_not_reached (); break; + } } - - LLVMValueRef args [2]; - args [0] = convert (ctx, lhs, type_to_sse_type (ins->inst_c1)); - args [1] = convert (ctx, rhs, type_to_sse_type (ins->inst_c1)); - values [ins->dreg] = convert (ctx, - call_intrins (ctx, id, args, dname), - type_to_sse_type (ins->inst_c1)); + LLVMTypeRef vecty = type_to_sse_type (type); + LLVMValueRef args [] = { convert (ctx, lhs, vecty), convert (ctx, rhs, vecty) }; + LLVMValueRef result = call_intrins (ctx, id, args, dname); + values [ins->dreg] = convert (ctx, result, vecty); break; } @@ -8228,12 +8309,14 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case OP_CREATE_SCALAR: case OP_CREATE_SCALAR_UNSAFE: { - LLVMTypeRef type = type_to_sse_type (ins->inst_c1); + MonoTypeEnum primty = inst_c1_type (ins); + LLVMTypeRef type = type_to_sse_type (primty); // use undef vector (most likely empty but may contain garbage 
values) for OP_CREATE_SCALAR_UNSAFE // and zero one for OP_CREATE_SCALAR LLVMValueRef vector = (ins->opcode == OP_CREATE_SCALAR) ? LLVMConstNull (type) : LLVMGetUndef (type); LLVMValueRef insert_pos = LLVMConstInt (LLVMInt32Type (), 0, FALSE); - values [ins->dreg] = LLVMBuildInsertElement (builder, vector, lhs, insert_pos, ""); + LLVMValueRef val = convert_full (ctx, lhs, primitive_type_to_llvm_type (primty), primitive_type_is_unsigned (primty)); + values [ins->dreg] = LLVMBuildInsertElement (builder, vector, val, insert_pos, ""); break; } @@ -9944,12 +10027,30 @@ add_intrinsic (LLVMModuleRef module, int id) #if defined(TARGET_AMD64) || defined(TARGET_X86) case INTRINS_SSE_SADD_SATI8: case INTRINS_SSE_UADD_SATI8: + case INTRINS_SSE_SSUB_SATI8: + case INTRINS_SSE_USUB_SATI8: intrins = add_intrins1 (module, id, sse_i1_t); break; case INTRINS_SSE_SADD_SATI16: case INTRINS_SSE_UADD_SATI16: + case INTRINS_SSE_SSUB_SATI16: + case INTRINS_SSE_USUB_SATI16: intrins = add_intrins1 (module, id, sse_i2_t); break; +#if LLVM_API_VERSION >= 700 + case INTRINS_SSE_SQRT_PS: + intrins = add_intrins1 (module, id, sse_r4_t); + break; + case INTRINS_SSE_SQRT_PD: + intrins = add_intrins1 (module, id, sse_r8_t); + break; + case INTRINS_SSE_SQRT_SS: + intrins = add_intrins1 (module, id, LLVMFloatType ()); + break; + case INTRINS_SSE_SQRT_SD: + intrins = add_intrins1 (module, id, LLVMDoubleType ()); + break; +#endif /* LLVM_API_VERSION >= 700 */ #endif /* AMD64 || X86 */ #if defined(TARGET_WASM) && LLVM_API_VERSION >= 800 case INTRINS_WASM_ANYTRUE_V16: diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 6bd050c1a0e0a..95e50fdad0528 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -996,7 +996,6 @@ MINI_OP(OP_CVTPD2DQ, "cvtpd2dq", XREG, XREG, NONE) MINI_OP(OP_CVTPD2PS, "cvtpd2ps", XREG, XREG, NONE) MINI_OP(OP_CVTPS2DQ, "cvtps2dq", XREG, XREG, NONE) MINI_OP(OP_CVTPS2PD, "cvtps2pd", XREG, XREG, NONE) -MINI_OP(OP_CVTSD2SD, 
"cvtsd2sd", XREG, XREG, XREG) MINI_OP(OP_CVTTPD2DQ, "cvttpd2dq", XREG, XREG, NONE) MINI_OP(OP_CVTTPS2DQ, "cvttps2dq", XREG, XREG, NONE) @@ -1039,12 +1038,18 @@ MINI_OP(OP_SSE_PREFETCHT0, "sse_prefetcht0", NONE, IREG, NONE) MINI_OP(OP_SSE_PREFETCHT1, "sse_prefetcht1", NONE, IREG, NONE) MINI_OP(OP_SSE_PREFETCHT2, "sse_prefetcht2", NONE, IREG, NONE) MINI_OP(OP_SSE_PREFETCHNTA, "sse_prefetchnta", NONE, IREG, NONE) +MINI_OP(OP_SSE_SQRTSS, "sse_sqrtss", XREG, XREG, XREG) +MINI_OP(OP_SSE_RSQRTSS, "sse_rsqrtss", XREG, XREG, XREG) +MINI_OP(OP_SSE_RCPSS, "sse_rcpss", XREG, XREG, XREG) +MINI_OP(OP_SSE_CVTSI2SS, "sse_cvtsi2ss", XREG, XREG, IREG) +MINI_OP(OP_SSE_CVTSI2SS64, "sse_cvtsi2ss64", XREG, XREG, LREG) /* sse 2 */ MINI_OP(OP_SSE2_PACKUS, "sse2_packus", XREG, XREG, XREG) MINI_OP(OP_SSE2_SRLI, "sse2_srli", XREG, XREG, XREG) MINI_OP(OP_SSE2_SHUFFLE, "sse2_shuffle", XREG, XREG, XREG) MINI_OP(OP_SSE2_ADDS, "sse2_adds", XREG, XREG, XREG) +MINI_OP(OP_SSE2_SUBS, "sse2_subs", XREG, XREG, XREG) MINI_OP(OP_SSE2_CMPSD, "sse2_cmpsd", XREG, XREG, XREG) MINI_OP(OP_SSE2_COMIEQ_SD, "sse2_comieq_sd", XREG, XREG, XREG) MINI_OP(OP_SSE2_COMISD, "sse2_comisd", IREG, XREG, XREG) @@ -1053,7 +1058,9 @@ MINI_OP(OP_SSE2_ADDSD, "sse2_addsd", XREG, XREG, XREG) MINI_OP(OP_SSE2_SUBSD, "sse2_subsd", XREG, XREG, XREG) MINI_OP(OP_SSE2_DIVSD, "sse2_divsd", XREG, XREG, XREG) MINI_OP(OP_SSE2_MULSD, "sse2_mulsd", XREG, XREG, XREG) -MINI_OP(OP_SSE_MOVSD, "sse_movsd", XREG, IREG, NONE) +MINI_OP(OP_SSE2_MOVD, "sse2_movd", XREG, IREG, NONE) +MINI_OP(OP_SSE2_MOVQ, "sse2_movq", XREG, IREG, NONE) +MINI_OP(OP_SSE2_MOVUPD, "sse2_movupd", XREG, IREG, NONE) MINI_OP(OP_SSE2_PSLLDQ, "sse2_pslldq", XREG, XREG, IREG) MINI_OP(OP_SSE2_PSRLDQ, "sse2_psrldq", XREG, XREG, IREG) MINI_OP(OP_SSE2_PSRAW_IMM, "sse2_psraw_imm", XREG, XREG, IREG) @@ -1069,6 +1076,12 @@ MINI_OP(OP_SSE2_MOVHPD_LOAD, "sse2_movhpd_load", XREG, XREG, IREG) MINI_OP(OP_SSE2_MOVLPD_LOAD, "sse2_movlpd_load", XREG, XREG, IREG) MINI_OP(OP_SSE2_MOVHPD_STORE, 
"sse2_movhpd_store", NONE, IREG, XREG) MINI_OP(OP_SSE2_MOVLPD_STORE, "sse2_movlpd_store", NONE, IREG, XREG) +MINI_OP(OP_SSE2_SQRTSD, "sse2_sqrtsd", XREG, XREG, XREG) +MINI_OP(OP_SSE2_CVTSI2SD, "sse2_cvtsi2sd", XREG, XREG, IREG) +MINI_OP(OP_SSE2_CVTSI2SD64, "sse2_cvtsi2sd64", XREG, XREG, LREG) +MINI_OP(OP_SSE2_CVTSS2SD, "sse2_cvtsd2sd", XREG, XREG, XREG) +MINI_OP(OP_SSE2_PMULUDQ, "sse2_pmuludq", XREG, XREG, XREG) +MINI_OP3(OP_SSE2_MASKMOVDQU, "sse2_maskmovdqu", NONE, XREG, XREG, IREG) /* sse 3 */ MINI_OP(OP_SSE3_MOVDDUP, "sse3_movddup", XREG, XREG, NONE) @@ -1509,7 +1522,6 @@ MINI_OP(OP_XOP_I8_X, "xop_i8_x", LREG, XREG, NONE) MINI_OP(OP_XOP_X_X_X, "xop_x_x_x", XREG, XREG, XREG) MINI_OP(OP_XOP_X_X_I4, "xop_x_x_i4", XREG, XREG, IREG) MINI_OP(OP_XOP_X_X_I8, "xop_x_x_i8", XREG, XREG, LREG) -MINI_OP3(OP_XOP_X_X_X_I, "xop_x_x_x_i", XREG, XREG, XREG, IREG) MINI_OP(OP_XCAST, "xcast", XREG, XREG, NONE) /* Extract element of vector */ diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index dccb37d51deed..80ea9cbe49b0c 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2875,10 +2875,6 @@ typedef enum { SIMD_OP_SSE_CVTTSD2SI, SIMD_OP_SSE_CVTSD2SI64, SIMD_OP_SSE_CVTTSD2SI64, - SIMD_OP_SSE_CVTSI2SS, - SIMD_OP_SSE_CVTSI2SS64, - SIMD_OP_SSE_CVTSI2SD, - SIMD_OP_SSE_CVTSI2SD64, SIMD_OP_SSE_CVTSD2SS, SIMD_OP_SSE_MAXPS, SIMD_OP_SSE_MAXSS, @@ -2921,12 +2917,7 @@ typedef enum { SIMD_OP_SSE_PSRAD_IMM, SIMD_OP_SSE_PSRAW, SIMD_OP_SSE_PSRAD, - SIMD_OP_SSE_PSUBSB, - SIMD_OP_SSE_PSUBSW, - SIMD_OP_SSE_PSUBUSB, - SIMD_OP_SSE_PSUBUSW, SIMD_OP_SSE_PSADBW, - SIMD_OP_SSE_MASKMOVDQU, SIMD_OP_SSE_ADDSUBPS, SIMD_OP_SSE_ADDSUBPD, SIMD_OP_SSE_HADDPS, diff --git a/src/mono/mono/mini/simd-intrinsics-netcore.c b/src/mono/mono/mini/simd-intrinsics-netcore.c index a9c872b7e4e63..6bb6eb2fc96fe 100644 --- a/src/mono/mono/mini/simd-intrinsics-netcore.c +++ b/src/mono/mono/mini/simd-intrinsics-netcore.c @@ -741,12 +741,12 @@ static SimdIntrinsic sse_methods [] = { 
{SN_Prefetch2, OP_SSE_PREFETCHT2}, {SN_PrefetchNonTemporal, OP_SSE_PREFETCHNTA}, {SN_Reciprocal, OP_XOP_X_X, SIMD_OP_SSE_RCPPS}, - {SN_ReciprocalScalar, 0, SIMD_OP_SSE_RCPSS}, + {SN_ReciprocalScalar}, {SN_ReciprocalSqrt, OP_XOP_X_X, SIMD_OP_SSE_RSQRTPS}, - {SN_ReciprocalSqrtScalar, 0, SIMD_OP_SSE_RSQRTSS}, + {SN_ReciprocalSqrtScalar}, {SN_Shuffle}, {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPS}, - {SN_SqrtScalar, 0, SIMD_OP_SSE_SQRTSS}, + {SN_SqrtScalar}, {SN_Store, OP_SSE_STORE, 1 /* alignment */}, {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */}, {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */}, @@ -829,9 +829,9 @@ static SimdIntrinsic sse2_methods [] = { {SN_LoadFence, OP_XOP, SIMD_OP_SSE_LFENCE}, {SN_LoadHigh, OP_SSE2_MOVHPD_LOAD}, {SN_LoadLow, OP_SSE2_MOVLPD_LOAD}, - {SN_LoadScalarVector128, OP_SSE_MOVSD}, + {SN_LoadScalarVector128}, {SN_LoadVector128}, - {SN_MaskMove, OP_XOP_X_X_X_I, SIMD_OP_SSE_MASKMOVDQU}, + {SN_MaskMove, OP_SSE2_MASKMOVDQU}, {SN_Max}, {SN_MaxScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MAXSD}, {SN_MemoryFence, OP_XOP, SIMD_OP_SSE_MFENCE}, @@ -856,7 +856,7 @@ static SimdIntrinsic sse2_methods [] = { {SN_ShuffleHigh}, {SN_ShuffleLow}, {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPD}, - {SN_SqrtScalar, 0, SIMD_OP_SSE_SQRTSD}, + {SN_SqrtScalar}, {SN_Store, OP_SSE_STORE, 1 /* alignment */}, {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */}, {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */}, @@ -865,7 +865,7 @@ static SimdIntrinsic sse2_methods [] = { {SN_StoreNonTemporal, OP_SSE_MOVNTPS, 1 /* alignment */}, {SN_StoreScalar, OP_SSE_STORES}, {SN_Subtract}, - {SN_SubtractSaturate}, + {SN_SubtractSaturate, OP_SSE2_SUBS}, {SN_SubtractScalar, OP_SSE2_SUBSD}, {SN_SumAbsoluteDifferences, OP_XOP_X_X_X, SIMD_OP_SSE_PSADBW}, {SN_UnpackHigh, OP_SSE_UNPACKHI}, @@ -982,21 +982,32 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature } return emit_simd_ins_for_sig (cfg, klass, OP_SSE_SHUFFLE, args [2]->inst_c0 
/*mask*/, arg0_type, fsig, args); } - case SN_ConvertScalarToVector128Single: - if (fsig->params [1]->type == MONO_TYPE_I4) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_I4, SIMD_OP_SSE_CVTSI2SS, 0, fsig, args); - else if (fsig->params [1]->type == MONO_TYPE_I8) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_I8, SIMD_OP_SSE_CVTSI2SS64, 0, fsig, args); - else - g_assert_not_reached (); - break; + case SN_ConvertScalarToVector128Single: { + int op = 0; + switch (fsig->params [1]->type) { + case MONO_TYPE_I4: op = OP_SSE_CVTSI2SS; break; + case MONO_TYPE_I8: op = OP_SSE_CVTSI2SS64; break; + default: g_assert_not_reached (); break; + } + return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args); + } case SN_ReciprocalScalar: case SN_ReciprocalSqrtScalar: - case SN_SqrtScalar: + case SN_SqrtScalar: { + int op = 0; + switch (id) { + case SN_ReciprocalScalar: op = OP_SSE_RCPSS; break; + case SN_ReciprocalSqrtScalar: op = OP_SSE_RSQRTSS; break; + case SN_SqrtScalar: op = OP_SSE_SQRTSS; break; + }; if (fsig->param_count == 1) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, info->instc0, arg0_type, fsig, args); - else - return NULL; + return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[0]->dreg); + else if (fsig->param_count == 2) + return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[1]->dreg); + else + g_assert_not_reached (); + break; + } case SN_LoadScalarVector128: return NULL; default: @@ -1027,17 +1038,6 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature } case SN_Subtract: return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? 
OP_FSUB : OP_ISUB, arg0_type, fsig, args); - case SN_SubtractSaturate: { - SimdOp op = (SimdOp)0; - switch (arg0_type) { - case MONO_TYPE_I1: op = SIMD_OP_SSE_PSUBSB; break; - case MONO_TYPE_I2: op = SIMD_OP_SSE_PSUBSW; break; - case MONO_TYPE_U1: op = SIMD_OP_SSE_PSUBUSB; break; - case MONO_TYPE_U2: op = SIMD_OP_SSE_PSUBUSW; break; - default: g_assert_not_reached (); break; - } - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, op, arg0_type, fsig, args); - } case SN_Add: return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? OP_FADD : OP_IADD, arg0_type, fsig, args); case SN_Average: @@ -1070,12 +1070,14 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature else g_assert_not_reached (); break; - case SN_ConvertScalarToVector128Double: - if (fsig->params [1]->type == MONO_TYPE_I4) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_I4, SIMD_OP_SSE_CVTSI2SD, 0, fsig, args); - else if (fsig->params [1]->type == MONO_TYPE_I8) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_I8, SIMD_OP_SSE_CVTSI2SD64, 0, fsig, args); - return emit_simd_ins_for_sig (cfg, klass, OP_CVTSD2SD, 0, arg0_type, fsig, args); + case SN_ConvertScalarToVector128Double: { + int op = OP_SSE2_CVTSS2SD; + switch (fsig->params [1]->type) { + case MONO_TYPE_I4: op = OP_SSE2_CVTSI2SD; break; + case MONO_TYPE_I8: op = OP_SSE2_CVTSI2SD64; break; + } + return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args); + } case SN_ConvertScalarToVector128Int32: case SN_ConvertScalarToVector128Int64: case SN_ConvertScalarToVector128UInt32: @@ -1145,7 +1147,7 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature break; case SN_Multiply: if (arg0_type == MONO_TYPE_U4) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PMULUDQ, arg0_type, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PMULUDQ, 0, arg0_type, fsig, args); else if (arg0_type == MONO_TYPE_R8) return 
emit_simd_ins_for_sig (cfg, klass, OP_MULPD, 0, arg0_type, fsig, args); else @@ -1250,11 +1252,28 @@ emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature case SN_ShuffleLow: g_assert (fsig->param_count == 2); return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFLW, 0, arg0_type, fsig, args); - case SN_SqrtScalar: + case SN_SqrtScalar: { if (fsig->param_count == 1) - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, info->instc0, arg0_type, fsig, args); - else - return NULL; + return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[0]->dreg); + else if (fsig->param_count == 2) + return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[1]->dreg); + else { + g_assert_not_reached (); + break; + } + } + case SN_LoadScalarVector128: { + int op = 0; + switch (arg0_type) { + case MONO_TYPE_I4: + case MONO_TYPE_U4: op = OP_SSE2_MOVD; break; + case MONO_TYPE_I8: + case MONO_TYPE_U8: op = OP_SSE2_MOVQ; break; + case MONO_TYPE_R8: op = OP_SSE2_MOVUPD; break; + default: g_assert_not_reached(); break; + } + return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args); + } default: return NULL; }