diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 46c45a7f7f3f4..01d8e69330ea8 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -6984,7 +6984,6 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8); - theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); @@ -7330,6 +7329,10 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D); + // We were not encoding immediate of movi that was int.MaxValue or int.MaxValue / 2. + theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x7fffffff, INS_OPTS_2S); + theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x3fffffff, INS_OPTS_2S); + theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 008a493759b6c..0d3eaba955931 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -2864,12 +2864,11 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) * 'size' specifies the size of the result (16 or 32 bits) */ -/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size) +/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size) { bool onesShift = (bsImm.immOnes == 1); - unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3 - INT32 val = (INT32)bsImm.immVal; // 8-bit immediate - INT32 result = val; + unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3 + UINT32 result = (UINT32)bsImm.immVal; // 8-bit immediate if (bySh > 0) { diff --git a/src/coreclr/src/jit/emitarm64.h b/src/coreclr/src/jit/emitarm64.h index f2cef877a7b20..96bbb37aec732 100644 --- a/src/coreclr/src/jit/emitarm64.h +++ b/src/coreclr/src/jit/emitarm64.h @@ -187,7 +187,7 @@ union byteShiftedImm { static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); -static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); +static UINT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); /************************************************************************ * diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 0e62ffe07fafc..55438a84527e4 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -200,6 +200,8 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va { case TYP_FLOAT: return m_simdHandleCache->Vector64FloatHandle; + case TYP_DOUBLE: + return m_simdHandleCache->Vector64DoubleHandle; case TYP_INT: return m_simdHandleCache->Vector64IntHandle; case TYP_USHORT: @@ -212,6 +214,10 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va return m_simdHandleCache->Vector64ByteHandle; case TYP_UINT: return m_simdHandleCache->Vector64UIntHandle; + case TYP_LONG: + return m_simdHandleCache->Vector64LongHandle; + case TYP_ULONG: + return 
m_simdHandleCache->Vector64ULongHandle; default: assert(!"Didn't find a class handle for simdType"); } diff --git a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp index 767d18f1b3af6..56929d653e7ab 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp @@ -516,11 +516,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (varTypeIsFloating(intrin.baseType)) { - if (targetReg != op1Reg) - { - // fmov reg1, reg2 - GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE); - } + // fmov reg1, reg2 + GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE); } else { @@ -557,6 +554,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S); break; + case NI_Vector64_Create: + case NI_Vector128_Create: + if (intrin.op1->isContainedFltOrDblImmed()) + { + const double dataValue = intrin.op1->AsDblCon()->gtDconVal; + GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt); + } + else if (varTypeIsFloating(intrin.baseType)) + { + GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt); + } + else + { + if (intrin.op1->isContainedIntOrIImmed()) + { + const ssize_t dataValue = intrin.op1->AsIntCon()->gtIconVal; + GetEmitter()->emitIns_R_I(INS_movi, emitSize, targetReg, dataValue, opt); + } + else + { + GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); + } + } + break; + default: unreached(); } diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index 288dcfb4353c8..bd192bd217bfb 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -23,6 +23,7 @@ HARDWARE_INTRINSIC(Vector64, AsSByte, HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Create, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_mov, INS_mov, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -44,6 +45,7 @@ HARDWARE_INTRINSIC(Vector128, AsSingle, 1 HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Create, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 4ae61be664ee3..200446cfbe114 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -914,6 +914,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } } break; + + case NI_Vector64_Create: + case NI_Vector128_Create: case NI_Vector64_CreateScalarUnsafe: case NI_Vector128_CreateScalarUnsafe: if (intrin.op1->IsCnsIntOrI()) diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il new file mode 100644 index 0000000000000..fb248164872af --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
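For reference, here is a rough C# equivalent of the scenario this regression test exercises (see the comment further down in the listing). The class and method names are illustrative only; the actual test is the IL that follows, because the C# compiler may spill the vector to a local before the call, which hides the bug.

// Hypothetical C# sketch of the IL test below: the result of Vector64.Create(...)
// is left on the evaluation stack and passed directly to a non-inlined callee.
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

static class GitHub_35821_Sketch
{
    static int Main()
    {
        Test1(Vector64.Create(23UL));  // Vector64.Create(ulong)
        Test2(Vector64.Create(23L));   // Vector64.Create(long)
        Test3(Vector64.Create(23.0));  // Vector64.Create(double)
        return 100;
    }

    [MethodImpl(MethodImplOptions.NoInlining)] static void Test1(Vector64<ulong> data) { }
    [MethodImpl(MethodImplOptions.NoInlining)] static void Test2(Vector64<long> data) { }
    [MethodImpl(MethodImplOptions.NoInlining)] static void Test3(Vector64<double> data) { }
}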
+// Metadata version: v4.0.30319 +.assembly extern System.Runtime +{ + .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) // .?_....: + .ver 5:0:0:0 +} +.assembly extern System.Runtime.Intrinsics +{ + .publickeytoken = (CC 7B 13 FF CD 2D DD 51 ) // .{...-.Q + .ver 5:0:0:0 +} +.assembly projs { } +.module projs.dll +// MVID: {379016DB-73C2-41D4-9E5F-5B727BC70E2C} +.custom instance void [System.Runtime]System.Security.UnverifiableCodeAttribute::.ctor() = ( 01 00 00 00 ) +.imagebase 0x00400000 +.file alignment 0x00000200 +.stackreserve 0x00100000 +.subsystem 0x0003 // WINDOWS_CUI +.corflags 0x00000001 // ILONLY +// Image base: 0x00000293F3DD0000 + + +// =============== CLASS MEMBERS DECLARATION =================== +// This bug was found when passing a Vector64 to a method such that +// the vector is on the evaluation stack. C# sometimes assigns the Vector64 +// to a local variable before passing it to the method; in such cases, the bug +// doesn't repro. +.class public auto ansi sealed beforefieldinit projs.GitHub_35821 + extends [System.Runtime]System.Object +{ + .method private hidebysig static int32 + Main(string[] args) cil managed + { + .entrypoint + // Code size 48 (0x30) + .maxstack 8 + IL_0000: ldc.i4.s 23 + IL_0002: conv.i8 + IL_0003: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(uint64) + IL_0008: call void projs.GitHub_35821::Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1) + IL_000d: ldc.i4.s 23 + IL_000f: conv.i8 + IL_0010: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(int64) + IL_0015: call void projs.GitHub_35821::Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1) + IL_001a: ldc.r8 23.
+ IL_0023: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(float64) + IL_0028: call void projs.GitHub_35821::Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1) + IL_002d: ldc.i4.s 100 + IL_002f: ret + } // end of method GitHub_35821::Main + + .method public hidebysig static void Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 data) cil managed noinlining + { + // Code size 1 (0x1) + .maxstack 8 + IL_0000: ret + } // end of method GitHub_35821::Test1 + + .method public hidebysig static void Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 data) cil managed noinlining + { + // Code size 1 (0x1) + .maxstack 8 + IL_0000: ret + } // end of method GitHub_35821::Test2 + + .method public hidebysig static void Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1 data) cil managed noinlining + { + // Code size 1 (0x1) + .maxstack 8 + IL_0000: ret + } // end of method GitHub_35821::Test3 + + .method public hidebysig specialname rtspecialname + instance void .ctor() cil managed + { + // Code size 7 (0x7) + .maxstack 8 + IL_0000: ldarg.0 + IL_0001: call instance void [System.Runtime]System.Object::.ctor() + IL_0006: ret + } // end of method GitHub_35821::.ctor + +} // end of class projs.GitHub_35821 + + +// ============================================================= + +// *********** DISASSEMBLY COMPLETE *********************** diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj new file mode 100644 index 0000000000000..e7c67cc80e853 --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 2938d63460775..5d55d00f11b8a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -260,6 +260,7 @@ public static Vector AsVector(this Vector128 value) /// On x86, this method corresponds to __m128i _mm_set1_epi8 /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(byte value) { if (Avx2.IsSupported) @@ -318,6 +319,7 @@ static Vector128 SoftwareFallback(byte value) /// On x86, this method corresponds to __m128d _mm_set1_pd /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(double value) { if (Sse3.IsSupported) @@ -354,6 +356,7 @@ static Vector128 SoftwareFallback(double value) /// On x86, this method corresponds to __m128i _mm_set1_epi16 /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(short value) { if (Avx2.IsSupported) @@ -397,6 +400,7 @@ static Vector128 SoftwareFallback(short value) /// On x86, this method corresponds to __m128i _mm_set1_epi32 /// A new with all elements initialized to . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(int value) { if (Avx2.IsSupported) @@ -432,6 +436,7 @@ static Vector128 SoftwareFallback(int value) /// On x86, this method corresponds to __m128i _mm_set1_epi64x /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(long value) { if (Sse2.X64.IsSupported) @@ -468,6 +473,7 @@ static Vector128 SoftwareFallback(long value) /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector128 Create(sbyte value) { if (Avx2.IsSupported) @@ -526,6 +532,7 @@ static Vector128 SoftwareFallback(sbyte value) /// On x86, this method corresponds to __m128 _mm_set1_ps /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] + [Intrinsic] public static unsafe Vector128 Create(float value) { if (Avx2.IsSupported) @@ -568,6 +575,7 @@ static Vector128 SoftwareFallback(float value) /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector128 Create(ushort value) { if (Avx2.IsSupported) @@ -612,6 +620,7 @@ static Vector128 SoftwareFallback(ushort value) /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector128 Create(uint value) { if (Avx2.IsSupported) @@ -648,6 +657,7 @@ static Vector128 SoftwareFallback(uint value) /// A new with all elements initialized to . [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector128 Create(ulong value) { if (Sse2.X64.IsSupported) @@ -753,6 +763,26 @@ public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, return Sse2.UnpackLow(lo64, hi64).AsByte(); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + result = AdvSimd.Insert(result, 7, e7); + result = AdvSimd.Insert(result, 8, e8); + result = AdvSimd.Insert(result, 9, e9); + result = AdvSimd.Insert(result, 10, e10); + result = AdvSimd.Insert(result, 11, e11); + result = AdvSimd.Insert(result, 12, e12); + result = AdvSimd.Insert(result, 13, e13); + result = AdvSimd.Insert(result, 14, e14); + return AdvSimd.Insert(result, 15, e15); + } + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); static Vector128 SoftwareFallback(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) @@ -797,6 +827,12 @@ public static unsafe Vector128 Create(double e0, double e1) return Sse.MoveLowToHigh(CreateScalarUnsafe(e0).AsSingle(), CreateScalarUnsafe(e1).AsSingle()).AsDouble(); } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + return SoftwareFallback(e0, e1); static Vector128 SoftwareFallback(double e0, double e1) @@ -837,6 +873,18 @@ public static unsafe Vector128 Create(short e0, short e1, short e2, 
short return Sse2.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + return AdvSimd.Insert(result, 7, e7); + } + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); static Vector128 SoftwareFallback(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) @@ -886,6 +934,14 @@ public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) return Sse2.UnpackLow(lo64, hi64).AsInt32(); // < 0, 1, 2, 3 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + return AdvSimd.Insert(result, 3, e3); + } + return SoftwareFallback(e0, e1, e2, e3); static Vector128 SoftwareFallback(int e0, int e1, int e2, int e3) @@ -921,6 +977,12 @@ public static unsafe Vector128 Create(long e0, long e1) return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)); // < 0, 1 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + return SoftwareFallback(e0, e1); static Vector128 SoftwareFallback(long e0, long e1) @@ -1011,6 +1073,26 @@ public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte return Sse2.UnpackLow(lo64, hi64).AsSByte(); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + result = AdvSimd.Insert(result, 7, e7); + result = AdvSimd.Insert(result, 8, e8); + result = AdvSimd.Insert(result, 9, e9); + result = AdvSimd.Insert(result, 10, e10); + result = AdvSimd.Insert(result, 11, e11); + result = AdvSimd.Insert(result, 12, e12); + result = AdvSimd.Insert(result, 13, e13); + result = AdvSimd.Insert(result, 14, e14); + return AdvSimd.Insert(result, 15, e15); + } + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); static Vector128 SoftwareFallback(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) @@ -1065,6 +1147,14 @@ public static unsafe Vector128 Create(float e0, float e1, float e2, float return Sse.MoveLowToHigh(lo64, hi64); // < 0, 1, 2, 3 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + return AdvSimd.Insert(result, 3, e3); + } + return SoftwareFallback(e0, e1, e2, e3); static Vector128 SoftwareFallback(float e0, float e1, float e2, float e3) @@ -1108,6 +1198,18 @@ public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, u return Sse2.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, 
e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + return AdvSimd.Insert(result, 7, e7); + } + return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7); static Vector128 SoftwareFallback(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7) @@ -1158,6 +1260,14 @@ public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) return Sse2.UnpackLow(lo64, hi64).AsUInt32(); // < 0, 1, 2, 3 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + return AdvSimd.Insert(result, 3, e3); + } + return SoftwareFallback(e0, e1, e2, e3); static Vector128 SoftwareFallback(uint e0, uint e1, uint e2, uint e3) @@ -1194,6 +1304,12 @@ public static unsafe Vector128 Create(ulong e0, ulong e1) return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)); // < 0, 1 > } + if (AdvSimd.IsSupported) + { + Vector128 result = CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + return SoftwareFallback(e0, e1); static Vector128 SoftwareFallback(ulong e0, ulong e1) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 089a0b5a4ed48..56cf62d2f8805 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -2,8 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System.Runtime.CompilerServices; using Internal.Runtime.CompilerServices; +using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.Arm; namespace System.Runtime.Intrinsics @@ -157,6 +157,7 @@ public static Vector64 AsUInt64(this Vector64 vector) /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi8 /// A new with all elements initialized to . + [Intrinsic] public static unsafe Vector64 Create(byte value) { byte* pResult = stackalloc byte[8] @@ -177,6 +178,7 @@ public static unsafe Vector64 Create(byte value) /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [Intrinsic] public static unsafe Vector64 Create(double value) { return Unsafe.As>(ref value); @@ -186,6 +188,7 @@ public static unsafe Vector64 Create(double value) /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi16 /// A new with all elements initialized to . + [Intrinsic] public static unsafe Vector64 Create(short value) { short* pResult = stackalloc short[4] @@ -203,6 +206,7 @@ public static unsafe Vector64 Create(short value) /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi32 /// A new with all elements initialized to . + [Intrinsic] public static unsafe Vector64 Create(int value) { int* pResult = stackalloc int[2] @@ -217,6 +221,7 @@ public static unsafe Vector64 Create(int value) /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
+ [Intrinsic] public static unsafe Vector64 Create(long value) { return Unsafe.As>(ref value); @@ -227,6 +232,7 @@ public static unsafe Vector64 Create(long value) /// On x86, this method corresponds to __m64 _mm_set1_pi8 /// A new with all elements initialized to . [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector64 Create(sbyte value) { sbyte* pResult = stackalloc sbyte[8] @@ -247,6 +253,7 @@ public static unsafe Vector64 Create(sbyte value) /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [Intrinsic] public static unsafe Vector64 Create(float value) { float* pResult = stackalloc float[2] @@ -263,6 +270,7 @@ public static unsafe Vector64 Create(float value) /// On x86, this method corresponds to __m64 _mm_set1_pi16 /// A new with all elements initialized to . [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector64 Create(ushort value) { ushort* pResult = stackalloc ushort[4] @@ -281,6 +289,7 @@ public static unsafe Vector64 Create(ushort value) /// On x86, this method corresponds to __m64 _mm_set1_pi32 /// A new with all elements initialized to . [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector64 Create(uint value) { uint* pResult = stackalloc uint[2] @@ -296,6 +305,7 @@ public static unsafe Vector64 Create(uint value) /// The value that all elements will be initialized to. /// A new with all elements initialized to . [CLSCompliant(false)] + [Intrinsic] public static unsafe Vector64 Create(ulong value) { return Unsafe.As>(ref value); @@ -314,6 +324,18 @@ public static unsafe Vector64 Create(ulong value) /// A new with each element initialized to corresponding specified value. public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + return AdvSimd.Insert(result, 7, e7); + } + byte* pResult = stackalloc byte[8] { e0, @@ -338,6 +360,14 @@ public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, b /// A new with each element initialized to corresponding specified value. public static unsafe Vector64 Create(short e0, short e1, short e2, short e3) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + return AdvSimd.Insert(result, 3, e3); + } + short* pResult = stackalloc short[4] { e0, @@ -356,6 +386,12 @@ public static unsafe Vector64 Create(short e0, short e1, short e2, short /// A new with each element initialized to corresponding specified value. 
public static unsafe Vector64 Create(int e0, int e1) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + int* pResult = stackalloc int[2] { e0, @@ -379,6 +415,18 @@ public static unsafe Vector64 Create(int e0, int e1) [CLSCompliant(false)] public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + result = AdvSimd.Insert(result, 3, e3); + result = AdvSimd.Insert(result, 4, e4); + result = AdvSimd.Insert(result, 5, e5); + result = AdvSimd.Insert(result, 6, e6); + return AdvSimd.Insert(result, 7, e7); + } + sbyte* pResult = stackalloc sbyte[8] { e0, @@ -400,6 +448,12 @@ public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte /// A new with each element initialized to corresponding specified value. public static unsafe Vector64 Create(float e0, float e1) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + float* pResult = stackalloc float[2] { e0, @@ -419,6 +473,14 @@ public static unsafe Vector64 Create(float e0, float e1) [CLSCompliant(false)] public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + result = AdvSimd.Insert(result, 1, e1); + result = AdvSimd.Insert(result, 2, e2); + return AdvSimd.Insert(result, 3, e3); + } + ushort* pResult = stackalloc ushort[4] { e0, @@ -438,6 +500,12 @@ public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, us [CLSCompliant(false)] public static unsafe Vector64 Create(uint e0, uint e1) { + if (AdvSimd.IsSupported) + { + Vector64 result = Vector64.CreateScalarUnsafe(e0); + return AdvSimd.Insert(result, 1, e1); + } + uint* pResult = stackalloc uint[2] { e0,
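Taken together, the JIT changes above let the single-value Vector64.Create and Vector128.Create overloads expand to a single ARM64 instruction (typically dup for a value in a register, movi or fmov for contained constants), while the multi-element overloads use the CreateScalarUnsafe-plus-AdvSimd.Insert path when AdvSimd is supported. A small usage sketch follows; the instruction names in the comments describe the codegen paths added above and are indicative only, since the final selection depends on containment decisions made during lowering.

// Illustrative usage only; the instruction comments reflect the ARM64 codegen
// paths added in this change and may vary with lowering/containment decisions.
using System.Runtime.Intrinsics;

static class Vector64CreateDemo
{
    // Broadcast a variable: expected to use dup (e.g. dup v0.2s, w0).
    static Vector64<int> Broadcast(int x) => Vector64.Create(x);

    // Broadcast a constant that the emitter can now encode (movi/mvni immediate path).
    static Vector64<int> BroadcastConst() => Vector64.Create(int.MaxValue);

    // Broadcast a floating-point constant: expected to use fmov with an encodable immediate.
    static Vector128<float> BroadcastFloat() => Vector128.Create(2.0f);

    // Multi-element Create: managed fallback via CreateScalarUnsafe + AdvSimd.Insert.
    static Vector64<int> TwoElements(int a, int b) => Vector64.Create(a, b);
}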