Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64 intrinsic support for Vector64.Create() and Vector128.Create() #35590

Merged
merged 15 commits into from
May 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6984,7 +6984,6 @@ void CodeGen::genArm64EmitterUnitTests()
genDefineTempLabel(genCreateTempLabel());

theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);

Expand Down Expand Up @@ -7330,6 +7329,10 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);

// Regression coverage: movi immediates equal to int.MaxValue or int.MaxValue / 2 were previously not encoded.
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x7fffffff, INS_OPTS_2S);
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x3fffffff, INS_OPTS_2S);

theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
Expand Down
7 changes: 3 additions & 4 deletions src/coreclr/src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2864,12 +2864,11 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
* 'size' specifies the size of the result (16 or 32 bits)
*/

/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
bool onesShift = (bsImm.immOnes == 1);
unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
INT32 val = (INT32)bsImm.immVal; // 8-bit immediate
INT32 result = val;
unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
UINT32 result = (UINT32)bsImm.immVal; // 8-bit immediate

if (bySh > 0)
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ union byteShiftedImm {

static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);

static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
static UINT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);

/************************************************************************
*
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
{
case TYP_FLOAT:
return m_simdHandleCache->Vector64FloatHandle;
case TYP_DOUBLE:
return m_simdHandleCache->Vector64DoubleHandle;
case TYP_INT:
return m_simdHandleCache->Vector64IntHandle;
case TYP_USHORT:
Expand All @@ -212,6 +214,10 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
return m_simdHandleCache->Vector64ByteHandle;
case TYP_UINT:
return m_simdHandleCache->Vector64UIntHandle;
case TYP_LONG:
return m_simdHandleCache->Vector64LongHandle;
case TYP_ULONG:
return m_simdHandleCache->Vector64ULongHandle;
default:
assert(!"Didn't find a class handle for simdType");
}
Expand Down
32 changes: 27 additions & 5 deletions src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (varTypeIsFloating(intrin.baseType))
{
if (targetReg != op1Reg)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
// fmov reg1, reg2
GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
}
// fmov reg1, reg2
GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
}
else
{
Expand Down Expand Up @@ -557,6 +554,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
break;

case NI_Vector64_Create:
case NI_Vector128_Create:
if (intrin.op1->isContainedFltOrDblImmed())
{
const double dataValue = intrin.op1->AsDblCon()->gtDconVal;
GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt);
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
}
else if (varTypeIsFloating(intrin.baseType))
{
GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt);
}
else
{
if (intrin.op1->isContainedIntOrIImmed())
{
const ssize_t dataValue = intrin.op1->AsIntCon()->gtIconVal;
GetEmitter()->emitIns_R_I(INS_movi, emitSize, targetReg, dataValue, opt);
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
}
else
{
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
}
break;

default:
unreached();
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ HARDWARE_INTRINSIC(Vector64, AsSByte,
HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, Create, 8, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_mov, INS_mov, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand All @@ -44,6 +45,7 @@ HARDWARE_INTRINSIC(Vector128, AsSingle, 1
HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, 1, {INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup, INS_dup}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SimpleSIMD, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/src/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
}
break;

case NI_Vector64_Create:
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
case NI_Vector128_Create:
case NI_Vector64_CreateScalarUnsafe:
case NI_Vector128_CreateScalarUnsafe:
if (intrin.op1->IsCnsIntOrI())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// Metadata version: v4.0.30319
.assembly extern System.Runtime
{
.publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) // .?_....:
.ver 5:0:0:0
}
.assembly extern System.Runtime.Intrinsics
{
.publickeytoken = (CC 7B 13 FF CD 2D DD 51 ) // .{...-.Q
.ver 5:0:0:0
}
.assembly projs { }
.module projs.dll
// MVID: {379016DB-73C2-41D4-9E5F-5B727BC70E2C}
.custom instance void [System.Runtime]System.Security.UnverifiableCodeAttribute::.ctor() = ( 01 00 00 00 )
.imagebase 0x00400000
.file alignment 0x00000200
.stackreserve 0x00100000
.subsystem 0x0003 // WINDOWS_CUI
.corflags 0x00000001 // ILONLY
// Image base: 0x00000293F3DD0000


// =============== CLASS MEMBERS DECLARATION ===================
// This bug was found when passing a Vector64<long> to a method such that
// the vector is on the evaluation stack. The C# compiler sometimes assigns the
// Vector64 to a local variable before passing it to the method. In such cases,
// the bug doesn't repro.
// Test class whose entry point calls Vector64::Create for uint64, int64 and
// float64 and hands each result straight to a noinlining method as its argument.
// No locals are declared, so every created vector is consumed directly from
// the evaluation stack by the following call.
.class public auto ansi sealed beforefieldinit projs.GitHub_35821
extends [System.Runtime]System.Object
{
// Entry point: runs the three Create + call sequences and returns 100.
// NOTE(review): 100 is presumably the exit code the test harness treats as
// success - confirm against the harness conventions.
.method private hidebysig static int32
Main(string[] args) cil managed
{
.entrypoint
// Code size 48 (0x30)
.maxstack 8
// Vector64<uint64>: push 23 as a 64-bit integer, create the vector, pass it to Test1.
IL_0000: ldc.i4.s 23
IL_0002: conv.i8
IL_0003: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(uint64)
IL_0008: call void projs.GitHub_35821::Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64>)
// Vector64<int64>: same sequence, passed to Test2.
IL_000d: ldc.i4.s 23
IL_000f: conv.i8
IL_0010: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(int64)
IL_0015: call void projs.GitHub_35821::Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64>)
// Vector64<float64>: push the double constant 23, create the vector, pass it to Test3.
IL_001a: ldc.r8 23.
IL_0023: call valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(float64)
IL_0028: call void projs.GitHub_35821::Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64>)
// Return value of Main.
IL_002d: ldc.i4.s 100
IL_002f: ret
} // end of method GitHub_35821::Main

// Receives a Vector64<uint64> and returns immediately. Marked noinlining,
// presumably so the vector is really passed as a call argument - confirm.
.method public hidebysig static void Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test1

// Receives a Vector64<int64> and returns immediately; noinlining like Test1.
.method public hidebysig static void Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test2

// Receives a Vector64<float64> and returns immediately; noinlining like Test1.
.method public hidebysig static void Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> data) cil managed noinlining
{
// Code size 1 (0x1)
.maxstack 8
IL_0000: ret
} // end of method GitHub_35821::Test3

// Instance constructor: only chains to System.Object's constructor.
.method public hidebysig specialname rtspecialname
instance void .ctor() cil managed
{
// Code size 7 (0x7)
.maxstack 8
IL_0000: ldarg.0
IL_0001: call instance void [System.Runtime]System.Object::.ctor()
IL_0006: ret
} // end of method GitHub_35821::.ctor

} // end of class projs.GitHub_35821


// =============================================================

// *********** DISASSEMBLY COMPLETE ***********************
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk.IL">
<!-- IL-SDK project that builds the .il source named after this project into an executable. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
</PropertyGroup>
<PropertyGroup>
<!-- Debug info is set to None and optimization is turned on. -->
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- The single compile input is the .il file whose base name matches the project name. -->
<Compile Include="$(MSBuildProjectName).il" />
</ItemGroup>
</Project>
Loading