From 97373cadbd21841f5b2cd256d4f19059e880c01c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 6 May 2024 16:39:45 -0700 Subject: [PATCH 01/29] Add *Fused* APIs --- .../Arm/Sve.PlatformNotSupported.cs | 105 ++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 105 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 16 ++- 3 files changed, 225 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 43eec348f76fc..0054cf789a33b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -782,6 +782,111 @@ internal Arm64() { } /// public static unsafe Vector Divide(Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// FusedMultiplyAdd : Multiply-add, addend first + + /// + /// svfloat64_t svmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + + /// FusedMultiplyAddBySelectedScalar : Multiply-add, addend first + + /// + /// svfloat64_t svmla_lane[_f64](svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index) + /// FMLA Ztied1.D, Zop2.D, Zop3.D[imm_index] + /// + public static unsafe Vector FusedMultiplyAddBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svmla_lane[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index) + /// FMLA Ztied1.S, Zop2.S, Zop3.S[imm_index] + /// + public static unsafe Vector FusedMultiplyAddBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + + /// FusedMultiplyAddNegated : Negated multiply-add, addend first + + /// + /// svfloat64_t svnmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FNMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplyAddNegated(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svnmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FNMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplyAddNegated(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + + /// FusedMultiplySubtract : Multiply-subtract, minuend first + + /// + /// svfloat64_t svmls[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmls[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmls[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FMLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svmls[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmls[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmls[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FMLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + + /// FusedMultiplySubtractBySelectedScalar : Multiply-subtract, minuend first + + /// + /// svfloat64_t svmls_lane[_f64](svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index) + /// FMLS Ztied1.D, Zop2.D, Zop3.D[imm_index] + /// + public static unsafe Vector FusedMultiplySubtractBySelectedScalar(Vector minuend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svmls_lane[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index) + /// FMLS Ztied1.S, Zop2.S, Zop3.S[imm_index] + /// + public static unsafe Vector FusedMultiplySubtractBySelectedScalar(Vector minuend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + + /// FusedMultiplySubtractNegated : Negated multiply-subtract, minuend first + + /// + /// svfloat64_t svnmls[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmls[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmls[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FNMLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svnmls[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmls[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmls[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FNMLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// LoadVector : Unextended load /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 6814d2f8317aa..9a30c700113bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -839,6 +839,111 @@ internal Arm64() { } /// public static unsafe Vector Divide(Vector left, Vector right) => Divide(left, right); + /// FusedMultiplyAdd : Multiply-add, addend first + + /// + /// svfloat64_t svmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplyAdd(Vector addend, Vector left, Vector right) => FusedMultiplyAdd(addend, left, right); + + /// + /// svfloat32_t svmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplyAdd(Vector addend, Vector left, Vector right) => FusedMultiplyAdd(addend, left, right); + + + /// FusedMultiplyAddBySelectedScalar : Multiply-add, addend first + + /// + /// svfloat64_t svmla_lane[_f64](svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index) + /// FMLA Ztied1.D, Zop2.D, Zop3.D[imm_index] + /// + public static unsafe Vector FusedMultiplyAddBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => FusedMultiplyAddBySelectedScalar(addend, left, right, rightIndex); + + /// + /// svfloat32_t svmla_lane[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index) + /// FMLA Ztied1.S, Zop2.S, Zop3.S[imm_index] + /// + public static unsafe Vector FusedMultiplyAddBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => FusedMultiplyAddBySelectedScalar(addend, left, right, rightIndex); + + + /// FusedMultiplyAddNegated : Negated multiply-add, addend first + + /// + /// svfloat64_t svnmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FNMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplyAddNegated(Vector addend, Vector left, Vector right) => FusedMultiplyAddNegated(addend, left, right); + + /// + /// svfloat32_t svnmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FNMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplyAddNegated(Vector addend, Vector left, Vector right) => FusedMultiplyAddNegated(addend, left, right); + + + /// FusedMultiplySubtract : Multiply-subtract, minuend first + + /// + /// svfloat64_t svmls[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmls[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svmls[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FMLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplySubtract(Vector minuend, Vector left, Vector right) => FusedMultiplySubtract(minuend, left, right); + + /// + /// svfloat32_t svmls[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmls[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svmls[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FMLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplySubtract(Vector minuend, Vector left, Vector right) => FusedMultiplySubtract(minuend, left, right); + + + /// FusedMultiplySubtractBySelectedScalar : Multiply-subtract, minuend first + + /// + /// svfloat64_t svmls_lane[_f64](svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index) + /// FMLS Ztied1.D, Zop2.D, Zop3.D[imm_index] + /// + public static unsafe Vector FusedMultiplySubtractBySelectedScalar(Vector minuend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => FusedMultiplySubtractBySelectedScalar(minuend, left, right, rightIndex); + + /// + /// svfloat32_t svmls_lane[_f32](svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index) + /// FMLS Ztied1.S, Zop2.S, Zop3.S[imm_index] + /// + public static unsafe Vector FusedMultiplySubtractBySelectedScalar(Vector minuend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => FusedMultiplySubtractBySelectedScalar(minuend, left, right, rightIndex); + + + /// FusedMultiplySubtractNegated : Negated multiply-subtract, minuend first + + /// + /// svfloat64_t svnmls[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmls[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// svfloat64_t svnmls[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) + /// FNMLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) => FusedMultiplySubtractNegated(minuend, left, right); + + /// + /// svfloat32_t svnmls[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmls[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// svfloat32_t svnmls[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) + /// FNMLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector FusedMultiplySubtractNegated(Vector minuend, Vector left, Vector right) => FusedMultiplySubtractNegated(minuend, left, right); + /// LoadVector : Unextended load /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index e4f5e1d215802..bc1f89c484aae 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4272,7 +4272,21 @@ internal Arm64() { } public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) { throw null; } public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw null; } public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } - public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + + public static System.Numerics.Vector FusedMultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplyAddBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector FusedMultiplyAddBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector FusedMultiplyAddNegated(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplyAddNegated(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtractBySelectedScalar(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtractBySelectedScalar(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtractNegated(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector FusedMultiplySubtractNegated(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, int* address) { throw null; } From 4e140981e50a6dae68fdb5c2c40afff07461bf6a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 06:59:49 -0700 Subject: [PATCH 02/29] fix an assert in morph --- src/coreclr/jit/morph.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a43882bd9867d..bf0ffcce2af69 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10718,9 +10718,8 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); - GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); - if ((genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) + if (!op2->OperIsHWIntrinsic() || (genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()) != simdBaseTypeSize)) { // We need the operand to be the same kind of mask; otherwise // the bitwise operation can differ in how it performs From 3fb9dea7c006d60fc7ed978081f5869047d96bfe Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 07:00:02 -0700 Subject: [PATCH 03/29] Map APIs to instructions --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index a2af0bec71139..9f224a58b3f48 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -46,6 +46,12 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) From 600391a0072b10a74e21e655258e04280c425db8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:00:00 -0700 Subject: [PATCH 04/29] Add test cases --- .../GenerateHWIntrinsicTests_Arm.cs | 44 +- .../Shared/_SveImmTernOpTestTemplate.template | 358 +++++++++++++ .../Shared/_SveTernOpTestTemplate.template | 478 ++++++++++++++++++ 3 files changed, 878 insertions(+), 2 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 5701becf7c486..6e3b88e83f486 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -47,6 +47,16 @@ } }"; +const string SimpleTernVecOpTest_ValidationLogic = @"for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + }"; + + const string SimpleVecOpTest_ValidationLogicForCndSel = @"for (var i = 0; i < RetElementCount; i++) { {Op1BaseType} iterResult = (mask[i] != 0) ? {GetIterResult} : falseVal[i]; @@ -57,6 +67,16 @@ } }"; +const string SimpleTernVecOpTest_ValidationLogicForCndSel = @"for (var i = 0; i < RetElementCount; i++) + { + {Op1BaseType} iterResult = (mask[i] != 0) ? {GetIterResult} : falseVal[i]; + if ({ConvertFunc}(iterResult) != {ConvertFunc}(result[i])) + { + succeeded = false; + break; + } + }"; + const string VecPairBinOpTest_ValidationLogic = @" int index = 0; int half = RetElementCount / 2; @@ -121,6 +141,8 @@ ("_TernaryOpTestTemplate.template", "SecureHashTernOpTest.template", new Dictionary { ["TemplateName"] = "SecureHash", ["TemplateValidationLogic"] = SecureHashOpTest_ValidationLogic }), ("_SveUnaryOpTestTemplate.template", "SveSimpleVecOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), ("_SveBinaryOpTestTemplate.template", "SveVecBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), + ("_SveTernOpTestTemplate.template", "SveVecTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), + ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, }), ("_SveMinimalUnaryOpTestTemplate.template", "SveVecReduceUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = VecReduceOpTest_ValidationLogic }), }; @@ -2988,8 +3010,26 @@ ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask8Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt32)i, right) != (UInt32)result[i]",}), ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask8Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt64)i, right) != (UInt64)result[i]",}), - ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), - ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), + ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), + ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), + + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",}), + + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])"}), + + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template new file mode 100644 index 0000000000000..abe0377c07e4b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template @@ -0,0 +1,358 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}TernaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}TernaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op2BaseType}[] inArray2, {Op3BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op2VectorType}<{Op2BaseType}> _fld2; + public {Op3VectorType}<{Op3BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}TernaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, _fld3, {Imm}); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + private static readonly byte Imm = {Imm}; + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op2VectorType}<{Op2BaseType}> _fld2; + private {Op3VectorType}<{Op3BaseType}> _fld3; + + private DataTable _dataTable; + + public {TemplateName}TernaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op2VectorType}(loadMask, ({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op3VectorType}(loadMask, ({Op3BaseType}*)(_dataTable.inArray3Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof({Op3VectorType}<{Op3BaseType}>), typeof(byte) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + (byte){Imm} + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op1, op2, op3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, _fld3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2VectorType}<{Op2BaseType}>, {Op3VectorType}<{Op3BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template new file mode 100644 index 0000000000000..b8ce72108d760 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template @@ -0,0 +1,478 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}BinaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + // Validates executing the test inside conditional, with op1 as falseValue + test.ConditionalSelect_Op1(); + + // Validates executing the test inside conditional, with op2 as falseValue + test.ConditionalSelect_Op2(); + + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_Op3(); + + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_FalseOp(); + + // Validates executing the test inside conditional, with op3 as zero + test.ConditionalSelect_ZeroOp(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}BinaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException($"Invalid value of alignment: {alignment}, sizeOfinArray1: {sizeOfinArray1}, sizeOfinArray2: {sizeOfinArray2}, sizeOfinArray3: {sizeOfinArray3}, sizeOfoutArray: {sizeOfoutArray}"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}BinaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _mask; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _falseFld; + + private DataTable _dataTable; + + public {TemplateName}BinaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op1, op2, op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void ConditionalSelect_Op1() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld1); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld1); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld1); + } + + public void ConditionalSelect_Op2() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld2); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld2); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld2); + } + + public void ConditionalSelect_Op3() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld3); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld3); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld3); + } + + public void ConditionalSelect_FalseOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _falseFld); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _falseFld); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _falseFld); + } + + public void ConditionalSelect_ZeroOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + } + + [method: MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + { + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3), falseOp); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op1BaseType}> secondOp, {Op1VectorType}<{Op1BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + { + {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] second = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] third = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref mask[0]), maskOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref first[0]), firstOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref second[0]), secondOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref third[0]), thirdOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref falseVal[0]), falseOp); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref result[0]), ref Unsafe.AsRef(output), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + bool succeeded = true; + + {TemplateValidationLogicForCndSel} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" mask: ({string.Join(", ", mask)})"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" falseOp: ({string.Join(", ", falseVal)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] first, {Op1BaseType}[] second, {Op1BaseType}[] third, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} From 67e4d4dd19449ba3b5bb302374df9f70e836a88f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:02:31 -0700 Subject: [PATCH 05/29] handle fused* instructions --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 + src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 57 ++++++++++++++++++++- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 +- src/coreclr/jit/lowerarmarch.cpp | 10 ++++ src/coreclr/jit/lsraarm64.cpp | 23 +++++++-- src/coreclr/jit/targetarm64.h | 4 +- 6 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 90dde12bde822..84b7e6b31387d 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -277,6 +277,8 @@ void HWIntrinsicInfo::lookupImmBounds( case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128: case NI_AdvSimd_Arm64_InsertSelectedScalar: + case NI_Sve_FusedMultiplyAddBySelectedScalar: + case NI_Sve_FusedMultiplySubtractBySelectedScalar: immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; break; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 7223d7b2203e8..41c19984cfd0c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -417,10 +417,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) regNumber maskReg = op1Reg; regNumber embMaskOp1Reg = REG_NA; regNumber embMaskOp2Reg = REG_NA; + regNumber embMaskOp3Reg = REG_NA; regNumber falseReg = op3Reg; switch (intrinEmbMask.numOperands) { + case 3: + assert(intrinEmbMask.op3 != nullptr); + embMaskOp3Reg = intrinEmbMask.op3->GetRegNum(); + FALLTHROUGH; + case 2: assert(intrinEmbMask.op2 != nullptr); embMaskOp2Reg = intrinEmbMask.op2->GetRegNum(); @@ -438,6 +444,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinEmbMask.numOperands) { case 1: + { assert(!instrIsRMW); if (targetReg != falseReg) @@ -488,9 +495,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); break; + } case 2: - + { assert(instrIsRMW); if (intrin.op3->IsVectorZero()) @@ -560,7 +568,48 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } break; + } + case 3: + { + assert(instrIsRMW); + assert(targetReg != falseReg); + assert(targetReg != embMaskOp2Reg); + assert(targetReg != embMaskOp3Reg); + assert(!HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)); + + if (intrin.op3->IsVectorZero()) + { + // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the + // destination using /Z. + + assert(targetReg != embMaskOp2Reg); + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand + // `embMaskOp2Reg` is the second operand and `embMaskOp3Reg` is the third operand. + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, embMaskOp3Reg, opt); + } + else + { + // If the instruction just has "predicated" version, then move the "embMaskOp1Reg" + // into targetReg. Next, do the predicated operation on the targetReg and last, + // use "sel" to select the active lanes based on mask, and set inactive lanes + // to falseReg. + + assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id)); + + if (targetReg != embMaskOp1Reg) + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); + } + + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,embMaskOp3Reg, opt); + + GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, + opt, INS_SCALABLE_OPTS_UNPREDICATED); + } + break; + } default: unreached(); } @@ -627,6 +676,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) INS_SCALABLE_OPTS_UNPREDICATED); } break; + case 4: + assert(!isRMW); + GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, + INS_SCALABLE_OPTS_UNPREDICATED); + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 9f224a58b3f48..7d9d46d5171c1 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -47,10 +47,10 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 36b5e08f0afd6..ee54776825f19 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3352,6 +3352,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_FusedMultiplyAddBySelectedScalar: + case NI_Sve_FusedMultiplySubtractBySelectedScalar: + assert(hasImmediateOperand); + assert(varTypeIsIntegral(intrin.op4)); + if (intrin.op4->IsCnsIntOrI()) + { + MakeSrcContained(node, intrin.op4); + } + break; + default: unreached(); } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 454cc43c8fcfa..ce6f2273db53b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1772,7 +1772,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // then record delay-free for operands as well as the "merge" value GenTreeHWIntrinsic* intrinEmbOp2 = intrin.op2->AsHWIntrinsic(); size_t numArgs = intrinEmbOp2->GetOperandCount(); - assert((numArgs == 1) || (numArgs == 2)); + assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3)); tgtPrefUse = BuildUse(intrinEmbOp2->Op(1)); srcCount += 1; @@ -1792,7 +1792,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); - bool forceOp2DelayFree = false; + bool forceOp2DelayFree = false; + regMaskTP candidates = RBM_NONE; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal())) @@ -1815,6 +1816,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } + if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) || (intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar)) + { + // If this is common pattern, then we will add a flag in the table, but for now, just check for specific + // intrinsics + if (intrin.baseType == TYP_DOUBLE) + { + candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; + } + else + { + assert(intrin.baseType == TYP_FLOAT); + candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; + } + } + if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp()) && (intrin.op2->isRMWHWIntrinsic(compiler))) { @@ -1845,7 +1861,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (intrin.op3 != nullptr) { - srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); + srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates) + : BuildOperandUses(intrin.op3, candidates); if (intrin.op4 != nullptr) { diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index f52ec2cc9976b..8238980fa22d5 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -376,9 +376,11 @@ // For arm64, this is the maximum prolog establishment pre-indexed (that is SP pre-decrement) offset. #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 512 - // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions (e.g. "MLA (by element)") + // Some "Advanced SIMD / SVE scalar x indexed element" and "Advanced SIMD / SVE vector x indexed element" instructions (e.g. "MLA (by element)") // have encoding that restricts what registers that can be used for the indexed element when the element size is H (i.e. 2 bytes). #define RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7|RBM_V8|RBM_V9|RBM_V10|RBM_V11|RBM_V12|RBM_V13|RBM_V14|RBM_V15) + #define RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7) + #define RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS #define REG_ZERO_INIT_FRAME_REG1 REG_R9 #define REG_ZERO_INIT_FRAME_REG2 REG_R10 From 54899b253c214b234b63bd16534b44d836d7647d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:06:00 -0700 Subject: [PATCH 06/29] jit format --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 6 ++++-- src/coreclr/jit/lsraarm64.cpp | 3 ++- src/coreclr/jit/morph.cpp | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 41c19984cfd0c..dd88aa41e884b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -587,7 +587,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand // `embMaskOp2Reg` is the second operand and `embMaskOp3Reg` is the third operand. - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, embMaskOp3Reg, opt); + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); } else { @@ -603,7 +604,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); } - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,embMaskOp3Reg, opt); + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ce6f2273db53b..e6039783bf977 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1816,7 +1816,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } - if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) || (intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar)) + if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) || + (intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar)) { // If this is common pattern, then we will add a flag in the table, but for now, just check for specific // intrinsics diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index bf0ffcce2af69..7b76a74b5f4c8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10717,7 +10717,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) break; } - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); if (!op2->OperIsHWIntrinsic() || (genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()) != simdBaseTypeSize)) { From e4a53aef9e6ac09044ae9b5ee69baba72596d6e3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:19:26 -0700 Subject: [PATCH 07/29] Added MultiplyAdd/MultiplySubtract --- .../Arm/Sve.PlatformNotSupported.cs | 131 +++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 133 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 18 +++ 3 files changed, 282 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 0054cf789a33b..528f928c5ae08 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -1702,7 +1702,138 @@ internal Arm64() { } /// public static unsafe Vector Multiply(Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// MultiplyAdd : Multiply-add, addend first + /// + /// svuint8_t svmla[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmla[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmla[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// MLA Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint16_t svmla[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmla[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmla[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// MLA Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint32_t svmla[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmla[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmla[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// MLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svmla[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmla[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmla[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// MLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint8_t svmla[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmla[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmla[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// MLA Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svmla[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmla[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmla[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// MLA Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svmla[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmla[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmla[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// MLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svmla[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmla[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmla[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// MLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + + /// MultiplySubtract : Multiply-subtract, minuend first + + /// + /// svuint8_t svmls[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmls[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmls[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// MLS Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint16_t svmls[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmls[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmls[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// MLS Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint32_t svmls[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmls[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmls[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// MLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svmls[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmls[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmls[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// MLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint8_t svmls[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmls[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmls[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// MLS Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svmls[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmls[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmls[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// MLS Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svmls[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmls[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmls[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// MLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svmls[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmls[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmls[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// MLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } /// Or : Bitwise inclusive OR /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 9a30c700113bf..dc19387281775 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -1758,6 +1758,139 @@ internal Arm64() { } /// public static unsafe Vector Multiply(Vector left, Vector right) => Multiply(left, right); + /// MultiplyAdd : Multiply-add, addend first + + /// + /// svuint8_t svmla[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmla[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmla[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// MLA Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svint16_t svmla[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmla[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmla[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// MLA Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svint32_t svmla[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmla[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmla[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// MLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svint64_t svmla[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmla[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmla[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// MLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svint8_t svmla[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmla[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmla[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// MLA Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svuint16_t svmla[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmla[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmla[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// MLA Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svuint32_t svmla[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmla[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmla[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// MLA Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + /// + /// svuint64_t svmla[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmla[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmla[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// MLA Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + + + /// MultiplySubtract : Multiply-subtract, minuend first + + /// + /// svuint8_t svmls[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmls[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// svuint8_t svmls[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) + /// MLS Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svint16_t svmls[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmls[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// svint16_t svmls[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) + /// MLS Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svint32_t svmls[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmls[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// svint32_t svmls[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) + /// MLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svint64_t svmls[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmls[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// svint64_t svmls[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) + /// MLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svint8_t svmls[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmls[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// svint8_t svmls[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) + /// MLS Ztied1.B, Pg/M, Zop2.B, Zop3.B + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svuint16_t svmls[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmls[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// svuint16_t svmls[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) + /// MLS Ztied1.H, Pg/M, Zop2.H, Zop3.H + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svuint32_t svmls[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmls[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// svuint32_t svmls[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) + /// MLS Ztied1.S, Pg/M, Zop2.S, Zop3.S + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + + /// + /// svuint64_t svmls[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmls[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// svuint64_t svmls[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) + /// MLS Ztied1.D, Pg/M, Zop2.D, Zop3.D + /// + public static unsafe Vector MultiplySubtract(Vector minuend, Vector left, Vector right) => MultiplySubtract(minuend, left, right); + /// Or : Bitwise inclusive OR diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index bc1f89c484aae..6d29e78fcb9cd 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4382,6 +4382,24 @@ internal Arm64() { } public static System.Numerics.Vector Multiply(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector Multiply(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector Or(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector Or(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector Or(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } From bfad7b7b6c6481f871dc7226ff84ecc56ef98eee Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:22:13 -0700 Subject: [PATCH 08/29] Add mapping of API to instruction --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 7d9d46d5171c1..fb95384ab285d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -86,6 +86,8 @@ HARDWARE_INTRINSIC(Sve, MinAcross, HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) From 100f2895ae23c085628fe82a3e6083911b8b4a31 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 May 2024 23:28:48 -0700 Subject: [PATCH 09/29] Add test cases --- .../GenerateHWIntrinsicTests_Arm.cs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 6e3b88e83f486..7fa47f6f67832 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3134,6 +3134,24 @@ ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Multiply_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Multiply", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.Multiply(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Multiply(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Multiply_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Multiply", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.Multiply(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Multiply(left[i], right[i])"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Or_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Or", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(sbyte)TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.Or(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Or(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Or_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Or", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(short)TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.Or(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Or(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Or_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Or", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.Or(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Or(left[i], right[i])"}), From 8ac18409fd29dd3a74f74988a6416ec0e65152a7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 14:53:21 -0700 Subject: [PATCH 10/29] Handle mov Z, Z instruction --- src/coreclr/jit/emitarm64.cpp | 16 +++++++++++++-- src/coreclr/jit/emitarm64sve.cpp | 34 +++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 04db539d1c82d..3daf63e0b08e4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4250,9 +4250,11 @@ void emitter::emitIns_Mov( case INS_sve_mov: { - if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) + //TODO-SVE: Remove check for insOptsNone() when predicate registers + // are present. + if (insOptsNone(opt) && isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) { - assert(insOptsNone(opt)); + //assert(insOptsNone(opt)); opt = INS_OPTS_SCALABLE_B; attr = EA_SCALABLE; @@ -4263,6 +4265,16 @@ void emitter::emitIns_Mov( } fmt = IF_SVE_CZ_4A_L; } + else if (isVectorRegister(dstReg) && isVectorRegister(srcReg)) + { + assert(insOptsScalable(opt)); + + if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) + { + return; + } + fmt = IF_SVE_AU_3A; + } else { unreached(); diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 0c8485a459b5a..2c6b8e30abcb8 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -10384,7 +10384,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product @@ -10406,6 +10405,17 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd + code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn + if (id->idIns() != INS_sve_mov) + { + code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm + } + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd @@ -12911,7 +12921,6 @@ void emitter::emitInsSveSanityCheck(instrDesc* id) case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product @@ -12931,6 +12940,12 @@ void emitter::emitInsSveSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm assert(isVectorRegister(id->idReg3())); // mmmmm/aaaaa break; + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm + assert((id->idIns() == INS_sve_mov) || isVectorRegister(id->idReg3())); // mmmmm/aaaaa + break; case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) @@ -14574,7 +14589,6 @@ void emitter::emitDispInsSveHelp(instrDesc* id) case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate // .D, .D, .D case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) // .B, .B, .B case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) @@ -14589,6 +14603,20 @@ void emitter::emitDispInsSveHelp(instrDesc* id) emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa break; + // .D, .D, .D + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + if (id->idIns() == INS_sve_mov) + { + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn/mmmmm + } + else + { + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn/mmmmm + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa + } + break; + // .D, .D, .D case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) // .D, .D, .D From 9eb195e2cbc778eaac58654d3ca544c5d987c1cf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 14:54:49 -0700 Subject: [PATCH 11/29] Reuse GetResultOpNumForRmwIntrinsic() for arm64 --- src/coreclr/jit/gentree.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d48fc45868ba2..0f9c42c9d6341 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27931,7 +27931,7 @@ bool GenTreeLclVar::IsNeverNegative(Compiler* comp) const return comp->lvaGetDesc(GetLclNum())->IsNeverNegative(); } -#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) +#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ // GetResultOpNumForRmwIntrinsic: check if the result is written into one of the operands. // In the case that none of the operand is overwritten, check if any of them is lastUse. @@ -27942,7 +27942,11 @@ bool GenTreeLclVar::IsNeverNegative(Compiler* comp) const // unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { +#if defined(TARGET_XARCH) assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId) || HWIntrinsicInfo::IsPermuteVar2x(gtHWIntrinsicId)); +#elif defined(TARGET_ARM64) + assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId)); +#endif if (use != nullptr && use->OperIs(GT_STORE_LCL_VAR)) { From c182d0dd32469c884bf89628dc8fcbaa0126ef80 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 14:55:29 -0700 Subject: [PATCH 12/29] Reuse HW_Flag_FmaIntrinsic for arm64 --- src/coreclr/jit/hwintrinsic.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 82cf179c742f4..55e30ead065ef 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -217,20 +217,17 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is an RMW intrinsic HW_Flag_RmwIntrinsic = 0x1000000, - // The intrinsic is a FusedMultiplyAdd intrinsic - HW_Flag_FmaIntrinsic = 0x2000000, - // The intrinsic is a PermuteVar2x intrinsic - HW_Flag_PermuteVar2x = 0x4000000, + HW_Flag_PermuteVar2x = 0x2000000, // The intrinsic is an embedded broadcast compatible intrinsic - HW_Flag_EmbBroadcastCompatible = 0x8000000, + HW_Flag_EmbBroadcastCompatible = 0x4000000, // The intrinsic is an embedded rounding compatible intrinsic - HW_Flag_EmbRoundingCompatible = 0x10000000, + HW_Flag_EmbRoundingCompatible = 0x8000000, // The intrinsic is an embedded masking incompatible intrinsic - HW_Flag_EmbMaskingIncompatible = 0x20000000, + HW_Flag_EmbMaskingIncompatible = 0x10000000, #elif defined(TARGET_ARM64) // The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand. @@ -238,6 +235,9 @@ enum HWIntrinsicFlag : unsigned int #endif // TARGET_XARCH + // The intrinsic is a FusedMultiplyAdd intrinsic + HW_Flag_FmaIntrinsic = 0x20000000, + HW_Flag_CanBenefitFromConstantProp = 0x80000000, }; @@ -936,17 +936,17 @@ struct HWIntrinsicInfo return (flags & HW_Flag_MaybeNoJmpTableIMM) != 0; } -#if defined(TARGET_XARCH) - static bool IsRmwIntrinsic(NamedIntrinsic id) + static bool IsFmaIntrinsic(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_RmwIntrinsic) != 0; + return (flags & HW_Flag_FmaIntrinsic) != 0; } - static bool IsFmaIntrinsic(NamedIntrinsic id) +#if defined(TARGET_XARCH) + static bool IsRmwIntrinsic(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_FmaIntrinsic) != 0; + return (flags & HW_Flag_RmwIntrinsic) != 0; } static bool IsPermuteVar2x(NamedIntrinsic id) From 62ea159eec7407a461cbbed604a73793e33ffe17 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 14:56:08 -0700 Subject: [PATCH 13/29] Mark FMA APIs as HW_Flag_FmaIntrinsic --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index fb95384ab285d..6f18b48813643 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -46,12 +46,12 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) @@ -86,8 +86,8 @@ HARDWARE_INTRINSIC(Sve, MinAcross, HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) From 28a49cb4a34235d7eb98e61fcc5d5e85f33d2620 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 14:58:06 -0700 Subject: [PATCH 14/29] Handle FMA in LSRA and codegen --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 89 ++++++++++++++++----- src/coreclr/jit/lsraarm64.cpp | 67 +++++++++++++--- 2 files changed, 125 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index dd88aa41e884b..9bc986af4000d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -572,11 +572,63 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 3: { assert(instrIsRMW); - assert(targetReg != falseReg); - assert(targetReg != embMaskOp2Reg); - assert(targetReg != embMaskOp3Reg); assert(!HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)); + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + { + bool useAddend = true; + if (targetReg == embMaskOp2Reg) + { + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp2Reg); + } + else if (targetReg == embMaskOp3Reg) + { + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + } + else + { + // Nothing to do here + } + + switch (intrinEmbMask.id) + { + case NI_Sve_FusedMultiplyAdd: + insEmbMask = useAddend ? INS_sve_fmla : INS_sve_fmad; + break; + + case NI_Sve_FusedMultiplyAddNegated: + insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; + break; + + case NI_Sve_FusedMultiplySubtract: + insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; + break; + + case NI_Sve_FusedMultiplySubtractNegated: + insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; + break; + + case NI_Sve_MultiplyAdd: + insEmbMask = useAddend ? INS_sve_mla : INS_sve_mad; + break; + + case NI_Sve_MultiplySubtract: + insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; + break; + + default: + unreached(); + } + } + else + { + assert(targetReg != falseReg); + assert(targetReg != embMaskOp2Reg); + assert(targetReg != embMaskOp3Reg); + } + if (intrin.op3->IsVectorZero()) { // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the @@ -584,32 +636,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != embMaskOp2Reg); GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); - - // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand - // `embMaskOp2Reg` is the second operand and `embMaskOp3Reg` is the third operand. - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); } else { - // If the instruction just has "predicated" version, then move the "embMaskOp1Reg" - // into targetReg. Next, do the predicated operation on the targetReg and last, - // use "sel" to select the active lanes based on mask, and set inactive lanes - // to falseReg. - - assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id)); - if (targetReg != embMaskOp1Reg) { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); + // Since this is RMW node, we need to move `embMaskOp1Reg -> targetReg` first. + GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, + true /* canSkip */, opt); } - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); - - GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, - opt, INS_SCALABLE_OPTS_UNPREDICATED); + if (targetReg != falseReg) + { + // Next, if `falseReg` is not same as `targetReg`, merge it with `target` + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, falseReg, + opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); + } } + + // `targetReg` is same as `falseReg`. Just generate the predicated version of instruction + GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); break; } default: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e6039783bf977..4d47cf21fef80 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1626,7 +1626,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } } - else if (HWIntrinsicInfo::NeedsConsecutiveRegisters(intrin.id)) { switch (intrin.id) @@ -1768,21 +1767,69 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou else if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp()) && (intrin.op2->isRMWHWIntrinsic(compiler))) { + assert(intrin.op3 != nullptr); + // For ConditionalSelect, if there is an embedded operation, and the operation has RMW semantics // then record delay-free for operands as well as the "merge" value - GenTreeHWIntrinsic* intrinEmbOp2 = intrin.op2->AsHWIntrinsic(); - size_t numArgs = intrinEmbOp2->GetOperandCount(); - assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3)); - tgtPrefUse = BuildUse(intrinEmbOp2->Op(1)); - srcCount += 1; + GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic(); + size_t numArgs = embOp2Node->GetOperandCount(); + const HWIntrinsic intrinEmb(embOp2Node); + numArgs = embOp2Node->GetOperandCount(); - for (size_t argNum = 2; argNum <= numArgs; argNum++) + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { - srcCount += BuildDelayFreeUses(intrinEmbOp2->Op(argNum), intrinEmbOp2->Op(1)); + assert(embOp2Node->isRMWHWIntrinsic(compiler)); + assert(numArgs == 3); + + LIR::Use use; + GenTree* user = nullptr; + + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(embOp2Node, &use)) + { + user = use.User(); + } + unsigned resultOpNum = + embOp2Node->GetResultOpNumForRmwIntrinsic(user, intrinEmb.op1, intrinEmb.op2, intrinEmb.op3); + + GenTree* emitOp1 = intrinEmb.op1; + GenTree* emitOp2 = intrinEmb.op2; + GenTree* emitOp3 = intrinEmb.op3; + + if (resultOpNum == 2) + { + // op2 = op1 + (op2 * op3) + std::swap(emitOp1, emitOp2); + } + else if (resultOpNum == 3) + { + // op3 = op1 + (op2 * op3) + std::swap(emitOp1, emitOp3); + } + else + { + // op1 = op1 + (op2 * op3) + // Nothing needs to be done + } + + tgtPrefUse = BuildUse(emitOp1); + srcCount += 1; + srcCount += BuildDelayFreeUses(emitOp2, emitOp1); + srcCount += BuildDelayFreeUses(emitOp3, emitOp1); + srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); } + else + { + assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3)); + tgtPrefUse = BuildUse(embOp2Node->Op(1)); + srcCount += 1; - assert(intrin.op3 != nullptr); - srcCount += BuildDelayFreeUses(intrin.op3, intrinEmbOp2->Op(1)); + for (size_t argNum = 2; argNum <= numArgs; argNum++) + { + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), embOp2Node->Op(1)); + } + + srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(1)); + } } else if (intrin.op2 != nullptr) From 722dd55f65df1983982009c177485647127bca17 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 15:07:03 -0700 Subject: [PATCH 15/29] Remove the SpecialCodeGen flag from selectedScalar --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 6f18b48813643..305bb5c6859ab 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -47,10 +47,10 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) From 229f78f0ec68e60a6ff6244da93ef6c99db60414 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 17:40:55 -0700 Subject: [PATCH 16/29] address some more scenarios --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 130 +++++++++++--------- 1 file changed, 75 insertions(+), 55 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9bc986af4000d..c58cc2746abd7 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -572,61 +572,55 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 3: { assert(instrIsRMW); - assert(!HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id)); + assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + assert(falseReg != embMaskOp1Reg); + assert(falseReg != embMaskOp2Reg); + assert(falseReg != embMaskOp3Reg); - if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + bool useAddend = true; + if (targetReg == embMaskOp2Reg) { - bool useAddend = true; - if (targetReg == embMaskOp2Reg) - { - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp2Reg); - } - else if (targetReg == embMaskOp3Reg) - { - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp3Reg); - } - else - { - // Nothing to do here - } + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp2Reg); + } + else if (targetReg == embMaskOp3Reg) + { + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + } + else + { + // Nothing to do here + } - switch (intrinEmbMask.id) - { - case NI_Sve_FusedMultiplyAdd: - insEmbMask = useAddend ? INS_sve_fmla : INS_sve_fmad; - break; + switch (intrinEmbMask.id) + { + case NI_Sve_FusedMultiplyAdd: + insEmbMask = useAddend ? INS_sve_fmla : INS_sve_fmad; + break; - case NI_Sve_FusedMultiplyAddNegated: - insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; - break; + case NI_Sve_FusedMultiplyAddNegated: + insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; + break; - case NI_Sve_FusedMultiplySubtract: - insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; - break; + case NI_Sve_FusedMultiplySubtract: + insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; + break; - case NI_Sve_FusedMultiplySubtractNegated: - insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; - break; + case NI_Sve_FusedMultiplySubtractNegated: + insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; + break; - case NI_Sve_MultiplyAdd: - insEmbMask = useAddend ? INS_sve_mla : INS_sve_mad; - break; + case NI_Sve_MultiplyAdd: + insEmbMask = useAddend ? INS_sve_mla : INS_sve_mad; + break; - case NI_Sve_MultiplySubtract: - insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; - break; + case NI_Sve_MultiplySubtract: + insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; + break; - default: - unreached(); - } - } - else - { - assert(targetReg != falseReg); - assert(targetReg != embMaskOp2Reg); - assert(targetReg != embMaskOp3Reg); + default: + unreached(); } if (intrin.op3->IsVectorZero()) @@ -639,24 +633,50 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - if (targetReg != embMaskOp1Reg) - { - // Since this is RMW node, we need to move `embMaskOp1Reg -> targetReg` first. - GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp1Reg, - true /* canSkip */, opt); - } + // Below are the considerations we need to handle: + // + // targetReg == falseReg && targetReg == embMaskOp1Reg + // fmla Zd, P/m, Zn, Zm + // + // targetReg == falseReg && targetReg != embMaskOp1Reg + // movprfx target, P/m, embMaskOp1Reg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg == embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg != embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // Note that, we just check if the targetReg/falseReg or targetReg/embMaskOp1Reg + // coincides or not. Other combination like falseReg/embMaskOp*Reg cannot happen + // because we marked embMaskOp*Reg as delayFree. if (targetReg != falseReg) { - // Next, if `falseReg` is not same as `targetReg`, merge it with `target` - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, falseReg, + // If falseReg value is not present in targetReg yet, move the inactive lanes + // into the targetReg using `sel`. Since this is RMW, the active lanes should + // have the value from embMaskOp1Reg + + GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, + falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + } + else if (targetReg != embMaskOp1Reg) + { + // If target already contains the values of `falseReg`, just merge the lanes from + // `embMaskOp1Reg`, again because this is RMW semantics. + + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); } } - // `targetReg` is same as `falseReg`. Just generate the predicated version of instruction + // Finally, perform the desired operation. GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, embMaskOp3Reg, opt); + break; } default: From a21439f6e9075ec22733cf9b9d93692930fc79d1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 17:42:56 -0700 Subject: [PATCH 17/29] jit format --- src/coreclr/jit/emitarm64.cpp | 8 ++++---- src/coreclr/jit/emitarm64sve.cpp | 12 ++++++------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 6 +++--- src/coreclr/jit/lsraarm64.cpp | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3daf63e0b08e4..091de211cb013 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4250,11 +4250,11 @@ void emitter::emitIns_Mov( case INS_sve_mov: { - //TODO-SVE: Remove check for insOptsNone() when predicate registers - // are present. + // TODO-SVE: Remove check for insOptsNone() when predicate registers + // are present. if (insOptsNone(opt) && isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) { - //assert(insOptsNone(opt)); + // assert(insOptsNone(opt)); opt = INS_OPTS_SCALABLE_B; attr = EA_SCALABLE; @@ -4273,7 +4273,7 @@ void emitter::emitIns_Mov( { return; } - fmt = IF_SVE_AU_3A; + fmt = IF_SVE_AU_3A; } else { diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 2c6b8e30abcb8..52d7b37e6ce48 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -10407,8 +10407,8 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd - code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn + code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd + code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn if (id->idIns() != INS_sve_mov) { code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm @@ -12942,8 +12942,8 @@ void emitter::emitInsSveSanityCheck(instrDesc* id) break; case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) assert(insOptsScalable(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm assert((id->idIns() == INS_sve_mov) || isVectorRegister(id->idReg3())); // mmmmm/aaaaa break; @@ -14605,10 +14605,10 @@ void emitter::emitDispInsSveHelp(instrDesc* id) // .D, .D, .D case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd if (id->idIns() == INS_sve_mov) { - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn/mmmmm + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn/mmmmm } else { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index c58cc2746abd7..0dfb606940a03 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -634,18 +634,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else { // Below are the considerations we need to handle: - // + // // targetReg == falseReg && targetReg == embMaskOp1Reg // fmla Zd, P/m, Zn, Zm // // targetReg == falseReg && targetReg != embMaskOp1Reg // movprfx target, P/m, embMaskOp1Reg // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // + // // targetReg != falseReg && targetReg == embMaskOp1Reg // sel target, P/m, embMaskOp1Reg, falseReg // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // + // // targetReg != falseReg && targetReg != embMaskOp1Reg // sel target, P/m, embMaskOp1Reg, falseReg // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4d47cf21fef80..5c35771eefaee 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1772,8 +1772,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // For ConditionalSelect, if there is an embedded operation, and the operation has RMW semantics // then record delay-free for operands as well as the "merge" value GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic(); - size_t numArgs = embOp2Node->GetOperandCount(); - const HWIntrinsic intrinEmb(embOp2Node); + size_t numArgs = embOp2Node->GetOperandCount(); + const HWIntrinsic intrinEmb(embOp2Node); numArgs = embOp2Node->GetOperandCount(); if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) From 6a01ca4d523b31998ce8bafd180ac88710954200 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 17:49:09 -0700 Subject: [PATCH 18/29] Add MultiplyBySelectedScalar --- .../Intrinsics/Arm/Sve.PlatformNotSupported.cs | 13 +++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 14 ++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 3 +++ 3 files changed, 30 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 528f928c5ae08..1712471657ef2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -1768,6 +1768,19 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// MultiplyBySelectedScalar : Multiply + + /// + /// svfloat64_t svmul_lane[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm_index) + /// FMUL Zresult.D, Zop1.D, Zop2.D[imm_index] + /// + public static unsafe Vector MultiplyBySelectedScalar(Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svmul_lane[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm_index) + /// FMUL Zresult.S, Zop1.S, Zop2.S[imm_index] + /// + public static unsafe Vector MultiplyBySelectedScalar(Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } /// MultiplySubtract : Multiply-subtract, minuend first diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index dc19387281775..8913112cc36cd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -1824,6 +1824,20 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + /// MultiplyBySelectedScalar : Multiply + + /// + /// svfloat64_t svmul_lane[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm_index) + /// FMUL Zresult.D, Zop1.D, Zop2.D[imm_index] + /// + public static unsafe Vector MultiplyBySelectedScalar(Vector left, Vector right, [ConstantExpected] byte rightIndex) => MultiplyBySelectedScalar(left, right, rightIndex); + + /// + /// svfloat32_t svmul_lane[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm_index) + /// FMUL Zresult.S, Zop1.S, Zop2.S[imm_index] + /// + public static unsafe Vector MultiplyBySelectedScalar(Vector left, Vector right, [ConstantExpected] byte rightIndex) => MultiplyBySelectedScalar(left, right, rightIndex); + /// MultiplySubtract : Multiply-subtract, minuend first diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 6d29e78fcb9cd..4d8eb56a0a2b8 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4391,6 +4391,9 @@ internal Arm64() { } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } From 318cbf39ec3068f63aafb78f8bc7333caad091a5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 17:53:30 -0700 Subject: [PATCH 19/29] Map the API to the instruction --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 305bb5c6859ab..0e5cd70744220 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -87,6 +87,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) From e3fc8307c3b8c9dfb31276bc359624189e355614 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 18:43:38 -0700 Subject: [PATCH 20/29] fix a bug where *Indexed API used with ConditionalSelect were failing ` Sve.ConditionalSelect(op1, Sve.MultiplyBySelectedScalar(op1, op2, 0), op3);` was failing because we were trying to check if `MultiplyBySelectedScalar` is contained and we hit the assert because it is not containable. Added the check. --- src/coreclr/jit/lowerarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index ee54776825f19..83c27e69dab08 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3334,7 +3334,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) uint32_t maskSize = genTypeSize(node->GetSimdBaseType()); uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); - if ((maskSize == operSize) && IsInvariantInRange(op2, node)) + if ((maskSize == operSize) && IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic()) { MakeSrcContained(node, op2); op2->MakeEmbMaskOp(); From 1ca5539f0cea0ced0ad7fff72930ac511055939e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 18:50:59 -0700 Subject: [PATCH 21/29] unpredicated movprfx should not send opt --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 0dfb606940a03..e748b1d29b6e2 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -553,7 +553,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // embMaskOp1Reg is same as `falseReg`, but not same as `targetReg`. Move the // `embMaskOp1Reg` i.e. `falseReg` in `targetReg`, using "unpredicated movprfx", so the // subsequent `insEmbMask` operation can be merged on top of it. - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg, opt); + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); } // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand From eb41e1d35a3a28c8ca46050e46f2ba012f2a2001 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 19:07:25 -0700 Subject: [PATCH 22/29] Add the missing flags for Subtract/Multiply --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 0e5cd70744220..e88b6f2f93d8b 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -85,7 +85,7 @@ HARDWARE_INTRINSIC(Sve, Min, HARDWARE_INTRINSIC(Sve, MinAcross, -1, -1, false, {INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_sminv, INS_sve_uminv, INS_sve_fminv, INS_sve_fminv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, MinNumber, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnm, INS_sve_fminnm}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) -HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) @@ -96,7 +96,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32, HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Xor, -1, -1, false, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) From 7874f2597559ffb8be0e465eaab53b73a88187ff Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 19:13:28 -0700 Subject: [PATCH 23/29] Added tests for MultiplyBySelectedScalar Also updated *SelectedScalar* tests for ConditionalSelect --- .../GenerateHWIntrinsicTests_Arm.cs | 18 +- .../_SveImmBinaryOpTestTemplate.template | 412 ++++++++++++++++++ .../Shared/_SveImmTernOpTestTemplate.template | 181 ++++++-- .../Shared/_SveTernOpTestTemplate.template | 22 +- 4 files changed, 572 insertions(+), 61 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 7fa47f6f67832..93332e4dd14d8 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -141,8 +141,9 @@ ("_TernaryOpTestTemplate.template", "SecureHashTernOpTest.template", new Dictionary { ["TemplateName"] = "SecureHash", ["TemplateValidationLogic"] = SecureHashOpTest_ValidationLogic }), ("_SveUnaryOpTestTemplate.template", "SveSimpleVecOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), ("_SveBinaryOpTestTemplate.template", "SveVecBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), + ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), ("_SveTernOpTestTemplate.template", "SveVecTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), - ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, }), + ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveMinimalUnaryOpTestTemplate.template", "SveVecReduceUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = VecReduceOpTest_ValidationLogic }), }; @@ -3016,8 +3017,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3025,8 +3026,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3143,8 +3144,11 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template new file mode 100644 index 0000000000000..0585fe27a8be0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template @@ -0,0 +1,412 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}BinaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + // Validates executing the test inside conditional, with op1 as falseValue + test.ConditionalSelect_Op1(); + // Validates executing the test inside conditional, with op2 as falseValue + test.ConditionalSelect_Op2(); + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_FalseOp(); + // Validates executing the test inside conditional, with op3 as zero + test.ConditionalSelect_ZeroOp(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}BinaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}BinaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + private static readonly byte Imm = {Imm}; + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + + private DataTable _dataTable; + + public {TemplateName}BinaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + _dataTable = new DataTable(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray2Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>), typeof(byte) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + (byte){Imm} + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var result = {Isa}.{Method}(op1, op2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + public void ConditionalSelect_Op1() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld1); + } + public void ConditionalSelect_Op2() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld2); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld2); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld2); + } + + public void ConditionalSelect_FalseOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _falseFld); + } + public void ConditionalSelect_ZeroOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, {Op1VectorType}<{Op1BaseType}>.Zero); + } + [method: MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + { + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3, Imm), falseOp); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op1BaseType}> secondOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + { + {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] second = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref mask[0]), maskOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref first[0]), firstOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref second[0]), secondOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref falseVal[0]), falseOp); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref result[0]), ref Unsafe.AsRef(output), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + bool succeeded = true; + {TemplateValidationLogicForCndSel} + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" mask: ({string.Join(", ", mask)})"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" falseOp: ({string.Join(", ", falseVal)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template index abe0377c07e4b..bc7fafb5b24e9 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template @@ -50,6 +50,16 @@ namespace JIT.HardwareIntrinsics.Arm // Validates passing an instance member of a struct works test.RunStructFldScenario(); + // Validates executing the test inside conditional, with op1 as falseValue + test.ConditionalSelect_Op1(); + // Validates executing the test inside conditional, with op2 as falseValue + test.ConditionalSelect_Op2(); + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_Op3(); + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_FalseOp(); + // Validates executing the test inside conditional, with op3 as zero + test.ConditionalSelect_ZeroOp(); } else { @@ -80,11 +90,11 @@ namespace JIT.HardwareIntrinsics.Arm private ulong alignment; - public DataTable({Op1BaseType}[] inArray1, {Op2BaseType}[] inArray2, {Op3BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); - int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); - int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) { @@ -104,8 +114,8 @@ namespace JIT.HardwareIntrinsics.Arm this.alignment = (ulong)alignment; Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); - Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); - Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); } public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); @@ -130,8 +140,8 @@ namespace JIT.HardwareIntrinsics.Arm private struct TestStruct { public {Op1VectorType}<{Op1BaseType}> _fld1; - public {Op2VectorType}<{Op2BaseType}> _fld2; - public {Op3VectorType}<{Op3BaseType}> _fld3; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; public static TestStruct Create() { @@ -139,10 +149,10 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); - for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); return testStruct; } @@ -159,18 +169,19 @@ namespace JIT.HardwareIntrinsics.Arm private static readonly int LargestVectorSize = {LargestVectorSize}; private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); - private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); - private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); private static readonly byte Imm = {Imm}; + private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; - private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; - private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private {Op1VectorType}<{Op1BaseType}> _mask; private {Op1VectorType}<{Op1BaseType}> _fld1; - private {Op2VectorType}<{Op2BaseType}> _fld2; - private {Op3VectorType}<{Op3BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _falseFld; private DataTable _dataTable; @@ -178,16 +189,19 @@ namespace JIT.HardwareIntrinsics.Arm { Succeeded = true; + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); - for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } - for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } - for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); } @@ -201,8 +215,8 @@ namespace JIT.HardwareIntrinsics.Arm var result = {Isa}.{Method}( Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), {Imm} ); @@ -218,8 +232,8 @@ namespace JIT.HardwareIntrinsics.Arm var result = {Isa}.{Method}( {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), - {LoadIsa}.Load{Op2VectorType}(loadMask, ({Op2BaseType}*)(_dataTable.inArray2Ptr)), - {LoadIsa}.Load{Op3VectorType}(loadMask, ({Op3BaseType}*)(_dataTable.inArray3Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray3Ptr)), {Imm} ); @@ -231,11 +245,11 @@ namespace JIT.HardwareIntrinsics.Arm { TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); - var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof({Op3VectorType}<{Op3BaseType}>), typeof(byte) }) + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>), typeof(byte) }) .Invoke(null, new object[] { Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), (byte){Imm} }); @@ -248,8 +262,8 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); - var op2 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); - var op3 = Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); var result = {Isa}.{Method}(op1, op2, op3, {Imm}); Unsafe.Write(_dataTable.outArrayPtr, result); @@ -284,6 +298,58 @@ namespace JIT.HardwareIntrinsics.Arm var test = TestStruct.Create(); test.RunStructFldScenario(this); } + public void ConditionalSelect_Op1() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld1); + } + public void ConditionalSelect_Op2() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld2); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld2); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op2_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld2); + } + public void ConditionalSelect_Op3() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld3); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld3); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op3_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld3); + } + public void ConditionalSelect_FalseOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _falseFld); + } + public void ConditionalSelect_ZeroOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + } + [method: MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + { + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3, Imm), falseOp); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + } public void RunUnsupportedScenario() { @@ -305,17 +371,46 @@ namespace JIT.HardwareIntrinsics.Arm Succeeded = false; } } + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op1BaseType}> secondOp, {Op1VectorType}<{Op1BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + { + {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] second = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] third = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref mask[0]), maskOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref first[0]), firstOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref second[0]), secondOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref third[0]), thirdOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref falseVal[0]), falseOp); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref result[0]), ref Unsafe.AsRef(output), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + bool succeeded = true; + {TemplateValidationLogicForCndSel} + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" mask: ({string.Join(", ", mask)})"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" falseOp: ({string.Join(", ", falseVal)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + Succeeded = false; + } + } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; - {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; - {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); - Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); - Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); ValidateResult(inArray1, inArray2, inArray3, outArray, method); @@ -324,19 +419,19 @@ namespace JIT.HardwareIntrinsics.Arm private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; - {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; - {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); ValidateResult(inArray1, inArray2, inArray3, outArray, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") { bool succeeded = true; @@ -344,7 +439,7 @@ namespace JIT.HardwareIntrinsics.Arm if (!succeeded) { - TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2VectorType}<{Op2BaseType}>, {Op3VectorType}<{Op3BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template index b8ce72108d760..86eaa7a453da0 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template @@ -23,7 +23,7 @@ namespace JIT.HardwareIntrinsics.Arm [Fact] public static void {TestName}() { - var test = new {TemplateName}BinaryOpTest__{TestName}(); + var test = new {TemplateName}TernaryOpTest__{TestName}(); if (test.IsSupported) { @@ -79,7 +79,7 @@ namespace JIT.HardwareIntrinsics.Arm } } - public sealed unsafe class {TemplateName}BinaryOpTest__{TestName} + public sealed unsafe class {TemplateName}TernaryOpTest__{TestName} { private struct DataTable { @@ -103,7 +103,7 @@ namespace JIT.HardwareIntrinsics.Arm int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) { - throw new ArgumentException($"Invalid value of alignment: {alignment}, sizeOfinArray1: {sizeOfinArray1}, sizeOfinArray2: {sizeOfinArray2}, sizeOfinArray3: {sizeOfinArray3}, sizeOfoutArray: {sizeOfoutArray}"); + throw new ArgumentException("Invalid value of alignment"); } this.inArray1 = new byte[alignment * 2]; @@ -157,12 +157,12 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); return testStruct; } - public void RunStructFldScenario({TemplateName}BinaryOpTest__{TestName} testClass) + public void RunStructFldScenario({TemplateName}TernaryOpTest__{TestName} testClass) { var result = {Isa}.{Method}(_fld1, _fld2, _fld3); @@ -189,7 +189,7 @@ namespace JIT.HardwareIntrinsics.Arm private DataTable _dataTable; - public {TemplateName}BinaryOpTest__{TestName}() + public {TemplateName}TernaryOpTest__{TestName}() { Succeeded = true; @@ -456,7 +456,7 @@ namespace JIT.HardwareIntrinsics.Arm ValidateResult(inArray1, inArray2, inArray3, outArray, method); } - private void ValidateResult({Op1BaseType}[] first, {Op1BaseType}[] second, {Op1BaseType}[] third, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") { bool succeeded = true; @@ -464,10 +464,10 @@ namespace JIT.HardwareIntrinsics.Arm if (!succeeded) { - TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); - TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); - TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); - TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); TestLibrary.TestFramework.LogInformation(string.Empty); From f756afb3838d540ca2b9a1eb40d11d5319fd3f46 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 19:57:18 -0700 Subject: [PATCH 24/29] fixes to test cases --- .../GenerateHWIntrinsicTests_Arm.cs | 56 +++++++++---------- .../_SveImmBinaryOpTestTemplate.template | 11 ++-- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 93332e4dd14d8..fc58ae8716a23 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3014,23 +3014,23 @@ ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(first[i], second[i], third[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), @@ -3135,26 +3135,26 @@ ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Multiply_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Multiply", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.Multiply(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Multiply(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Multiply_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Multiply", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.Multiply(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Multiply(left[i], right[i])"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(first[i], second[i], third[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplySubtract_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Or_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Or", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(sbyte)TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.Or(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Or(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Or_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Or", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(short)TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.Or(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Or(left[i], right[i])"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template index 0585fe27a8be0..e891aa8a1810e 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template @@ -159,11 +159,14 @@ namespace JIT.HardwareIntrinsics.Arm private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); private static readonly byte Imm = {Imm}; + private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private {Op1VectorType}<{Op1BaseType}> _mask; private {Op1VectorType}<{Op1BaseType}> _fld1; private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _falseFld; private DataTable _dataTable; @@ -177,7 +180,7 @@ namespace JIT.HardwareIntrinsics.Arm Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } @@ -311,11 +314,11 @@ namespace JIT.HardwareIntrinsics.Arm ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, {Op1VectorType}<{Op1BaseType}>.Zero); } [method: MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> falseOp) { - var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3, Imm), falseOp); + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, Imm), falseOp); Unsafe.Write(_dataTable.outArrayPtr, result); - ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + ValidateConditionalSelectResult(mask, op1, op2, falseOp, _dataTable.outArrayPtr); } public void RunUnsupportedScenario() From 290493442f3ac209dfbe05cdbbfad987eae48f57 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 22:24:00 -0700 Subject: [PATCH 25/29] fix the parameter for selectedScalar test --- .../GenerateHWIntrinsicTests_Arm.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index fc58ae8716a23..e44c25f90b294 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3017,8 +3017,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3026,8 +3026,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), From 98ac0ce6d961f382b23f8552e4216c4cdcd9eca0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 May 2024 22:26:14 -0700 Subject: [PATCH 26/29] jit format --- src/coreclr/jit/lowerarmarch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 83c27e69dab08..aa34a3d2f311a 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3334,7 +3334,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) uint32_t maskSize = genTypeSize(node->GetSimdBaseType()); uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); - if ((maskSize == operSize) && IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic()) + if ((maskSize == operSize) && IsInvariantInRange(op2, node) && + op2->isEmbeddedMaskingCompatibleHWIntrinsic()) { MakeSrcContained(node, op2); op2->MakeEmbMaskOp(); From 0f89e104248932f7f334093e67eda8dbc3d75d78 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 12:22:48 -0700 Subject: [PATCH 27/29] Contain(op3) of CndSel if op1 is AllTrueMask --- src/coreclr/jit/lowerarmarch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index aa34a3d2f311a..6e3b8e3202991 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3343,10 +3343,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } // Handle op3 - if (op3->IsVectorZero()) + if (op3->IsVectorZero() && op1->IsMaskAllBitsSet()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. + // Do this only if op1 was AllTrueMask MakeSrcContained(node, op3); } From 8e928ec0b2423787f74c8f4bc9c705f627acb470 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 13:12:14 -0700 Subject: [PATCH 28/29] Handle FMA properly --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 56 ++++++++++++++++++--- src/coreclr/jit/lsraarm64.cpp | 3 ++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bad2d6220f98f..99e5424dcf698 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -576,20 +576,69 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(falseReg != embMaskOp2Reg); assert(falseReg != embMaskOp3Reg); + // For FMA, the operation we are trying to perform is: + // result = op1 + (op2 * op3) + // + // There are two instructions that can be used depending on which operand's register, + // optionally, will store the final result. + // + // 1. If the result is stored in the operand that was used as an "addend" in the operation, + // then we use `FMLA` format: + // reg1 = reg1 + (reg2 * reg3) + // + // 2. If the result is stored in the operand that was used as a "multiplicand" in the operation, + // then we use `FMAD` format: + // reg1 = (reg1 * reg2) + reg3 + // + // Check if the result's register is same as that of one of the operand's register and accordingly + // pick the appropriate format. Suppose `targetReg` holds the result, then we have following cases: + // + // Case# 1: Result is stored in the operand that held the "addend" + // targetReg == reg1 + // + // We generate the FMLA instruction format and no further changes are needed. + // + // Case# 2: Result is stored in the operand `op2` that held the "multiplicand" + // targetReg == reg2 + // + // So we basically have an operation: + // reg2 = reg1 + (reg2 * reg3) + // + // Since, the result will be stored in the "multiplicand", we pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg2 = reg1 + (reg2 * reg3) // to start with + // reg2 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = reg3 + (reg1 * reg2) // swap reg1 <--> reg2 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + // + // Case# 3: Result is stored in the operand `op3` that held the "multiplier" + // targetReg == reg3 + // + // So we basically have an operation: + // reg3 = reg1 + (reg2 * reg3) + // Since, the result will be stored in the "multiplier", we again pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg3 = reg1 + (reg2 * reg3) // to start with + // reg1 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + bool useAddend = true; if (targetReg == embMaskOp2Reg) { + // Case# 2 useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); std::swap(embMaskOp1Reg, embMaskOp2Reg); } else if (targetReg == embMaskOp3Reg) { + // Case# 3 useAddend = false; std::swap(embMaskOp1Reg, embMaskOp3Reg); } else { - // Nothing to do here + // Case# 1 } switch (intrinEmbMask.id) @@ -744,11 +793,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) INS_SCALABLE_OPTS_UNPREDICATED); } break; - case 4: - assert(!isRMW); - GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, - INS_SCALABLE_OPTS_UNPREDICATED); - break; default: unreached(); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5c35771eefaee..f6f2cb2a2021c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1798,12 +1798,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (resultOpNum == 2) { // op2 = op1 + (op2 * op3) + std::swap(emitOp1, emitOp3); std::swap(emitOp1, emitOp2); + // op1 = (op1 * op2) + op3 } else if (resultOpNum == 3) { // op3 = op1 + (op2 * op3) std::swap(emitOp1, emitOp3); + // op1 = (op1 * op2) + op3 } else { From c713d3155495ca390098e6d4853dbb74ab1f999c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 15:31:39 -0700 Subject: [PATCH 29/29] added assert --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 99e5424dcf698..e8774f0363d56 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -677,6 +677,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // destination using /Z. assert(targetReg != embMaskOp2Reg); + assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); } else