Skip to content

Commit

Permalink
Arm64/SVE: Implemented Scale and Sqrt (#103663)
Browse files Browse the repository at this point in the history
* Added ConverToInt32 and ConvertToUInt32 for float inputs.

* Added flags to handle only low predicate registers.

* Fix whitespace

* Remove special codegen flag

* Added new test template for operations with different return types.

* Add new test template.

* Added api for ConvertToInt32 and ConvertToUInt 32 for double.

* Round SVE intrinsics for floats.

* Completed Round SVE fp apis.

* Completed sve apis for scale and sqrt, added a new test template for when Op1 and Op2 have different types.

* Fixed test templates.
  • Loading branch information
ebepho committed Jun 24, 2024
1 parent f0e70b2 commit ddcbc8b
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,13 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCount,
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCount, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincb, INS_sve_uqincb, INS_sve_sqincb, INS_sve_uqincb, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, true, {INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, Scale, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Sqrt, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fsqrt, INS_sve_fsqrt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5394,6 +5394,27 @@ internal Arm64() { }
public static unsafe Vector<ulong> SaturatingIncrementByActiveElementCount(Vector<ulong> value, Vector<ulong> from) { throw new PlatformNotSupportedException(); }


/// Scale : Adjust exponent

/// <summary>
/// svfloat64_t svscale[_f64]_m(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// svfloat64_t svscale[_f64]_x(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// svfloat64_t svscale[_f64]_z(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// </summary>
public static unsafe Vector<double> Scale(Vector<double> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svscale[_f32]_m(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// svfloat32_t svscale[_f32]_x(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// svfloat32_t svscale[_f32]_z(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// </summary>
public static unsafe Vector<float> Scale(Vector<float> left, Vector<int> right) { throw new PlatformNotSupportedException(); }


/// SignExtend16 : Sign-extend the low 16 bits

/// <summary>
Expand Down Expand Up @@ -5475,6 +5496,27 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// Sqrt : Square root

/// <summary>
/// svfloat64_t svsqrt[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
/// FSQRT Ztied.D, Pg/M, Zop.D
/// svfloat64_t svsqrt[_f64]_x(svbool_t pg, svfloat64_t op)
/// FSQRT Ztied.D, Pg/M, Ztied.D
/// svfloat64_t svsqrt[_f64]_z(svbool_t pg, svfloat64_t op)
/// </summary>
public static unsafe Vector<double> Sqrt(Vector<double> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svsqrt[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
/// FSQRT Ztied.S, Pg/M, Zop.S
/// svfloat32_t svsqrt[_f32]_x(svbool_t pg, svfloat32_t op)
/// FSQRT Ztied.S, Pg/M, Ztied.S
/// svfloat32_t svsqrt[_f32]_z(svbool_t pg, svfloat32_t op)
/// </summary>
public static unsafe Vector<float> Sqrt(Vector<float> value) { throw new PlatformNotSupportedException(); }


/// Non-truncating store

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5437,6 +5437,27 @@ internal Arm64() { }
public static unsafe Vector<ulong> SaturatingIncrementByActiveElementCount(Vector<ulong> value, Vector<ulong> from) => SaturatingIncrementByActiveElementCount(value, from);


/// Scale : Adjust exponent

/// <summary>
/// svfloat64_t svscale[_f64]_m(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// svfloat64_t svscale[_f64]_x(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// FSCALE Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// svfloat64_t svscale[_f64]_z(svbool_t pg, svfloat64_t op1, svint64_t op2)
/// </summary>
public static unsafe Vector<double> Scale(Vector<double> left, Vector<long> right) => Scale(left, right);

/// <summary>
/// svfloat32_t svscale[_f32]_m(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// svfloat32_t svscale[_f32]_x(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// FSCALE Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// svfloat32_t svscale[_f32]_z(svbool_t pg, svfloat32_t op1, svint32_t op2)
/// </summary>
public static unsafe Vector<float> Scale(Vector<float> left, Vector<int> right) => Scale(left, right);


/// SignExtend16 : Sign-extend the low 16 bits

/// <summary>
Expand Down Expand Up @@ -5559,6 +5580,27 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) => SignExtendWideningUpper(value);


/// Sqrt : Square root

/// <summary>
/// svfloat64_t svsqrt[_f64]_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
/// FSQRT Ztied.D, Pg/M, Zop.D
/// svfloat64_t svsqrt[_f64]_x(svbool_t pg, svfloat64_t op)
/// FSQRT Ztied.D, Pg/M, Ztied.D
/// svfloat64_t svsqrt[_f64]_z(svbool_t pg, svfloat64_t op)
/// </summary>
public static unsafe Vector<double> Sqrt(Vector<double> value) => Sqrt(value);

/// <summary>
/// svfloat32_t svsqrt[_f32]_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
/// FSQRT Ztied.S, Pg/M, Zop.S
/// svfloat32_t svsqrt[_f32]_x(svbool_t pg, svfloat32_t op)
/// FSQRT Ztied.S, Pg/M, Ztied.S
/// svfloat32_t svsqrt[_f32]_z(svbool_t pg, svfloat32_t op)
/// </summary>
public static unsafe Vector<float> Sqrt(Vector<float> value) => Sqrt(value);


/// Non-truncating store

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4972,6 +4972,9 @@ internal Arm64() { }
public static ulong SaturatingIncrementByActiveElementCount(ulong value, System.Numerics.Vector<ulong> from) { throw null; }
public static System.Numerics.Vector<ulong> SaturatingIncrementByActiveElementCount(System.Numerics.Vector<ulong> value, System.Numerics.Vector<ulong> from) { throw null; }

public static System.Numerics.Vector<double> Scale(System.Numerics.Vector<double> left, System.Numerics.Vector<long> right) { throw null; }
public static System.Numerics.Vector<float> Scale(System.Numerics.Vector<float> left, System.Numerics.Vector<int> right) { throw null; }

public static System.Numerics.Vector<int> SignExtend16(System.Numerics.Vector<int> value) { throw null; }
public static System.Numerics.Vector<long> SignExtend16(System.Numerics.Vector<long> value) { throw null; }
public static System.Numerics.Vector<long> SignExtend32(System.Numerics.Vector<long> value) { throw null; }
Expand All @@ -4986,6 +4989,9 @@ internal Arm64() { }
public static System.Numerics.Vector<int> SignExtendWideningUpper(System.Numerics.Vector<short> value) { throw null; }
public static System.Numerics.Vector<long> SignExtendWideningUpper(System.Numerics.Vector<int> value) { throw null; }

public static System.Numerics.Vector<double> Sqrt(System.Numerics.Vector<double> value) { throw null; }
public static System.Numerics.Vector<float> Sqrt(System.Numerics.Vector<float> value) { throw null; }

public static unsafe void StoreAndZip(System.Numerics.Vector<byte> mask, byte* address, System.Numerics.Vector<byte> data) { throw null; }
public static unsafe void StoreAndZip(System.Numerics.Vector<byte> mask, byte* address, (System.Numerics.Vector<byte> Value1, System.Numerics.Vector<byte> Value2) data) { throw null; }
public static unsafe void StoreAndZip(System.Numerics.Vector<byte> mask, byte* address, (System.Numerics.Vector<byte> Value1, System.Numerics.Vector<byte> Value2, System.Numerics.Vector<byte> Value3) data) { throw null; }
Expand Down
Loading

0 comments on commit ddcbc8b

Please sign in to comment.