[API Proposal]: Arm64: FEAT_F64MM #94025

a74nh · 2023-10-26T11:17:04Z

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
/// Condensed view of the proposed API: one entry per method name, written generically over T.
/// The "T:" comment above an entry lists the element types that entry stands for, and the
/// trailing comment on each line names the SVE instruction(s) given for that method.
public abstract class SveF64mm : AdvSimd /// Feature: FEAT_F64MM
{

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateEvenInt128FromTwoInputs(Vector<T> left, Vector<T> right); // UZP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateOddInt128FromTwoInputs(Vector<T> left, Vector<T> right); // UZP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveEvenInt128FromTwoInputs(Vector<T> left, Vector<T> right); // TRN1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveOddInt128FromTwoInputs(Vector<T> left, Vector<T> right); // TRN2

  /// The instruction listed depends on the element type (LD1ROB, LD1ROH, LD1ROW or LD1ROD).
  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> LoadVector256AndReplicateToVector(Vector<T> mask, const T *base); // LD1ROH or LD1ROW or LD1ROD or LD1ROB

  /// Not generic: only a Vector<double> form is listed, so no "T:" list applies.
  public static unsafe Vector<double> MatrixMultiplyAccumulate(Vector<double> op1, Vector<double> op2, Vector<double> op3); // FMMLA // MOVPRFX

  /// total method signatures: 8

}

ghost · 2023-10-26T11:17:12Z

Tagging subscribers to this area: @dotnet/area-system-numerics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
/// NOTE(review): this copy is the issue body as quoted in the bot comment. It differs from the
/// SveF64mm listings elsewhere in this thread: the class is named Sve, the methods use
/// "Quadwords" rather than "Int128" in their names, the load is named Load256ReplicateToVector
/// and takes no mask argument, and MatrixMultiplyAccumulate carries no instruction comment.
/// Presumably this is an earlier revision of the proposal -- confirm before relying on it.
public abstract class Sve : AdvSimd /// Feature: FEAT_F64MM
{
  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateEvenQuadwordsFromTwoInputs(Vector<T> left, Vector<T> right); // UZP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateOddQuadwordsFromTwoInputs(Vector<T> left, Vector<T> right); // UZP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveEvenQuadwordsFromTwoInputs(Vector<T> left, Vector<T> right); // TRN1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveOddQuadwordsFromTwoInputs(Vector<T> left, Vector<T> right); // TRN2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveQuadwordsFromHighHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveQuadwordsFromLowHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> Load256ReplicateToVector(const T *base); // LD1ROH or LD1ROW or LD1ROD or LD1ROB

  /// Not generic: only a Vector<double> form is listed.
  public static unsafe Vector<double> MatrixMultiplyAccumulate(Vector<double> op1, Vector<double> op2, Vector<double> op3);

  /// total method signatures: 8
}

Author:	a74nh
Assignees:	-
Labels:	`area-System.Numerics`
Milestone:	-

a74nh · 2023-10-26T11:17:15Z

/// Full API
/// Expanded listing with one concrete overload per element type.
/// Layout: each group opens with a "/// MethodName : description" line; each overload is
/// preceded by a comment giving the corresponding ACLE intrinsic signature and, in quotes,
/// the SVE instruction form(s) listed for it.
public abstract class SveF64mm : AdvSimd /// Feature: FEAT_F64MM
{
    /// ConcatenateEvenInt128FromTwoInputs : Concatenate even quadwords from two inputs

    /// svbfloat16_t svuzp1q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> ConcatenateEvenInt128FromTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svuzp1q[_f16](svfloat16_t op1, svfloat16_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> ConcatenateEvenInt128FromTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svuzp1q[_f32](svfloat32_t op1, svfloat32_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> ConcatenateEvenInt128FromTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svuzp1q[_f64](svfloat64_t op1, svfloat64_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> ConcatenateEvenInt128FromTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svuzp1q[_s8](svint8_t op1, svint8_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> ConcatenateEvenInt128FromTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svuzp1q[_s16](svint16_t op1, svint16_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> ConcatenateEvenInt128FromTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svuzp1q[_s32](svint32_t op1, svint32_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> ConcatenateEvenInt128FromTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svuzp1q[_s64](svint64_t op1, svint64_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> ConcatenateEvenInt128FromTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svuzp1q[_u8](svuint8_t op1, svuint8_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> ConcatenateEvenInt128FromTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svuzp1q[_u16](svuint16_t op1, svuint16_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> ConcatenateEvenInt128FromTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svuzp1q[_u32](svuint32_t op1, svuint32_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> ConcatenateEvenInt128FromTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svuzp1q[_u64](svuint64_t op1, svuint64_t op2) : "UZP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> ConcatenateEvenInt128FromTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// ConcatenateOddInt128FromTwoInputs : Concatenate odd quadwords from two inputs

    /// svbfloat16_t svuzp2q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> ConcatenateOddInt128FromTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svuzp2q[_f16](svfloat16_t op1, svfloat16_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> ConcatenateOddInt128FromTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svuzp2q[_f32](svfloat32_t op1, svfloat32_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> ConcatenateOddInt128FromTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svuzp2q[_f64](svfloat64_t op1, svfloat64_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> ConcatenateOddInt128FromTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svuzp2q[_s8](svint8_t op1, svint8_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> ConcatenateOddInt128FromTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svuzp2q[_s16](svint16_t op1, svint16_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> ConcatenateOddInt128FromTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svuzp2q[_s32](svint32_t op1, svint32_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> ConcatenateOddInt128FromTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svuzp2q[_s64](svint64_t op1, svint64_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> ConcatenateOddInt128FromTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svuzp2q[_u8](svuint8_t op1, svuint8_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> ConcatenateOddInt128FromTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svuzp2q[_u16](svuint16_t op1, svuint16_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> ConcatenateOddInt128FromTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svuzp2q[_u32](svuint32_t op1, svuint32_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> ConcatenateOddInt128FromTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svuzp2q[_u64](svuint64_t op1, svuint64_t op2) : "UZP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> ConcatenateOddInt128FromTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// InterleaveEvenInt128FromTwoInputs : Interleave even quadwords from two inputs

    /// svbfloat16_t svtrn1q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> InterleaveEvenInt128FromTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svtrn1q[_f16](svfloat16_t op1, svfloat16_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> InterleaveEvenInt128FromTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svtrn1q[_f32](svfloat32_t op1, svfloat32_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> InterleaveEvenInt128FromTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svtrn1q[_f64](svfloat64_t op1, svfloat64_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> InterleaveEvenInt128FromTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svtrn1q[_s8](svint8_t op1, svint8_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> InterleaveEvenInt128FromTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svtrn1q[_s16](svint16_t op1, svint16_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> InterleaveEvenInt128FromTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svtrn1q[_s32](svint32_t op1, svint32_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> InterleaveEvenInt128FromTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svtrn1q[_s64](svint64_t op1, svint64_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> InterleaveEvenInt128FromTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svtrn1q[_u8](svuint8_t op1, svuint8_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> InterleaveEvenInt128FromTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svtrn1q[_u16](svuint16_t op1, svuint16_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> InterleaveEvenInt128FromTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svtrn1q[_u32](svuint32_t op1, svuint32_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> InterleaveEvenInt128FromTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svtrn1q[_u64](svuint64_t op1, svuint64_t op2) : "TRN1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> InterleaveEvenInt128FromTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// InterleaveInt128FromHighHalvesOfTwoInputs : Interleave quadwords from high halves of two inputs

    /// svbfloat16_t svzip2q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svzip2q[_f16](svfloat16_t op1, svfloat16_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svzip2q[_f32](svfloat32_t op1, svfloat32_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svzip2q[_f64](svfloat64_t op1, svfloat64_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svzip2q[_s8](svint8_t op1, svint8_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svzip2q[_s16](svint16_t op1, svint16_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svzip2q[_s32](svint32_t op1, svint32_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svzip2q[_s64](svint64_t op1, svint64_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svzip2q[_u8](svuint8_t op1, svuint8_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svzip2q[_u16](svuint16_t op1, svuint16_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svzip2q[_u32](svuint32_t op1, svuint32_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svzip2q[_u64](svuint64_t op1, svuint64_t op2) : "ZIP2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// InterleaveInt128FromLowHalvesOfTwoInputs : Interleave quadwords from low halves of two inputs

    /// svbfloat16_t svzip1q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svzip1q[_f16](svfloat16_t op1, svfloat16_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svzip1q[_f32](svfloat32_t op1, svfloat32_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svzip1q[_f64](svfloat64_t op1, svfloat64_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svzip1q[_s8](svint8_t op1, svint8_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svzip1q[_s16](svint16_t op1, svint16_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svzip1q[_s32](svint32_t op1, svint32_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svzip1q[_s64](svint64_t op1, svint64_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svzip1q[_u8](svuint8_t op1, svuint8_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svzip1q[_u16](svuint16_t op1, svuint16_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svzip1q[_u32](svuint32_t op1, svuint32_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svzip1q[_u64](svuint64_t op1, svuint64_t op2) : "ZIP1 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// InterleaveOddInt128FromTwoInputs : Interleave odd quadwords from two inputs

    /// svbfloat16_t svtrn2q[_bf16](svbfloat16_t op1, svbfloat16_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<bfloat16> InterleaveOddInt128FromTwoInputs(Vector<bfloat16> left, Vector<bfloat16> right);

    /// svfloat16_t svtrn2q[_f16](svfloat16_t op1, svfloat16_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<half> InterleaveOddInt128FromTwoInputs(Vector<half> left, Vector<half> right);

    /// svfloat32_t svtrn2q[_f32](svfloat32_t op1, svfloat32_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<float> InterleaveOddInt128FromTwoInputs(Vector<float> left, Vector<float> right);

    /// svfloat64_t svtrn2q[_f64](svfloat64_t op1, svfloat64_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<double> InterleaveOddInt128FromTwoInputs(Vector<double> left, Vector<double> right);

    /// svint8_t svtrn2q[_s8](svint8_t op1, svint8_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<sbyte> InterleaveOddInt128FromTwoInputs(Vector<sbyte> left, Vector<sbyte> right);

    /// svint16_t svtrn2q[_s16](svint16_t op1, svint16_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<short> InterleaveOddInt128FromTwoInputs(Vector<short> left, Vector<short> right);

    /// svint32_t svtrn2q[_s32](svint32_t op1, svint32_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<int> InterleaveOddInt128FromTwoInputs(Vector<int> left, Vector<int> right);

    /// svint64_t svtrn2q[_s64](svint64_t op1, svint64_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<long> InterleaveOddInt128FromTwoInputs(Vector<long> left, Vector<long> right);

    /// svuint8_t svtrn2q[_u8](svuint8_t op1, svuint8_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<byte> InterleaveOddInt128FromTwoInputs(Vector<byte> left, Vector<byte> right);

    /// svuint16_t svtrn2q[_u16](svuint16_t op1, svuint16_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ushort> InterleaveOddInt128FromTwoInputs(Vector<ushort> left, Vector<ushort> right);

    /// svuint32_t svtrn2q[_u32](svuint32_t op1, svuint32_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<uint> InterleaveOddInt128FromTwoInputs(Vector<uint> left, Vector<uint> right);

    /// svuint64_t svtrn2q[_u64](svuint64_t op1, svuint64_t op2) : "TRN2 Zresult.Q, Zop1.Q, Zop2.Q"
  public static unsafe Vector<ulong> InterleaveOddInt128FromTwoInputs(Vector<ulong> left, Vector<ulong> right);


    /// LoadVector256AndReplicateToVector : Load and replicate 256 bits of data
    /// Each ACLE comment in this group lists three alternative addressing forms of the same
    /// LD1RO* instruction; the instruction suffix (B/H/W/D) follows the element size.
    /// NOTE(review): the `mask` parameter appears to take the place of the ACLE predicate
    /// argument `svbool_t pg` -- confirm against the final API shape.

    /// svbfloat16_t svld1ro[_bf16](svbool_t pg, const bfloat16_t *base) : "LD1ROH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1ROH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1ROH Zresult.H, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<bfloat16> LoadVector256AndReplicateToVector(Vector<bfloat16> mask, const bfloat16 *base);

    /// svfloat16_t svld1ro[_f16](svbool_t pg, const float16_t *base) : "LD1ROH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1ROH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1ROH Zresult.H, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<half> LoadVector256AndReplicateToVector(Vector<half> mask, const half *base);

    /// svfloat32_t svld1ro[_f32](svbool_t pg, const float32_t *base) : "LD1ROW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1ROW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1ROW Zresult.S, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<float> LoadVector256AndReplicateToVector(Vector<float> mask, const float *base);

    /// svfloat64_t svld1ro[_f64](svbool_t pg, const float64_t *base) : "LD1ROD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1ROD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1ROD Zresult.D, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<double> LoadVector256AndReplicateToVector(Vector<double> mask, const double *base);

    /// svint8_t svld1ro[_s8](svbool_t pg, const int8_t *base) : "LD1ROB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1ROB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1ROB Zresult.B, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<sbyte> LoadVector256AndReplicateToVector(Vector<sbyte> mask, const sbyte *base);

    /// svint16_t svld1ro[_s16](svbool_t pg, const int16_t *base) : "LD1ROH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1ROH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1ROH Zresult.H, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<short> LoadVector256AndReplicateToVector(Vector<short> mask, const short *base);

    /// svint32_t svld1ro[_s32](svbool_t pg, const int32_t *base) : "LD1ROW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1ROW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1ROW Zresult.S, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<int> LoadVector256AndReplicateToVector(Vector<int> mask, const int *base);

    /// svint64_t svld1ro[_s64](svbool_t pg, const int64_t *base) : "LD1ROD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1ROD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1ROD Zresult.D, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<long> LoadVector256AndReplicateToVector(Vector<long> mask, const long *base);

    /// svuint8_t svld1ro[_u8](svbool_t pg, const uint8_t *base) : "LD1ROB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1ROB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1ROB Zresult.B, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<byte> LoadVector256AndReplicateToVector(Vector<byte> mask, const byte *base);

    /// svuint16_t svld1ro[_u16](svbool_t pg, const uint16_t *base) : "LD1ROH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1ROH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1ROH Zresult.H, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<ushort> LoadVector256AndReplicateToVector(Vector<ushort> mask, const ushort *base);

    /// svuint32_t svld1ro[_u32](svbool_t pg, const uint32_t *base) : "LD1ROW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1ROW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1ROW Zresult.S, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<uint> LoadVector256AndReplicateToVector(Vector<uint> mask, const uint *base);

    /// svuint64_t svld1ro[_u64](svbool_t pg, const uint64_t *base) : "LD1ROD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1ROD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1ROD Zresult.D, Pg/Z, [Xbase, #0]"
  public static unsafe Vector<ulong> LoadVector256AndReplicateToVector(Vector<ulong> mask, const ulong *base);


    /// MatrixMultiplyAccumulate : Matrix multiply-accumulate
    /// Only a double-precision overload is listed. The ACLE comment gives two sequences:
    /// FMMLA on its own, or MOVPRFX followed by FMMLA (presumably used when the result
    /// register is not the same as op1 -- confirm against the ACLE specification).

    /// svfloat64_t svmmla[_f64](svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) : "FMMLA Ztied1.D, Zop2.D, Zop3.D" or "MOVPRFX Zresult, Zop1; FMMLA Zresult.D, Zop2.D, Zop3.D"
  public static unsafe Vector<double> MatrixMultiplyAccumulate(Vector<double> op1, Vector<double> op2, Vector<double> op3);


  /// 85 = 7 method names x 12 element types, plus the single MatrixMultiplyAccumulate overload.
  /// total method signatures: 85
  /// total method names:      8
}


  /// Total ACLE covered across API:      85

a74nh · 2023-10-26T11:17:22Z

/// Rejected:
/// None yet

a74nh · 2023-10-26T11:17:55Z

This contributes to #93095

It covers all of the instructions in FEAT_F64MM. This is an optional Armv8.2 feature, but it is not yet available in any hardware.

This list was auto-generated from the C ACLE for SVE, and is in three parts:

1. The methods list reduced down to Vector<T> versions. All possible variants of T are given above the method.
2. The complete list of all methods. The corresponding ACLE methods and SVE instructions are given above the method.
3. All rejected ACLE methods. These are methods we have agreed do not need to be included in C#.
Where possible, existing C# naming conventions have been matched.

Many of the C functions include predicate argument(s) of type svbool_t as the first argument. These are omitted from the C# methods. It is expected that the JIT will create predicates where required, or combine them with uses of ConditionalSelect(). For more discussion, see the comment on #88140.

a74nh · 2023-11-06T17:19:05Z

Updated to reflect review comments from other API proposals.

ghost · 2024-02-08T05:23:06Z

Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics
See info in area-owners.md if you want to be subscribed.

Issue Details

namespace System.Runtime.Intrinsics.Arm

/// VectorT Summary
public abstract class SveF64mm : AdvSimd /// Feature: FEAT_F64MM
{

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateEvenInt128FromTwoInputs(Vector<T> left, Vector<T> right); // UZP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> ConcatenateOddInt128FromTwoInputs(Vector<T> left, Vector<T> right); // UZP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveEvenInt128FromTwoInputs(Vector<T> left, Vector<T> right); // TRN1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveInt128FromHighHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveInt128FromLowHalvesOfTwoInputs(Vector<T> left, Vector<T> right); // ZIP1

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> InterleaveOddInt128FromTwoInputs(Vector<T> left, Vector<T> right); // TRN2

  /// T: bfloat16, half, float, double, sbyte, short, int, long, byte, ushort, uint, ulong
  public static unsafe Vector<T> LoadVector256AndReplicateToVector(Vector<T> mask, const T *base); // LD1ROH or LD1ROW or LD1ROD or LD1ROB

  public static unsafe Vector<double> MatrixMultiplyAccumulate(Vector<double> op1, Vector<double> op2, Vector<double> op3); // FMMLA // MOVPRFX

  /// total method signatures: 8

}

Author:	a74nh
Assignees:	-
Labels:	`area-System.Runtime.Intrinsics`, `api-ready-for-review`
Milestone:	-

a74nh · 2024-08-21T10:27:20Z

This feature is not yet available on any existing Arm hardware. I don't recommend implementing this for .NET 10.

dotnet-issue-labeler bot added the area-System.Numerics label Oct 26, 2023

a74nh mentioned this issue Oct 26, 2023

Arm64: Add SVE/SVE2 support in .NET 9 #93095

Closed

31 tasks

a74nh mentioned this issue Nov 13, 2023

Sve: Early versions of Intrinsics/Arm/Sve and hwintrinsiclistarm64sve #94606

Closed

kunalspathak added the api-ready-for-review API is ready for review, it is NOT ready for implementation label Feb 7, 2024

tannergooding added area-System.Runtime.Intrinsics and removed area-System.Numerics labels Feb 8, 2024

kunalspathak mentioned this issue Mar 19, 2024

Arm64: Implement SVE APIs #99957

Closed

kunalspathak added this to the Future milestone May 1, 2024

tannergooding added api-suggestion Early API idea and discussion, it is NOT ready for implementation arch-arm64 and removed api-ready-for-review API is ready for review, it is NOT ready for implementation labels Aug 20, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[API Proposal]: Arm64: FEAT_F64MM #94025

[API Proposal]: Arm64: FEAT_F64MM #94025

a74nh commented Oct 26, 2023 •

edited

Loading

ghost commented Oct 26, 2023

a74nh commented Oct 26, 2023 •

edited

Loading

a74nh commented Oct 26, 2023

a74nh commented Oct 26, 2023

a74nh commented Nov 6, 2023

ghost commented Feb 8, 2024

a74nh commented Aug 21, 2024

[API Proposal]: Arm64: FEAT_F64MM #94025

[API Proposal]: Arm64: FEAT_F64MM #94025

Comments

a74nh commented Oct 26, 2023 • edited Loading

ghost commented Oct 26, 2023

a74nh commented Oct 26, 2023 • edited Loading

a74nh commented Oct 26, 2023

a74nh commented Oct 26, 2023

a74nh commented Nov 6, 2023

ghost commented Feb 8, 2024

a74nh commented Aug 21, 2024

a74nh commented Oct 26, 2023 •

edited

Loading

a74nh commented Oct 26, 2023 •

edited

Loading