-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARM additional arithmetic intrinsics #32512
Comments
CC. @CarolEidt, @echesakovMSFT, @TamarChristinaArm This is a proposal for some of the remaining intrinsics that aren't yet in an existing proposal. I expect 2-3 more issues in the coming days to get put up before all have been covered. |
Added in some more arithmetic intrinsics. |
namespace System.Runtime.Intrinsics.Arm
{
// Proposed additions to AdvSimd and its nested Arm64 class.
// Declarations only: no member in this listing has a body.
public partial class AdvSimd
{
// NOTE(review): the members from here through SubtractSaturate are declared
// without `static`, unlike the `public static` members further down - confirm.

// AddLowerReturningUpper: two Vector64 inputs with 16/32/64-bit elements; the
// result is a Vector64 whose element type is half as wide as the inputs'.
// NOTE(review): a later comment in the thread lists these with Vector128 inputs - confirm.
public Vector64<byte> AddLowerReturningUpper(Vector64<ushort> left, Vector64<ushort> right);
public Vector64<sbyte> AddLowerReturningUpper(Vector64<short> left, Vector64<short> right);
public Vector64<short> AddLowerReturningUpper(Vector64<int> left, Vector64<int> right);
public Vector64<ushort> AddLowerReturningUpper(Vector64<uint> left, Vector64<uint> right);
public Vector64<int> AddLowerReturningUpper(Vector64<long> left, Vector64<long> right);
public Vector64<uint> AddLowerReturningUpper(Vector64<ulong> left, Vector64<ulong> right);
// AddLowerRoundedReturningUpper: same parameter and return types as the group above.
public Vector64<byte> AddLowerRoundedReturningUpper(Vector64<ushort> left, Vector64<ushort> right);
public Vector64<sbyte> AddLowerRoundedReturningUpper(Vector64<short> left, Vector64<short> right);
public Vector64<short> AddLowerRoundedReturningUpper(Vector64<int> left, Vector64<int> right);
public Vector64<ushort> AddLowerRoundedReturningUpper(Vector64<uint> left, Vector64<uint> right);
public Vector64<int> AddLowerRoundedReturningUpper(Vector64<long> left, Vector64<long> right);
public Vector64<uint> AddLowerRoundedReturningUpper(Vector64<ulong> left, Vector64<ulong> right);
// Same-type binary operations, each listed for Vector64 and Vector128.
// NOTE(review): `T` is not declared on these methods or on the enclosing class;
// presumably shorthand for one overload per supported element type - confirm.
public Vector64<T> FusedAddHalving(Vector64<T> left, Vector64<T> right);
public Vector128<T> FusedAddHalving(Vector128<T> left, Vector128<T> right);
public Vector64<T> FusedAddRoundedHalving(Vector64<T> left, Vector64<T> right);
public Vector128<T> FusedAddRoundedHalving(Vector128<T> left, Vector128<T> right);
public Vector64<T> AddSaturate(Vector64<T> left, Vector64<T> right);
public Vector128<T> AddSaturate(Vector128<T> left, Vector128<T> right);
// SubtractLowerReturningUpper: same parameter and return types as AddLowerReturningUpper.
public Vector64<byte> SubtractLowerReturningUpper(Vector64<ushort> left, Vector64<ushort> right);
public Vector64<sbyte> SubtractLowerReturningUpper(Vector64<short> left, Vector64<short> right);
public Vector64<short> SubtractLowerReturningUpper(Vector64<int> left, Vector64<int> right);
public Vector64<ushort> SubtractLowerReturningUpper(Vector64<uint> left, Vector64<uint> right);
public Vector64<int> SubtractLowerReturningUpper(Vector64<long> left, Vector64<long> right);
public Vector64<uint> SubtractLowerReturningUpper(Vector64<ulong> left, Vector64<ulong> right);
// SubtractLowerRoundedReturningUpper: same parameter and return types as the group above.
public Vector64<byte> SubtractLowerRoundedReturningUpper(Vector64<ushort> left, Vector64<ushort> right);
public Vector64<sbyte> SubtractLowerRoundedReturningUpper(Vector64<short> left, Vector64<short> right);
public Vector64<short> SubtractLowerRoundedReturningUpper(Vector64<int> left, Vector64<int> right);
public Vector64<ushort> SubtractLowerRoundedReturningUpper(Vector64<uint> left, Vector64<uint> right);
public Vector64<int> SubtractLowerRoundedReturningUpper(Vector64<long> left, Vector64<long> right);
public Vector64<uint> SubtractLowerRoundedReturningUpper(Vector64<ulong> left, Vector64<ulong> right);
// Same-type binary operations using the undeclared `T` shorthand (see the note on FusedAddHalving).
public Vector64<T> FusedSubtractHalving(Vector64<T> left, Vector64<T> right);
public Vector128<T> FusedSubtractHalving(Vector128<T> left, Vector128<T> right);
public Vector64<T> SubtractSaturate(Vector64<T> left, Vector64<T> right);
public Vector128<T> SubtractSaturate(Vector128<T> left, Vector128<T> right);
// Nested class; by brace structure every declaration below this point belongs to it.
public partial class Arm64
{
// NOTE(review): the members from here through SubtractSaturateScalar are also
// declared without `static` - confirm.

// AddUpperReturningUpper: takes a Vector64 `lower` plus two Vector128 inputs with
// 16/32/64-bit elements; returns a Vector128 with the same element type as `lower`.
public Vector128<byte> AddUpperReturningUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
public Vector128<sbyte> AddUpperReturningUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
public Vector128<short> AddUpperReturningUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
public Vector128<ushort> AddUpperReturningUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
public Vector128<int> AddUpperReturningUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
public Vector128<uint> AddUpperReturningUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// AddUpperRoundedReturningUpper: same parameter and return types as the group above.
public Vector128<byte> AddUpperRoundedReturningUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
public Vector128<sbyte> AddUpperRoundedReturningUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
public Vector128<short> AddUpperRoundedReturningUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
public Vector128<ushort> AddUpperRoundedReturningUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
public Vector128<int> AddUpperRoundedReturningUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
public Vector128<uint> AddUpperRoundedReturningUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// Uses the undeclared `T` shorthand (see the note on FusedAddHalving).
public Vector64<T> AddSaturateScalar(Vector64<T> left, Vector64<T> right);
// SubtractUpperReturningUpper: same parameter and return types as AddUpperReturningUpper.
public Vector128<byte> SubtractUpperReturningUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
public Vector128<sbyte> SubtractUpperReturningUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
public Vector128<short> SubtractUpperReturningUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
public Vector128<ushort> SubtractUpperReturningUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
public Vector128<int> SubtractUpperReturningUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
public Vector128<uint> SubtractUpperReturningUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// SubtractUpperRoundedReturningUpper: same parameter and return types as the group above.
public Vector128<byte> SubtractUpperRoundedReturningUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
public Vector128<sbyte> SubtractUpperRoundedReturningUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
public Vector128<short> SubtractUpperRoundedReturningUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
public Vector128<ushort> SubtractUpperRoundedReturningUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
public Vector128<int> SubtractUpperRoundedReturningUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
public Vector128<uint> SubtractUpperRoundedReturningUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// Uses the undeclared `T` shorthand (see the note on FusedAddHalving).
public Vector64<T> SubtractSaturateScalar(Vector64<T> left, Vector64<T> right);
// From here on every member is declared `public static`.

// AddPairwiseWidening: Vector64 input with 8/16/32-bit elements; Vector128 result
// whose elements are twice as wide.
// NOTE(review): the corrected listing later in the thread returns Vector64 for
// Vector64 inputs and adds Vector128 overloads - confirm.
public static Vector128<short> AddPairwiseWidening(Vector64<sbyte> value);
public static Vector128<ushort> AddPairwiseWidening(Vector64<byte> value);
public static Vector128<int> AddPairwiseWidening(Vector64<short> value);
public static Vector128<uint> AddPairwiseWidening(Vector64<ushort> value);
public static Vector128<long> AddPairwiseWidening(Vector64<int> value);
public static Vector128<ulong> AddPairwiseWidening(Vector64<uint> value);
// AddPairwiseWideningAndAdd: as above, with a leading Vector128 `addend` of the result type.
public static Vector128<short> AddPairwiseWideningAndAdd(Vector128<short> addend, Vector64<sbyte> value);
public static Vector128<ushort> AddPairwiseWideningAndAdd(Vector128<ushort> addend, Vector64<byte> value);
public static Vector128<int> AddPairwiseWideningAndAdd(Vector128<int> addend, Vector64<short> value);
public static Vector128<uint> AddPairwiseWideningAndAdd(Vector128<uint> addend, Vector64<ushort> value);
public static Vector128<long> AddPairwiseWideningAndAdd(Vector128<long> addend, Vector64<int> value);
public static Vector128<ulong> AddPairwiseWideningAndAdd(Vector128<ulong> addend, Vector64<uint> value);
// AbsoluteDifferenceWideningLower: the result is always an unsigned Vector128, for
// signed and unsigned inputs alike.
// NOTE(review): `left` is Vector128 while `right` is Vector64; the corrected listing
// later in the thread uses Vector64 for both and signed results for signed inputs - confirm.
public static Vector128<ushort> AbsoluteDifferenceWideningLower(Vector128<byte> left, Vector64<byte> right);
public static Vector128<ushort> AbsoluteDifferenceWideningLower(Vector128<sbyte> left, Vector64<sbyte> right);
public static Vector128<uint> AbsoluteDifferenceWideningLower(Vector128<ushort> left, Vector64<ushort> right);
public static Vector128<uint> AbsoluteDifferenceWideningLower(Vector128<short> left, Vector64<short> right);
public static Vector128<ulong> AbsoluteDifferenceWideningLower(Vector128<uint> left, Vector64<uint> right);
public static Vector128<ulong> AbsoluteDifferenceWideningLower(Vector128<int> left, Vector64<int> right);
// AbsoluteDifferenceWideningUpper: two Vector128 inputs; unsigned Vector128 result
// with elements twice as wide.
public static Vector128<ushort> AbsoluteDifferenceWideningUpper(Vector128<byte> left, Vector128<byte> right);
public static Vector128<ushort> AbsoluteDifferenceWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<uint> AbsoluteDifferenceWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<uint> AbsoluteDifferenceWideningUpper(Vector128<short> left, Vector128<short> right);
public static Vector128<ulong> AbsoluteDifferenceWideningUpper(Vector128<uint> left, Vector128<uint> right);
public static Vector128<ulong> AbsoluteDifferenceWideningUpper(Vector128<int> left, Vector128<int> right);
// AbsoluteDifferenceWideningLowerAndAdd: unsigned Vector128 `addend` plus two Vector64 inputs.
public static Vector128<ushort> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ushort> addend, Vector64<byte> left, Vector64<byte> right);
public static Vector128<ushort> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ushort> addend, Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<uint> AbsoluteDifferenceWideningLowerAndAdd(Vector128<uint> addend, Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<uint> AbsoluteDifferenceWideningLowerAndAdd(Vector128<uint> addend, Vector64<short> left, Vector64<short> right);
public static Vector128<ulong> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ulong> addend, Vector64<uint> left, Vector64<uint> right);
public static Vector128<ulong> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ulong> addend, Vector64<int> left, Vector64<int> right);
// AbsoluteDifferenceWideningUpperAndAdd: unsigned Vector128 `addend` plus two Vector128 inputs.
public static Vector128<ushort> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ushort> addend, Vector128<byte> left, Vector128<byte> right);
public static Vector128<ushort> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ushort> addend, Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<uint> AbsoluteDifferenceWideningUpperAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<uint> AbsoluteDifferenceWideningUpperAndAdd(Vector128<uint> addend, Vector128<short> left, Vector128<short> right);
public static Vector128<ulong> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ulong> addend, Vector128<uint> left, Vector128<uint> right);
public static Vector128<ulong> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ulong> addend, Vector128<int> left, Vector128<int> right);
// AddWideningLower (narrow, narrow): two Vector64 inputs; Vector128 result with
// elements twice as wide and the same signedness.
public static Vector128<short> AddWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<ushort> AddWideningLower(Vector64<byte> left, Vector64<byte> right);
public static Vector128<int> AddWideningLower(Vector64<short> left, Vector64<short> right);
public static Vector128<uint> AddWideningLower(Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<long> AddWideningLower(Vector64<int> left, Vector64<int> right);
public static Vector128<ulong> AddWideningLower(Vector64<uint> left, Vector64<uint> right);
// AddWideningUpper (narrow, narrow): two Vector128 inputs; Vector128 result with
// elements twice as wide.
public static Vector128<short> AddWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<ushort> AddWideningUpper(Vector128<byte> left, Vector128<byte> right);
public static Vector128<int> AddWideningUpper(Vector128<short> left, Vector128<short> right);
public static Vector128<uint> AddWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<long> AddWideningUpper(Vector128<int> left, Vector128<int> right);
public static Vector128<ulong> AddWideningUpper(Vector128<uint> left, Vector128<uint> right);
// AddWideningLower (wide, narrow): `left` already has the result type; `right` is a
// Vector64 with elements half as wide.
public static Vector128<short> AddWideningLower(Vector128<short> left, Vector64<sbyte> right);
public static Vector128<ushort> AddWideningLower(Vector128<ushort> left, Vector64<byte> right);
public static Vector128<int> AddWideningLower(Vector128<int> left, Vector64<short> right);
public static Vector128<uint> AddWideningLower(Vector128<uint> left, Vector64<ushort> right);
public static Vector128<long> AddWideningLower(Vector128<long> left, Vector64<int> right);
public static Vector128<ulong> AddWideningLower(Vector128<ulong> left, Vector64<uint> right);
// AddWideningUpper (wide, narrow): `left` already has the result type; `right` is a
// Vector128 with elements half as wide.
public static Vector128<short> AddWideningUpper(Vector128<short> left, Vector128<sbyte> right);
public static Vector128<ushort> AddWideningUpper(Vector128<ushort> left, Vector128<byte> right);
public static Vector128<int> AddWideningUpper(Vector128<int> left, Vector128<short> right);
public static Vector128<uint> AddWideningUpper(Vector128<uint> left, Vector128<ushort> right);
public static Vector128<long> AddWideningUpper(Vector128<long> left, Vector128<int> right);
public static Vector128<ulong> AddWideningUpper(Vector128<ulong> left, Vector128<uint> right);
// SubtractWideningLower / SubtractWideningUpper: the same four overload shapes as
// the AddWidening* groups above.
public static Vector128<short> SubtractWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<ushort> SubtractWideningLower(Vector64<byte> left, Vector64<byte> right);
public static Vector128<int> SubtractWideningLower(Vector64<short> left, Vector64<short> right);
public static Vector128<uint> SubtractWideningLower(Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<long> SubtractWideningLower(Vector64<int> left, Vector64<int> right);
public static Vector128<ulong> SubtractWideningLower(Vector64<uint> left, Vector64<uint> right);
public static Vector128<short> SubtractWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<ushort> SubtractWideningUpper(Vector128<byte> left, Vector128<byte> right);
public static Vector128<int> SubtractWideningUpper(Vector128<short> left, Vector128<short> right);
public static Vector128<uint> SubtractWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<long> SubtractWideningUpper(Vector128<int> left, Vector128<int> right);
public static Vector128<ulong> SubtractWideningUpper(Vector128<uint> left, Vector128<uint> right);
public static Vector128<short> SubtractWideningLower(Vector128<short> left, Vector64<sbyte> right);
public static Vector128<ushort> SubtractWideningLower(Vector128<ushort> left, Vector64<byte> right);
public static Vector128<int> SubtractWideningLower(Vector128<int> left, Vector64<short> right);
public static Vector128<uint> SubtractWideningLower(Vector128<uint> left, Vector64<ushort> right);
public static Vector128<long> SubtractWideningLower(Vector128<long> left, Vector64<int> right);
public static Vector128<ulong> SubtractWideningLower(Vector128<ulong> left, Vector64<uint> right);
public static Vector128<short> SubtractWideningUpper(Vector128<short> left, Vector128<sbyte> right);
public static Vector128<ushort> SubtractWideningUpper(Vector128<ushort> left, Vector128<byte> right);
public static Vector128<int> SubtractWideningUpper(Vector128<int> left, Vector128<short> right);
public static Vector128<uint> SubtractWideningUpper(Vector128<uint> left, Vector128<ushort> right);
public static Vector128<long> SubtractWideningUpper(Vector128<long> left, Vector128<int> right);
public static Vector128<ulong> SubtractWideningUpper(Vector128<ulong> left, Vector128<uint> right);
// MultiplyWideningLower / MultiplyWideningUpper: two same-type inputs (Vector64 for
// Lower, Vector128 for Upper); Vector128 result with elements twice as wide.
public static Vector128<short> MultiplyWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<ushort> MultiplyWideningLower(Vector64<byte> left, Vector64<byte> right);
public static Vector128<int> MultiplyWideningLower(Vector64<short> left, Vector64<short> right);
public static Vector128<uint> MultiplyWideningLower(Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<long> MultiplyWideningLower(Vector64<int> left, Vector64<int> right);
public static Vector128<ulong> MultiplyWideningLower(Vector64<uint> left, Vector64<uint> right);
public static Vector128<short> MultiplyWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<ushort> MultiplyWideningUpper(Vector128<byte> left, Vector128<byte> right);
public static Vector128<int> MultiplyWideningUpper(Vector128<short> left, Vector128<short> right);
public static Vector128<uint> MultiplyWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<long> MultiplyWideningUpper(Vector128<int> left, Vector128<int> right);
public static Vector128<ulong> MultiplyWideningUpper(Vector128<uint> left, Vector128<uint> right);
// MultiplyWidening*AndAdd: as MultiplyWidening*, with a leading Vector128 `addend`
// of the result type.
public static Vector128<short> MultiplyWideningLowerAndAdd(Vector128<short> addend, Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<ushort> MultiplyWideningLowerAndAdd(Vector128<ushort> addend, Vector64<byte> left, Vector64<byte> right);
public static Vector128<int> MultiplyWideningLowerAndAdd(Vector128<int> addend, Vector64<short> left, Vector64<short> right);
public static Vector128<uint> MultiplyWideningLowerAndAdd(Vector128<uint> addend, Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<long> MultiplyWideningLowerAndAdd(Vector128<long> addend, Vector64<int> left, Vector64<int> right);
public static Vector128<ulong> MultiplyWideningLowerAndAdd(Vector128<ulong> addend, Vector64<uint> left, Vector64<uint> right);
public static Vector128<short> MultiplyWideningUpperAndAdd(Vector128<short> addend, Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<ushort> MultiplyWideningUpperAndAdd(Vector128<ushort> addend, Vector128<byte> left, Vector128<byte> right);
public static Vector128<int> MultiplyWideningUpperAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right);
public static Vector128<uint> MultiplyWideningUpperAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<long> MultiplyWideningUpperAndAdd(Vector128<long> addend, Vector128<int> left, Vector128<int> right);
public static Vector128<ulong> MultiplyWideningUpperAndAdd(Vector128<ulong> addend, Vector128<uint> left, Vector128<uint> right);
// MultiplyWidening*AndSubtract: as MultiplyWidening*, with a leading Vector128
// `minuend` of the result type.
public static Vector128<short> MultiplyWideningLowerAndSubtract(Vector128<short> minuend, Vector64<sbyte> left, Vector64<sbyte> right);
public static Vector128<ushort> MultiplyWideningLowerAndSubtract(Vector128<ushort> minuend, Vector64<byte> left, Vector64<byte> right);
public static Vector128<int> MultiplyWideningLowerAndSubtract(Vector128<int> minuend, Vector64<short> left, Vector64<short> right);
public static Vector128<uint> MultiplyWideningLowerAndSubtract(Vector128<uint> minuend, Vector64<ushort> left, Vector64<ushort> right);
public static Vector128<long> MultiplyWideningLowerAndSubtract(Vector128<long> minuend, Vector64<int> left, Vector64<int> right);
public static Vector128<ulong> MultiplyWideningLowerAndSubtract(Vector128<ulong> minuend, Vector64<uint> left, Vector64<uint> right);
public static Vector128<short> MultiplyWideningUpperAndSubtract(Vector128<short> minuend, Vector128<sbyte> left, Vector128<sbyte> right);
public static Vector128<ushort> MultiplyWideningUpperAndSubtract(Vector128<ushort> minuend, Vector128<byte> left, Vector128<byte> right);
public static Vector128<int> MultiplyWideningUpperAndSubtract(Vector128<int> minuend, Vector128<short> left, Vector128<short> right);
public static Vector128<uint> MultiplyWideningUpperAndSubtract(Vector128<uint> minuend, Vector128<ushort> left, Vector128<ushort> right);
public static Vector128<long> MultiplyWideningUpperAndSubtract(Vector128<long> minuend, Vector128<int> left, Vector128<int> right);
public static Vector128<ulong> MultiplyWideningUpperAndSubtract(Vector128<ulong> minuend, Vector128<uint> left, Vector128<uint> right);
}
}
} |
The following intrinsics should be under
These are all "lower" variants. The "upper" variants are Arm64 only. CC. @echesakovMSFT, @TamarChristinaArm |
@tannergooding same here. I believe some, if not all, of these are implementable on AArch32 as well. I will go through them. |
I believe VABAL Qd, Dn, Dm where Dn, Dm corresponds to upper parts of I believe the reason why Arm64 introduces SABAL2, UABAL2 is that you can't access upper 64-bit parts of 128-bit vector registers using 64-bit register names - while on Arm32 D_{2n}, D_{2n+1} corresponds to lower and upper parts of Q_{n}. |
@TamarChristinaArm Are you working on these or have you started implementing them? If not, I can take these as my next work item, since I am a little bit reluctant to start working on the remaining intrinsics in #24794 (*BySelectedScalar) before we have agreed on their names |
It looks that public Vector64<byte> AddLowerReturningUpper(Vector128<ushort> left, Vector128<ushort> right);
public Vector64<sbyte> AddLowerReturningUpper(Vector128<short> left, Vector128<short> right);
public Vector64<short> AddLowerReturningUpper(Vector128<int> left, Vector128<int> right);
public Vector64<ushort> AddLowerReturningUpper(Vector128<uint> left, Vector128<uint> right);
public Vector64<int> AddLowerReturningUpper(Vector128<long> left, Vector128<long> right);
public Vector64<uint> AddLowerReturningUpper(Vector128<ulong> left, Vector128<ulong> right); and public Vector64<byte> AddLowerRoundedReturningUpper(Vector128<ushort> left, Vector128<ushort> right);
public Vector64<sbyte> AddLowerRoundedReturningUpper(Vector128<short> left, Vector128<short> right);
public Vector64<short> AddLowerRoundedReturningUpper(Vector128<int> left, Vector128<int> right);
public Vector64<ushort> AddLowerRoundedReturningUpper(Vector128<uint> left, Vector128<uint> right);
public Vector64<int> AddLowerRoundedReturningUpper(Vector128<long> left, Vector128<long> right);
public Vector64<uint> AddLowerRoundedReturningUpper(Vector128<ulong> left, Vector128<ulong> right); |
Based on the description of
shouldn't these be |
Yeah, I think those names were likely messed up during the API review as the general terms are confusing. They |
AddHighNarrowingToUpper AddHighNarrowingToLower? |
That doesn't fit with the general naming convention we've used elsewhere |
No I'm not, I'm currently doing the single register TBL/TBX and the SLI/SRI ones. |
@tannergooding Can you please add these to the next API meeting agenda? There should be clarification on what names to use here. I don't want to block on this, so I am going to start implementing the intrinsics and use AddReturningHighNarrowUpper and AddReturningHighNarrowLower as temporary names. When the names are finalized, these can be updated with a simple search and replace. |
I went through all of the intrinsics - it looks that only Below I attached the corrected API and added the corresponding C++ intrinsic name and the instructions with operands on A32 and A64. class AdvSimd.Arm64
{
// AddSaturateScalar overloads: one per 8-, 16- and 32-bit integer element type, each
// taking and returning Vector64 of that type. Per the per-overload comments, the
// unsigned overloads map to A64 UQADD and the signed ones to SQADD, using the
// B/H/S register forms.
// NOTE(review): the C intrinsics listed take and return scalars, so presumably only
// element 0 of each vector takes part - confirm.
// uint8_t vqaddb_u8 (uint8_t a, uint8_t b)
// A64: UQADD Bd, Bn, Bm
public static Vector64<byte> AddSaturateScalar(Vector64<byte> left, Vector64<byte> right);
// int16_t vqaddh_s16 (int16_t a, int16_t b)
// A64: SQADD Hd, Hn, Hm
public static Vector64<short> AddSaturateScalar(Vector64<short> left, Vector64<short> right);
// int32_t vqadds_s32 (int32_t a, int32_t b)
// A64: SQADD Sd, Sn, Sm
public static Vector64<int> AddSaturateScalar(Vector64<int> left, Vector64<int> right);
// int8_t vqaddb_s8 (int8_t a, int8_t b)
// A64: SQADD Bd, Bn, Bm
public static Vector64<sbyte> AddSaturateScalar(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16_t vqaddh_u16 (uint16_t a, uint16_t b)
// A64: UQADD Hd, Hn, Hm
public static Vector64<ushort> AddSaturateScalar(Vector64<ushort> left, Vector64<ushort> right);
// uint32_t vqadds_u32 (uint32_t a, uint32_t b)
// A64: UQADD Sd, Sn, Sm
public static Vector64<uint> AddSaturateScalar(Vector64<uint> left, Vector64<uint> right);
// SubtractSaturateScalar overloads: one per 8-, 16- and 32-bit integer element type,
// each taking and returning Vector64 of that type. Per the per-overload comments, the
// unsigned overloads map to A64 UQSUB and the signed ones to SQSUB, using the
// B/H/S register forms.
// NOTE(review): the C intrinsics listed take and return scalars, so presumably only
// element 0 of each vector takes part - confirm.
// uint8_t vqsubb_u8 (uint8_t a, uint8_t b)
// A64: UQSUB Bd, Bn, Bm
public static Vector64<byte> SubtractSaturateScalar(Vector64<byte> left, Vector64<byte> right);
// int16_t vqsubh_s16 (int16_t a, int16_t b)
// A64: SQSUB Hd, Hn, Hm
public static Vector64<short> SubtractSaturateScalar(Vector64<short> left, Vector64<short> right);
// int32_t vqsubs_s32 (int32_t a, int32_t b)
// A64: SQSUB Sd, Sn, Sm
public static Vector64<int> SubtractSaturateScalar(Vector64<int> left, Vector64<int> right);
// int8_t vqsubb_s8 (int8_t a, int8_t b)
// A64: SQSUB Bd, Bn, Bm
public static Vector64<sbyte> SubtractSaturateScalar(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16_t vqsubh_u16 (uint16_t a, uint16_t b)
// A64: UQSUB Hd, Hn, Hm
public static Vector64<ushort> SubtractSaturateScalar(Vector64<ushort> left, Vector64<ushort> right);
// uint32_t vqsubs_u32 (uint32_t a, uint32_t b)
// A64: UQSUB Sd, Sn, Sm
public static Vector64<uint> SubtractSaturateScalar(Vector64<uint> left, Vector64<uint> right);
}
class AdvSimd
{
// uint16x8_t vabdl_u8 (uint8x8_t a, uint8x8_t b)
// A32: VABDL.U8 Qd, Dn, Dm
// A64: UABDL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> AbsoluteDifferenceWideningLower(Vector64<byte> left, Vector64<byte> right);
// int32x4_t vabdl_s16 (int16x4_t a, int16x4_t b)
// A32: VABDL.S16 Qd, Dn, Dm
// A64: SABDL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> AbsoluteDifferenceWideningLower(Vector64<short> left, Vector64<short> right);
// int64x2_t vabdl_s32 (int32x2_t a, int32x2_t b)
// A32: VABDL.S32 Qd, Dn, Dm
// A64: SABDL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> AbsoluteDifferenceWideningLower(Vector64<int> left, Vector64<int> right);
// int16x8_t vabdl_s8 (int8x8_t a, int8x8_t b)
// A32: VABDL.S8 Qd, Dn, Dm
// A64: SABDL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> AbsoluteDifferenceWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
// uint32x4_t vabdl_u16 (uint16x4_t a, uint16x4_t b)
// A32: VABDL.U16 Qd, Dn, Dm
// A64: UABDL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> AbsoluteDifferenceWideningLower(Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vabdl_u32 (uint32x2_t a, uint32x2_t b)
// A32: VABDL.U32 Qd, Dn, Dm
// A64: UABDL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> AbsoluteDifferenceWideningLower(Vector64<uint> left, Vector64<uint> right);
// int16x8_t vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
// A32: VABAL.S8 Qd, Dn, Dm
// A64: SABAL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> AbsoluteDifferenceWideningLowerAndAdd(Vector128<short> addend, Vector64<sbyte> left, Vector64<sbyte> right);
// int32x4_t vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
// A32: VABAL.S16 Qd, Dn, Dm
// A64: SABAL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> AbsoluteDifferenceWideningLowerAndAdd(Vector128<int> addend, Vector64<short> left, Vector64<short> right);
// int64x2_t vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
// A32: VABAL.S32 Qd, Dn, Dm
// A64: SABAL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> AbsoluteDifferenceWideningLowerAndAdd(Vector128<long> addend, Vector64<int> left, Vector64<int> right);
// uint16x8_t vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
// A32: VABAL.U8 Qd, Dn, Dm
// A64: UABAL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ushort> addend, Vector64<byte> left, Vector64<byte> right);
// uint32x4_t vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
// A32: VABAL.U16 Qd, Dn, Dm
// A64: UABAL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> AbsoluteDifferenceWideningLowerAndAdd(Vector128<uint> addend, Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
// A32: VABAL.U32 Qd, Dn, Dm
// A64: UABAL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> AbsoluteDifferenceWideningLowerAndAdd(Vector128<ulong> addend, Vector64<uint> left, Vector64<uint> right);
// uint16x8_t vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
// A32: VABDL.U8 Qd, Dn+1, Dm+1
// A64: UABDL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> AbsoluteDifferenceWideningUpper(Vector128<byte> left, Vector128<byte> right);
// int32x4_t vabdl_high_s16 (int16x8_t a, int16x8_t b)
// A32: VABDL.S16 Qd, Dn+1, Dm+1
// A64: SABDL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> AbsoluteDifferenceWideningUpper(Vector128<short> left, Vector128<short> right);
// int64x2_t vabdl_high_s32 (int32x4_t a, int32x4_t b)
// A32: VABDL.S32 Qd, Dn+1, Dm+1
// A64: SABDL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> AbsoluteDifferenceWideningUpper(Vector128<int> left, Vector128<int> right);
// int16x8_t vabdl_high_s8 (int8x16_t a, int8x16_t b)
// A32: VABDL.S8 Qd, Dn+1, Dm+1
// A64: SABDL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> AbsoluteDifferenceWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
// uint32x4_t vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
// A32: VABDL.U16 Qd, Dn+1, Dm+1
// A64: UABDL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> AbsoluteDifferenceWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
// uint64x2_t vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
// A32: VABDL.U32 Qd, Dn+1, Dm+1
// A64: UABDL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> AbsoluteDifferenceWideningUpper(Vector128<uint> left, Vector128<uint> right);
// int16x8_t vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
// A32: VABAL.S8 Qd, Dn+1, Dm+1
// A64: SABAL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> AbsoluteDifferenceWideningUpperAndAdd(Vector128<short> addend, Vector128<sbyte> left, Vector128<sbyte> right);
// int32x4_t vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
// A32: VABAL.S16 Qd, Dn+1, Dm+1
// A64: SABAL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> AbsoluteDifferenceWideningUpperAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right);
// int64x2_t vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
// A32: VABAL.S32 Qd, Dn+1, Dm+1
// A64: SABAL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> AbsoluteDifferenceWideningUpperAndAdd(Vector128<long> addend, Vector128<int> left, Vector128<int> right);
// uint16x8_t vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
// A32: VABAL.U8 Qd, Dn+1, Dm+1
// A64: UABAL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ushort> addend, Vector128<byte> left, Vector128<byte> right);
// uint32x4_t vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
// A32: VABAL.U16 Qd, Dn+1, Dm+1
// A64: UABAL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> AbsoluteDifferenceWideningUpperAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right);
// uint64x2_t vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
// A32: VABAL.U32 Qd, Dn+1, Dm+1
// A64: UABAL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> AbsoluteDifferenceWideningUpperAndAdd(Vector128<ulong> addend, Vector128<uint> left, Vector128<uint> right);
// int16x4_t vpaddl_s8 (int8x8_t a)
// A32: VPADDL.S8 Dd, Dm
// A64: SADDLP Vd.4H, Vn.8B
public static Vector64<short> AddPairwiseWidening(Vector64<sbyte> value);
// int32x2_t vpaddl_s16 (int16x4_t a)
// A32: VPADDL.S16 Dd, Dm
// A64: SADDLP Vd.2S, Vn.4H
public static Vector64<int> AddPairwiseWidening(Vector64<short> value);
// uint16x4_t vpaddl_u8 (uint8x8_t a)
// A32: VPADDL.U8 Dd, Dm
// A64: UADDLP Vd.4H, Vn.8B
public static Vector64<ushort> AddPairwiseWidening(Vector64<byte> value);
// uint32x2_t vpaddl_u16 (uint16x4_t a)
// A32: VPADDL.U16 Dd, Dm
// A64: UADDLP Vd.2S, Vn.4H
public static Vector64<uint> AddPairwiseWidening(Vector64<ushort> value);
// int16x8_t vpaddlq_s8 (int8x16_t a)
// A32: VPADDL.S8 Qd, Qm
// A64: SADDLP Vd.8H, Vn.16B
public static Vector128<short> AddPairwiseWidening(Vector128<sbyte> value);
// int32x4_t vpaddlq_s16 (int16x8_t a)
// A32: VPADDL.S16 Qd, Qm
// A64: SADDLP Vd.4S, Vn.8H
public static Vector128<int> AddPairwiseWidening(Vector128<short> value);
// int64x2_t vpaddlq_s32 (int32x4_t a)
// A32: VPADDL.S32 Qd, Qm
// A64: SADDLP Vd.2D, Vn.4S
public static Vector128<long> AddPairwiseWidening(Vector128<int> value);
// uint16x8_t vpaddlq_u8 (uint8x16_t a)
// A32: VPADDL.U8 Qd, Qm
// A64: UADDLP Vd.8H, Vn.16B
public static Vector128<ushort> AddPairwiseWidening(Vector128<byte> value);
// uint32x4_t vpaddlq_u16 (uint16x8_t a)
// A32: VPADDL.U16 Qd, Qm
// A64: UADDLP Vd.4S, Vn.8H
public static Vector128<uint> AddPairwiseWidening(Vector128<ushort> value);
// uint64x2_t vpaddlq_u32 (uint32x4_t a)
// A32: VPADDL.U32 Qd, Qm
// A64: UADDLP Vd.2D, Vn.4S
public static Vector128<ulong> AddPairwiseWidening(Vector128<uint> value);
// int16x4_t vpadal_s8 (int16x4_t a, int8x8_t b)
// A32: VPADAL.S8 Dd, Dm
// A64: SADALP Vd.4H, Vn.8B
public static Vector64<short> AddPairwiseWideningAndAdd(Vector64<short> addend, Vector64<sbyte> value);
// int32x2_t vpadal_s16 (int32x2_t a, int16x4_t b)
// A32: VPADAL.S16 Dd, Dm
// A64: SADALP Vd.2S, Vn.4H
public static Vector64<int> AddPairwiseWideningAndAdd(Vector64<int> addend, Vector64<short> value);
// uint16x4_t vpadal_u8 (uint16x4_t a, uint8x8_t b)
// A32: VPADAL.U8 Dd, Dm
// A64: UADALP Vd.4H, Vn.8B
public static Vector64<ushort> AddPairwiseWideningAndAdd(Vector64<ushort> addend, Vector64<byte> value);
// uint32x2_t vpadal_u16 (uint32x2_t a, uint16x4_t b)
// A32: VPADAL.U16 Dd, Dm
// A64: UADALP Vd.2S, Vn.4H
public static Vector64<uint> AddPairwiseWideningAndAdd(Vector64<uint> addend, Vector64<ushort> value);
// int16x8_t vpadalq_s8 (int16x8_t a, int8x16_t b)
// A32: VPADAL.S8 Qd, Qm
// A64: SADALP Vd.8H, Vn.16B
public static Vector128<short> AddPairwiseWideningAndAdd(Vector128<short> addend, Vector128<sbyte> value);
// int32x4_t vpadalq_s16 (int32x4_t a, int16x8_t b)
// A32: VPADAL.S16 Qd, Qm
// A64: SADALP Vd.4S, Vn.8H
public static Vector128<int> AddPairwiseWideningAndAdd(Vector128<int> addend, Vector128<short> value);
// int64x2_t vpadalq_s32 (int64x2_t a, int32x4_t b)
// A32: VPADAL.S32 Qd, Qm
// A64: SADALP Vd.2D, Vn.4S
public static Vector128<long> AddPairwiseWideningAndAdd(Vector128<long> addend, Vector128<int> value);
// uint16x8_t vpadalq_u8 (uint16x8_t a, uint8x16_t b)
// A32: VPADAL.U8 Qd, Qm
// A64: UADALP Vd.8H, Vn.16B
public static Vector128<ushort> AddPairwiseWideningAndAdd(Vector128<ushort> addend, Vector128<byte> value);
// uint32x4_t vpadalq_u16 (uint32x4_t a, uint16x8_t b)
// A32: VPADAL.U16 Qd, Qm
// A64: UADALP Vd.4S, Vn.8H
public static Vector128<uint> AddPairwiseWideningAndAdd(Vector128<uint> addend, Vector128<ushort> value);
// uint64x2_t vpadalq_u32 (uint64x2_t a, uint32x4_t b)
// A32: VPADAL.U32 Qd, Qm
// A64: UADALP Vd.2D, Vn.4S
public static Vector128<ulong> AddPairwiseWideningAndAdd(Vector128<ulong> addend, Vector128<uint> value);
// int64x1_t vpadal_s32 (int64x1_t a, int32x2_t b)
// A32: VPADAL.S32 Dd, Dm
// A64: SADALP Vd.1D, Vn.2S
public static Vector64<long> AddPairwiseWideningAndAddScalar(Vector64<long> addend, Vector64<int> value);
// uint64x1_t vpadal_u32 (uint64x1_t a, uint32x2_t b)
// A32: VPADAL.U32 Dd, Dm
// A64: UADALP Vd.1D, Vn.2S
public static Vector64<ulong> AddPairwiseWideningAndAddScalar(Vector64<ulong> addend, Vector64<uint> value);
// int64x1_t vpaddl_s32 (int32x2_t a)
// A32: VPADDL.S32 Dd, Dm
// A64: SADDLP Vd.1D, Vn.2S
public static Vector64<long> AddPairwiseWideningScalar(Vector64<int> value);
// uint64x1_t vpaddl_u32 (uint32x2_t a)
// A32: VPADDL.U32 Dd, Dm
// A64: UADDLP Vd.1D, Vn.2S
public static Vector64<ulong> AddPairwiseWideningScalar(Vector64<uint> value);
// int8x8_t vaddhn_s16 (int16x8_t a, int16x8_t b)
// A32: VADDHN.I16 Dd, Qn, Qm
// A64: ADDHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<sbyte> AddReturningHighNarrowLower(Vector128<short> left, Vector128<short> right);
// int16x4_t vaddhn_s32 (int32x4_t a, int32x4_t b)
// A32: VADDHN.I32 Dd, Qn, Qm
// A64: ADDHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<short> AddReturningHighNarrowLower(Vector128<int> left, Vector128<int> right);
// int32x2_t vaddhn_s64 (int64x2_t a, int64x2_t b)
// A32: VADDHN.I64 Dd, Qn, Qm
// A64: ADDHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<int> AddReturningHighNarrowLower(Vector128<long> left, Vector128<long> right);
// uint8x8_t vaddhn_u16 (uint16x8_t a, uint16x8_t b)
// A32: VADDHN.I16 Dd, Qn, Qm
// A64: ADDHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<byte> AddReturningHighNarrowLower(Vector128<ushort> left, Vector128<ushort> right);
// uint16x4_t vaddhn_u32 (uint32x4_t a, uint32x4_t b)
// A32: VADDHN.I32 Dd, Qn, Qm
// A64: ADDHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<ushort> AddReturningHighNarrowLower(Vector128<uint> left, Vector128<uint> right);
// uint32x2_t vaddhn_u64 (uint64x2_t a, uint64x2_t b)
// A32: VADDHN.I64 Dd, Qn, Qm
// A64: ADDHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<uint> AddReturningHighNarrowLower(Vector128<ulong> left, Vector128<ulong> right);
// int8x16_t vaddhn_high_s16 (int8x8_t r, int16x8_t a, int16x8_t b)
// A32: VADDHN.I16 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<sbyte> AddReturningHighNarrowUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
// int16x8_t vaddhn_high_s32 (int16x4_t r, int32x4_t a, int32x4_t b)
// A32: VADDHN.I32 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<short> AddReturningHighNarrowUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
// int32x4_t vaddhn_high_s64 (int32x2_t r, int64x2_t a, int64x2_t b)
// A32: VADDHN.I64 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<int> AddReturningHighNarrowUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
// uint8x16_t vaddhn_high_u16 (uint8x8_t r, uint16x8_t a, uint16x8_t b)
// A32: VADDHN.I16 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<byte> AddReturningHighNarrowUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
// uint16x8_t vaddhn_high_u32 (uint16x4_t r, uint32x4_t a, uint32x4_t b)
// A32: VADDHN.I32 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<ushort> AddReturningHighNarrowUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
// uint32x4_t vaddhn_high_u64 (uint32x2_t r, uint64x2_t a, uint64x2_t b)
// A32: VADDHN.I64 Dd+1, Qn, Qm
// A64: ADDHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<uint> AddReturningHighNarrowUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// int8x8_t vraddhn_s16 (int16x8_t a, int16x8_t b)
// A32: VRADDHN.I16 Dd, Qn, Qm
// A64: RADDHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<sbyte> AddReturningRoundedHighNarrowLower(Vector128<short> left, Vector128<short> right);
// int16x4_t vraddhn_s32 (int32x4_t a, int32x4_t b)
// A32: VRADDHN.I32 Dd, Qn, Qm
// A64: RADDHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<short> AddReturningRoundedHighNarrowLower(Vector128<int> left, Vector128<int> right);
// int32x2_t vraddhn_s64 (int64x2_t a, int64x2_t b)
// A32: VRADDHN.I64 Dd, Qn, Qm
// A64: RADDHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<int> AddReturningRoundedHighNarrowLower(Vector128<long> left, Vector128<long> right);
// uint8x8_t vraddhn_u16 (uint16x8_t a, uint16x8_t b)
// A32: VRADDHN.I16 Dd, Qn, Qm
// A64: RADDHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<byte> AddReturningRoundedHighNarrowLower(Vector128<ushort> left, Vector128<ushort> right);
// uint16x4_t vraddhn_u32 (uint32x4_t a, uint32x4_t b)
// A32: VRADDHN.I32 Dd, Qn, Qm
// A64: RADDHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<ushort> AddReturningRoundedHighNarrowLower(Vector128<uint> left, Vector128<uint> right);
// uint32x2_t vraddhn_u64 (uint64x2_t a, uint64x2_t b)
// A32: VRADDHN.I64 Dd, Qn, Qm
// A64: RADDHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<uint> AddReturningRoundedHighNarrowLower(Vector128<ulong> left, Vector128<ulong> right);
// int8x16_t vraddhn_high_s16 (int8x8_t r, int16x8_t a, int16x8_t b)
// A32: VRADDHN.I16 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<sbyte> AddReturningRoundedHighNarrowUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
// int16x8_t vraddhn_high_s32 (int16x4_t r, int32x4_t a, int32x4_t b)
// A32: VRADDHN.I32 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<short> AddReturningRoundedHighNarrowUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
// int32x4_t vraddhn_high_s64 (int32x2_t r, int64x2_t a, int64x2_t b)
// A32: VRADDHN.I64 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<int> AddReturningRoundedHighNarrowUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
// uint8x16_t vraddhn_high_u16 (uint8x8_t r, uint16x8_t a, uint16x8_t b)
// A32: VRADDHN.I16 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<byte> AddReturningRoundedHighNarrowUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
// uint16x8_t vraddhn_high_u32 (uint16x4_t r, uint32x4_t a, uint32x4_t b)
// A32: VRADDHN.I32 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<ushort> AddReturningRoundedHighNarrowUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
// uint32x4_t vraddhn_high_u64 (uint32x2_t r, uint64x2_t a, uint64x2_t b)
// A32: VRADDHN.I64 Dd+1, Qn, Qm
// A64: RADDHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<uint> AddReturningRoundedHighNarrowUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// uint8x8_t vqadd_u8 (uint8x8_t a, uint8x8_t b)
// A32: VQADD.U8 Dd, Dn, Dm
// A64: UQADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<byte> AddSaturate(Vector64<byte> left, Vector64<byte> right);
// int16x4_t vqadd_s16 (int16x4_t a, int16x4_t b)
// A32: VQADD.S16 Dd, Dn, Dm
// A64: SQADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<short> AddSaturate(Vector64<short> left, Vector64<short> right);
// int32x2_t vqadd_s32 (int32x2_t a, int32x2_t b)
// A32: VQADD.S32 Dd, Dn, Dm
// A64: SQADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<int> AddSaturate(Vector64<int> left, Vector64<int> right);
// int8x8_t vqadd_s8 (int8x8_t a, int8x8_t b)
// A32: VQADD.S8 Dd, Dn, Dm
// A64: SQADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<sbyte> AddSaturate(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16x4_t vqadd_u16 (uint16x4_t a, uint16x4_t b)
// A32: VQADD.U16 Dd, Dn, Dm
// A64: UQADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<ushort> AddSaturate(Vector64<ushort> left, Vector64<ushort> right);
// uint32x2_t vqadd_u32 (uint32x2_t a, uint32x2_t b)
// A32: VQADD.U32 Dd, Dn, Dm
// A64: UQADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<uint> AddSaturate(Vector64<uint> left, Vector64<uint> right);
// uint8x16_t vqaddq_u8 (uint8x16_t a, uint8x16_t b)
// A32: VQADD.U8 Qd, Qn, Qm
// A64: UQADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right);
// int16x8_t vqaddq_s16 (int16x8_t a, int16x8_t b)
// A32: VQADD.S16 Qd, Qn, Qm
// A64: SQADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<short> AddSaturate(Vector128<short> left, Vector128<short> right);
// int32x4_t vqaddq_s32 (int32x4_t a, int32x4_t b)
// A32: VQADD.S32 Qd, Qn, Qm
// A64: SQADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<int> AddSaturate(Vector128<int> left, Vector128<int> right);
// int64x2_t vqaddq_s64 (int64x2_t a, int64x2_t b)
// A32: VQADD.S64 Qd, Qn, Qm
// A64: SQADD Vd.2D, Vn.2D, Vm.2D
public static Vector128<long> AddSaturate(Vector128<long> left, Vector128<long> right);
// int8x16_t vqaddq_s8 (int8x16_t a, int8x16_t b)
// A32: VQADD.S8 Qd, Qn, Qm
// A64: SQADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<sbyte> AddSaturate(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vqaddq_u16 (uint16x8_t a, uint16x8_t b)
// A32: VQADD.U16 Qd, Qn, Qm
// A64: UQADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<ushort> AddSaturate(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vqaddq_u32 (uint32x4_t a, uint32x4_t b)
// A32: VQADD.U32 Qd, Qn, Qm
// A64: UQADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<uint> AddSaturate(Vector128<uint> left, Vector128<uint> right);
// uint64x2_t vqaddq_u64 (uint64x2_t a, uint64x2_t b)
// A32: VQADD.U64 Qd, Qn, Qm
// A64: UQADD Vd.2D, Vn.2D, Vm.2D
public static Vector128<ulong> AddSaturate(Vector128<ulong> left, Vector128<ulong> right);
// int64x1_t vqadd_s64 (int64x1_t a, int64x1_t b)
// A32: VQADD.S64 Dd, Dn, Dm
// A64: SQADD Dd, Dn, Dm
public static Vector64<long> AddSaturateScalar(Vector64<long> left, Vector64<long> right);
// uint64x1_t vqadd_u64 (uint64x1_t a, uint64x1_t b)
// A32: VQADD.U64 Dd, Dn, Dm
// A64: UQADD Dd, Dn, Dm
public static Vector64<ulong> AddSaturateScalar(Vector64<ulong> left, Vector64<ulong> right);
// uint16x8_t vaddl_u8 (uint8x8_t a, uint8x8_t b)
// A32: VADDL.U8 Qd, Dn, Dm
// A64: UADDL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> AddWideningLower(Vector64<byte> left, Vector64<byte> right);
// int32x4_t vaddl_s16 (int16x4_t a, int16x4_t b)
// A32: VADDL.S16 Qd, Dn, Dm
// A64: SADDL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> AddWideningLower(Vector64<short> left, Vector64<short> right);
// int64x2_t vaddl_s32 (int32x2_t a, int32x2_t b)
// A32: VADDL.S32 Qd, Dn, Dm
// A64: SADDL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> AddWideningLower(Vector64<int> left, Vector64<int> right);
// int16x8_t vaddl_s8 (int8x8_t a, int8x8_t b)
// A32: VADDL.S8 Qd, Dn, Dm
// A64: SADDL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> AddWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
// uint32x4_t vaddl_u16 (uint16x4_t a, uint16x4_t b)
// A32: VADDL.U16 Qd, Dn, Dm
// A64: UADDL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> AddWideningLower(Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vaddl_u32 (uint32x2_t a, uint32x2_t b)
// A32: VADDL.U32 Qd, Dn, Dm
// A64: UADDL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> AddWideningLower(Vector64<uint> left, Vector64<uint> right);
// int16x8_t vaddw_s8 (int16x8_t a, int8x8_t b)
// A32: VADDW.S8 Qd, Qn, Dm
// A64: SADDW Vd.8H, Vn.8H, Vm.8B
public static Vector128<short> AddWideningLower(Vector128<short> left, Vector64<sbyte> right);
// int32x4_t vaddw_s16 (int32x4_t a, int16x4_t b)
// A32: VADDW.S16 Qd, Qn, Dm
// A64: SADDW Vd.4S, Vn.4S, Vm.4H
public static Vector128<int> AddWideningLower(Vector128<int> left, Vector64<short> right);
// int64x2_t vaddw_s32 (int64x2_t a, int32x2_t b)
// A32: VADDW.S32 Qd, Qn, Dm
// A64: SADDW Vd.2D, Vn.2D, Vm.2S
public static Vector128<long> AddWideningLower(Vector128<long> left, Vector64<int> right);
// uint16x8_t vaddw_u8 (uint16x8_t a, uint8x8_t b)
// A32: VADDW.U8 Qd, Qn, Dm
// A64: UADDW Vd.8H, Vn.8H, Vm.8B
public static Vector128<ushort> AddWideningLower(Vector128<ushort> left, Vector64<byte> right);
// uint32x4_t vaddw_u16 (uint32x4_t a, uint16x4_t b)
// A32: VADDW.U16 Qd, Qn, Dm
// A64: UADDW Vd.4S, Vn.4S, Vm.4H
public static Vector128<uint> AddWideningLower(Vector128<uint> left, Vector64<ushort> right);
// uint64x2_t vaddw_u32 (uint64x2_t a, uint32x2_t b)
// A32: VADDW.U32 Qd, Qn, Dm
// A64: UADDW Vd.2D, Vn.2D, Vm.2S
public static Vector128<ulong> AddWideningLower(Vector128<ulong> left, Vector64<uint> right);
// uint16x8_t vaddl_high_u8 (uint8x16_t a, uint8x16_t b)
// A32: VADDL.U8 Qd, Dn+1, Dm+1
// A64: UADDL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> AddWideningUpper(Vector128<byte> left, Vector128<byte> right);
// int32x4_t vaddl_high_s16 (int16x8_t a, int16x8_t b)
// A32: VADDL.S16 Qd, Dn+1, Dm+1
// A64: SADDL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> AddWideningUpper(Vector128<short> left, Vector128<short> right);
// int16x8_t vaddw_high_s8 (int16x8_t a, int8x16_t b)
// A32: VADDW.S8 Qd, Qn, Dm+1
// A64: SADDW2 Vd.8H, Vn.8H, Vm.16B
public static Vector128<short> AddWideningUpper(Vector128<short> left, Vector128<sbyte> right);
// int32x4_t vaddw_high_s16 (int32x4_t a, int16x8_t b)
// A32: VADDW.S16 Qd, Qn, Dm+1
// A64: SADDW2 Vd.4S, Vn.4S, Vm.8H
public static Vector128<int> AddWideningUpper(Vector128<int> left, Vector128<short> right);
// int64x2_t vaddl_high_s32 (int32x4_t a, int32x4_t b)
// A32: VADDL.S32 Qd, Dn+1, Dm+1
// A64: SADDL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> AddWideningUpper(Vector128<int> left, Vector128<int> right);
// int64x2_t vaddw_high_s32 (int64x2_t a, int32x4_t b)
// A32: VADDW.S32 Qd, Qn, Dm+1
// A64: SADDW2 Vd.2D, Vn.2D, Vm.4S
public static Vector128<long> AddWideningUpper(Vector128<long> left, Vector128<int> right);
// int16x8_t vaddl_high_s8 (int8x16_t a, int8x16_t b)
// A32: VADDL.S8 Qd, Dn+1, Dm+1
// A64: SADDL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> AddWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vaddw_high_u8 (uint16x8_t a, uint8x16_t b)
// A32: VADDW.U8 Qd, Qn, Dm+1
// A64: UADDW2 Vd.8H, Vn.8H, Vm.16B
public static Vector128<ushort> AddWideningUpper(Vector128<ushort> left, Vector128<byte> right);
// uint32x4_t vaddl_high_u16 (uint16x8_t a, uint16x8_t b)
// A32: VADDL.U16 Qd, Dn+1, Dm+1
// A64: UADDL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> AddWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vaddw_high_u16 (uint32x4_t a, uint16x8_t b)
// A32: VADDW.U16 Qd, Qn, Dm+1
// A64: UADDW2 Vd.4S, Vn.4S, Vm.8H
public static Vector128<uint> AddWideningUpper(Vector128<uint> left, Vector128<ushort> right);
// uint64x2_t vaddl_high_u32 (uint32x4_t a, uint32x4_t b)
// A32: VADDL.U32 Qd, Dn+1, Dm+1
// A64: UADDL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> AddWideningUpper(Vector128<uint> left, Vector128<uint> right);
// uint64x2_t vaddw_high_u32 (uint64x2_t a, uint32x4_t b)
// A32: VADDW.U32 Qd, Qn, Dm+1
// A64: UADDW2 Vd.2D, Vn.2D, Vm.4S
public static Vector128<ulong> AddWideningUpper(Vector128<ulong> left, Vector128<uint> right);
// uint8x8_t vhadd_u8 (uint8x8_t a, uint8x8_t b)
// A32: VHADD.U8 Dd, Dn, Dm
// A64: UHADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<byte> FusedAddHalving(Vector64<byte> left, Vector64<byte> right);
// int16x4_t vhadd_s16 (int16x4_t a, int16x4_t b)
// A32: VHADD.S16 Dd, Dn, Dm
// A64: SHADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<short> FusedAddHalving(Vector64<short> left, Vector64<short> right);
// int32x2_t vhadd_s32 (int32x2_t a, int32x2_t b)
// A32: VHADD.S32 Dd, Dn, Dm
// A64: SHADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<int> FusedAddHalving(Vector64<int> left, Vector64<int> right);
// int8x8_t vhadd_s8 (int8x8_t a, int8x8_t b)
// A32: VHADD.S8 Dd, Dn, Dm
// A64: SHADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<sbyte> FusedAddHalving(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16x4_t vhadd_u16 (uint16x4_t a, uint16x4_t b)
// A32: VHADD.U16 Dd, Dn, Dm
// A64: UHADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<ushort> FusedAddHalving(Vector64<ushort> left, Vector64<ushort> right);
// uint32x2_t vhadd_u32 (uint32x2_t a, uint32x2_t b)
// A32: VHADD.U32 Dd, Dn, Dm
// A64: UHADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<uint> FusedAddHalving(Vector64<uint> left, Vector64<uint> right);
// uint8x16_t vhaddq_u8 (uint8x16_t a, uint8x16_t b)
// A32: VHADD.U8 Qd, Qn, Qm
// A64: UHADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<byte> FusedAddHalving(Vector128<byte> left, Vector128<byte> right);
// int16x8_t vhaddq_s16 (int16x8_t a, int16x8_t b)
// A32: VHADD.S16 Qd, Qn, Qm
// A64: SHADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<short> FusedAddHalving(Vector128<short> left, Vector128<short> right);
// int32x4_t vhaddq_s32 (int32x4_t a, int32x4_t b)
// A32: VHADD.S32 Qd, Qn, Qm
// A64: SHADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<int> FusedAddHalving(Vector128<int> left, Vector128<int> right);
// int8x16_t vhaddq_s8 (int8x16_t a, int8x16_t b)
// A32: VHADD.S8 Qd, Qn, Qm
// A64: SHADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<sbyte> FusedAddHalving(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vhaddq_u16 (uint16x8_t a, uint16x8_t b)
// A32: VHADD.U16 Qd, Qn, Qm
// A64: UHADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<ushort> FusedAddHalving(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vhaddq_u32 (uint32x4_t a, uint32x4_t b)
// A32: VHADD.U32 Qd, Qn, Qm
// A64: UHADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<uint> FusedAddHalving(Vector128<uint> left, Vector128<uint> right);
// uint8x8_t vrhadd_u8 (uint8x8_t a, uint8x8_t b)
// A32: VRHADD.U8 Dd, Dn, Dm
// A64: URHADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<byte> FusedAddRoundedHalving(Vector64<byte> left, Vector64<byte> right);
// int16x4_t vrhadd_s16 (int16x4_t a, int16x4_t b)
// A32: VRHADD.S16 Dd, Dn, Dm
// A64: SRHADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<short> FusedAddRoundedHalving(Vector64<short> left, Vector64<short> right);
// int32x2_t vrhadd_s32 (int32x2_t a, int32x2_t b)
// A32: VRHADD.S32 Dd, Dn, Dm
// A64: SRHADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<int> FusedAddRoundedHalving(Vector64<int> left, Vector64<int> right);
// int8x8_t vrhadd_s8 (int8x8_t a, int8x8_t b)
// A32: VRHADD.S8 Dd, Dn, Dm
// A64: SRHADD Vd.8B, Vn.8B, Vm.8B
public static Vector64<sbyte> FusedAddRoundedHalving(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16x4_t vrhadd_u16 (uint16x4_t a, uint16x4_t b)
// A32: VRHADD.U16 Dd, Dn, Dm
// A64: URHADD Vd.4H, Vn.4H, Vm.4H
public static Vector64<ushort> FusedAddRoundedHalving(Vector64<ushort> left, Vector64<ushort> right);
// uint32x2_t vrhadd_u32 (uint32x2_t a, uint32x2_t b)
// A32: VRHADD.U32 Dd, Dn, Dm
// A64: URHADD Vd.2S, Vn.2S, Vm.2S
public static Vector64<uint> FusedAddRoundedHalving(Vector64<uint> left, Vector64<uint> right);
// uint8x16_t vrhaddq_u8 (uint8x16_t a, uint8x16_t b)
// A32: VRHADD.U8 Qd, Qn, Qm
// A64: URHADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<byte> FusedAddRoundedHalving(Vector128<byte> left, Vector128<byte> right);
// int16x8_t vrhaddq_s16 (int16x8_t a, int16x8_t b)
// A32: VRHADD.S16 Qd, Qn, Qm
// A64: SRHADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<short> FusedAddRoundedHalving(Vector128<short> left, Vector128<short> right);
// int32x4_t vrhaddq_s32 (int32x4_t a, int32x4_t b)
// A32: VRHADD.S32 Qd, Qn, Qm
// A64: SRHADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<int> FusedAddRoundedHalving(Vector128<int> left, Vector128<int> right);
// int8x16_t vrhaddq_s8 (int8x16_t a, int8x16_t b)
// A32: VRHADD.S8 Qd, Qn, Qm
// A64: SRHADD Vd.16B, Vn.16B, Vm.16B
public static Vector128<sbyte> FusedAddRoundedHalving(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vrhaddq_u16 (uint16x8_t a, uint16x8_t b)
// A32: VRHADD.U16 Qd, Qn, Qm
// A64: URHADD Vd.8H, Vn.8H, Vm.8H
public static Vector128<ushort> FusedAddRoundedHalving(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vrhaddq_u32 (uint32x4_t a, uint32x4_t b)
// A32: VRHADD.U32 Qd, Qn, Qm
// A64: URHADD Vd.4S, Vn.4S, Vm.4S
public static Vector128<uint> FusedAddRoundedHalving(Vector128<uint> left, Vector128<uint> right);
// uint8x8_t vhsub_u8 (uint8x8_t a, uint8x8_t b)
// A32: VHSUB.U8 Dd, Dn, Dm
// A64: UHSUB Vd.8B, Vn.8B, Vm.8B
public static Vector64<byte> FusedSubtractHalving(Vector64<byte> left, Vector64<byte> right);
// int16x4_t vhsub_s16 (int16x4_t a, int16x4_t b)
// A32: VHSUB.S16 Dd, Dn, Dm
// A64: SHSUB Vd.4H, Vn.4H, Vm.4H
public static Vector64<short> FusedSubtractHalving(Vector64<short> left, Vector64<short> right);
// int32x2_t vhsub_s32 (int32x2_t a, int32x2_t b)
// A32: VHSUB.S32 Dd, Dn, Dm
// A64: SHSUB Vd.2S, Vn.2S, Vm.2S
public static Vector64<int> FusedSubtractHalving(Vector64<int> left, Vector64<int> right);
// int8x8_t vhsub_s8 (int8x8_t a, int8x8_t b)
// A32: VHSUB.S8 Dd, Dn, Dm
// A64: SHSUB Vd.8B, Vn.8B, Vm.8B
public static Vector64<sbyte> FusedSubtractHalving(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16x4_t vhsub_u16 (uint16x4_t a, uint16x4_t b)
// A32: VHSUB.U16 Dd, Dn, Dm
// A64: UHSUB Vd.4H, Vn.4H, Vm.4H
public static Vector64<ushort> FusedSubtractHalving(Vector64<ushort> left, Vector64<ushort> right);
// uint32x2_t vhsub_u32 (uint32x2_t a, uint32x2_t b)
// A32: VHSUB.U32 Dd, Dn, Dm
// A64: UHSUB Vd.2S, Vn.2S, Vm.2S
public static Vector64<uint> FusedSubtractHalving(Vector64<uint> left, Vector64<uint> right);
// uint8x16_t vhsubq_u8 (uint8x16_t a, uint8x16_t b)
// A32: VHSUB.U8 Qd, Qn, Qm
// A64: UHSUB Vd.16B, Vn.16B, Vm.16B
public static Vector128<byte> FusedSubtractHalving(Vector128<byte> left, Vector128<byte> right);
// int16x8_t vhsubq_s16 (int16x8_t a, int16x8_t b)
// A32: VHSUB.S16 Qd, Qn, Qm
// A64: SHSUB Vd.8H, Vn.8H, Vm.8H
public static Vector128<short> FusedSubtractHalving(Vector128<short> left, Vector128<short> right);
// int32x4_t vhsubq_s32 (int32x4_t a, int32x4_t b)
// A32: VHSUB.S32 Qd, Qn, Qm
// A64: SHSUB Vd.4S, Vn.4S, Vm.4S
public static Vector128<int> FusedSubtractHalving(Vector128<int> left, Vector128<int> right);
// int8x16_t vhsubq_s8 (int8x16_t a, int8x16_t b)
// A32: VHSUB.S8 Qd, Qn, Qm
// A64: SHSUB Vd.16B, Vn.16B, Vm.16B
public static Vector128<sbyte> FusedSubtractHalving(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vhsubq_u16 (uint16x8_t a, uint16x8_t b)
// A32: VHSUB.U16 Qd, Qn, Qm
// A64: UHSUB Vd.8H, Vn.8H, Vm.8H
public static Vector128<ushort> FusedSubtractHalving(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vhsubq_u32 (uint32x4_t a, uint32x4_t b)
// A32: VHSUB.U32 Qd, Qn, Qm
// A64: UHSUB Vd.4S, Vn.4S, Vm.4S
public static Vector128<uint> FusedSubtractHalving(Vector128<uint> left, Vector128<uint> right);
// uint16x8_t vmull_u8 (uint8x8_t a, uint8x8_t b)
// A32: VMULL.U8 Qd, Dn, Dm
// A64: UMULL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> MultiplyWideningLower(Vector64<byte> left, Vector64<byte> right);
// int32x4_t vmull_s16 (int16x4_t a, int16x4_t b)
// A32: VMULL.S16 Qd, Dn, Dm
// A64: SMULL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> MultiplyWideningLower(Vector64<short> left, Vector64<short> right);
// int64x2_t vmull_s32 (int32x2_t a, int32x2_t b)
// A32: VMULL.S32 Qd, Dn, Dm
// A64: SMULL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> MultiplyWideningLower(Vector64<int> left, Vector64<int> right);
// int16x8_t vmull_s8 (int8x8_t a, int8x8_t b)
// A32: VMULL.S8 Qd, Dn, Dm
// A64: SMULL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> MultiplyWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
// uint32x4_t vmull_u16 (uint16x4_t a, uint16x4_t b)
// A32: VMULL.U16 Qd, Dn, Dm
// A64: UMULL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> MultiplyWideningLower(Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vmull_u32 (uint32x2_t a, uint32x2_t b)
// A32: VMULL.U32 Qd, Dn, Dm
// A64: UMULL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> MultiplyWideningLower(Vector64<uint> left, Vector64<uint> right);
// int16x8_t vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
// A32: VMLAL.S8 Qd, Dn, Dm
// A64: SMLAL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> MultiplyWideningLowerAndAdd(Vector128<short> addend, Vector64<sbyte> left, Vector64<sbyte> right);
// int32x4_t vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
// A32: VMLAL.S16 Qd, Dn, Dm
// A64: SMLAL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> MultiplyWideningLowerAndAdd(Vector128<int> addend, Vector64<short> left, Vector64<short> right);
// int64x2_t vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
// A32: VMLAL.S32 Qd, Dn, Dm
// A64: SMLAL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> MultiplyWideningLowerAndAdd(Vector128<long> addend, Vector64<int> left, Vector64<int> right);
// uint16x8_t vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
// A32: VMLAL.U8 Qd, Dn, Dm
// A64: UMLAL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> MultiplyWideningLowerAndAdd(Vector128<ushort> addend, Vector64<byte> left, Vector64<byte> right);
// uint32x4_t vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
// A32: VMLAL.U16 Qd, Dn, Dm
// A64: UMLAL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> MultiplyWideningLowerAndAdd(Vector128<uint> addend, Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
// A32: VMLAL.U32 Qd, Dn, Dm
// A64: UMLAL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> MultiplyWideningLowerAndAdd(Vector128<ulong> addend, Vector64<uint> left, Vector64<uint> right);
// int16x8_t vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
// A32: VMLSL.S8 Qd, Dn, Dm
// A64: SMLSL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> MultiplyWideningLowerAndSubtract(Vector128<short> minuend, Vector64<sbyte> left, Vector64<sbyte> right);
// int32x4_t vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
// A32: VMLSL.S16 Qd, Dn, Dm
// A64: SMLSL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> MultiplyWideningLowerAndSubtract(Vector128<int> minuend, Vector64<short> left, Vector64<short> right);
// int64x2_t vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
// A32: VMLSL.S32 Qd, Dn, Dm
// A64: SMLSL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> MultiplyWideningLowerAndSubtract(Vector128<long> minuend, Vector64<int> left, Vector64<int> right);
// uint16x8_t vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
// A32: VMLSL.U8 Qd, Dn, Dm
// A64: UMLSL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> MultiplyWideningLowerAndSubtract(Vector128<ushort> minuend, Vector64<byte> left, Vector64<byte> right);
// uint32x4_t vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
// A32: VMLSL.U16 Qd, Dn, Dm
// A64: UMLSL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> MultiplyWideningLowerAndSubtract(Vector128<uint> minuend, Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
// A32: VMLSL.U32 Qd, Dn, Dm
// A64: UMLSL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> MultiplyWideningLowerAndSubtract(Vector128<ulong> minuend, Vector64<uint> left, Vector64<uint> right);
// uint16x8_t vmull_high_u8 (uint8x16_t a, uint8x16_t b)
// A32: VMULL.U8 Qd, Dn+1, Dm+1
// A64: UMULL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> MultiplyWideningUpper(Vector128<byte> left, Vector128<byte> right);
// int32x4_t vmull_high_s16 (int16x8_t a, int16x8_t b)
// A32: VMULL.S16 Qd, Dn+1, Dm+1
// A64: SMULL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> MultiplyWideningUpper(Vector128<short> left, Vector128<short> right);
// int64x2_t vmull_high_s32 (int32x4_t a, int32x4_t b)
// A32: VMULL.S32 Qd, Dn+1, Dm+1
// A64: SMULL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> MultiplyWideningUpper(Vector128<int> left, Vector128<int> right);
// int16x8_t vmull_high_s8 (int8x16_t a, int8x16_t b)
// A32: VMULL.S8 Qd, Dn+1, Dm+1
// A64: SMULL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> MultiplyWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
// uint32x4_t vmull_high_u16 (uint16x8_t a, uint16x8_t b)
// A32: VMULL.U16 Qd, Dn+1, Dm+1
// A64: UMULL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> MultiplyWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
// uint64x2_t vmull_high_u32 (uint32x4_t a, uint32x4_t b)
// A32: VMULL.U32 Qd, Dn+1, Dm+1
// A64: UMULL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> MultiplyWideningUpper(Vector128<uint> left, Vector128<uint> right);
// MultiplyWideningUpperAndAdd overloads, one per ACLE vmlal_high_* intrinsic
// (A32 VMLAL; A64 SMLAL2 for signed and UMLAL2 for unsigned element types).
// `addend` has the same (wide) type as the return value; `left` and `right` have the
// element type of half that width.
// NOTE(review): presumably computes addend + widen(upper(left) * upper(right)) per element,
// with `addend` supplied in the destination register - confirm against the ACLE.
// int16x8_t vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
// A32: VMLAL.S8 Qd, Dn+1, Dm+1
// A64: SMLAL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> MultiplyWideningUpperAndAdd(Vector128<short> addend, Vector128<sbyte> left, Vector128<sbyte> right);
// int32x4_t vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
// A32: VMLAL.S16 Qd, Dn+1, Dm+1
// A64: SMLAL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> MultiplyWideningUpperAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right);
// int64x2_t vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
// A32: VMLAL.S32 Qd, Dn+1, Dm+1
// A64: SMLAL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> MultiplyWideningUpperAndAdd(Vector128<long> addend, Vector128<int> left, Vector128<int> right);
// uint16x8_t vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
// A32: VMLAL.U8 Qd, Dn+1, Dm+1
// A64: UMLAL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> MultiplyWideningUpperAndAdd(Vector128<ushort> addend, Vector128<byte> left, Vector128<byte> right);
// uint32x4_t vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
// A32: VMLAL.U16 Qd, Dn+1, Dm+1
// A64: UMLAL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> MultiplyWideningUpperAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right);
// uint64x2_t vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
// A32: VMLAL.U32 Qd, Dn+1, Dm+1
// A64: UMLAL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> MultiplyWideningUpperAndAdd(Vector128<ulong> addend, Vector128<uint> left, Vector128<uint> right);
// MultiplyWideningUpperAndSubtract overloads, one per ACLE vmlsl_high_* intrinsic
// (A32 VMLSL; A64 SMLSL2 for signed and UMLSL2 for unsigned element types).
// `minuend` has the same (wide) type as the return value; `left` and `right` have the
// element type of half that width.
// NOTE(review): presumably computes minuend - widen(upper(left) * upper(right)) per element,
// with `minuend` supplied in the destination register - confirm against the ACLE.
// int16x8_t vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
// A32: VMLSL.S8 Qd, Dn+1, Dm+1
// A64: SMLSL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> MultiplyWideningUpperAndSubtract(Vector128<short> minuend, Vector128<sbyte> left, Vector128<sbyte> right);
// int32x4_t vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
// A32: VMLSL.S16 Qd, Dn+1, Dm+1
// A64: SMLSL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> MultiplyWideningUpperAndSubtract(Vector128<int> minuend, Vector128<short> left, Vector128<short> right);
// int64x2_t vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
// A32: VMLSL.S32 Qd, Dn+1, Dm+1
// A64: SMLSL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> MultiplyWideningUpperAndSubtract(Vector128<long> minuend, Vector128<int> left, Vector128<int> right);
// uint16x8_t vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
// A32: VMLSL.U8 Qd, Dn+1, Dm+1
// A64: UMLSL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> MultiplyWideningUpperAndSubtract(Vector128<ushort> minuend, Vector128<byte> left, Vector128<byte> right);
// uint32x4_t vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
// A32: VMLSL.U16 Qd, Dn+1, Dm+1
// A64: UMLSL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> MultiplyWideningUpperAndSubtract(Vector128<uint> minuend, Vector128<ushort> left, Vector128<ushort> right);
// uint64x2_t vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
// A32: VMLSL.U32 Qd, Dn+1, Dm+1
// A64: UMLSL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> MultiplyWideningUpperAndSubtract(Vector128<ulong> minuend, Vector128<uint> left, Vector128<uint> right);
// SubtractReturningHighNarrowLower overloads, one per ACLE vsubhn_* intrinsic
// (A32 VSUBHN; A64 SUBHN). The signed and unsigned overloads share the same instruction form.
// Both inputs are Vector128; the result is a Vector64 whose element type is half the input width.
// NOTE(review): presumably returns the most significant half of each element of (left - right)
// - confirm against the ACLE.
// int8x8_t vsubhn_s16 (int16x8_t a, int16x8_t b)
// A32: VSUBHN.I16 Dd, Qn, Qm
// A64: SUBHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<sbyte> SubtractReturningHighNarrowLower(Vector128<short> left, Vector128<short> right);
// int16x4_t vsubhn_s32 (int32x4_t a, int32x4_t b)
// A32: VSUBHN.I32 Dd, Qn, Qm
// A64: SUBHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<short> SubtractReturningHighNarrowLower(Vector128<int> left, Vector128<int> right);
// int32x2_t vsubhn_s64 (int64x2_t a, int64x2_t b)
// A32: VSUBHN.I64 Dd, Qn, Qm
// A64: SUBHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<int> SubtractReturningHighNarrowLower(Vector128<long> left, Vector128<long> right);
// uint8x8_t vsubhn_u16 (uint16x8_t a, uint16x8_t b)
// A32: VSUBHN.I16 Dd, Qn, Qm
// A64: SUBHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<byte> SubtractReturningHighNarrowLower(Vector128<ushort> left, Vector128<ushort> right);
// uint16x4_t vsubhn_u32 (uint32x4_t a, uint32x4_t b)
// A32: VSUBHN.I32 Dd, Qn, Qm
// A64: SUBHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<ushort> SubtractReturningHighNarrowLower(Vector128<uint> left, Vector128<uint> right);
// uint32x2_t vsubhn_u64 (uint64x2_t a, uint64x2_t b)
// A32: VSUBHN.I64 Dd, Qn, Qm
// A64: SUBHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<uint> SubtractReturningHighNarrowLower(Vector128<ulong> left, Vector128<ulong> right);
// SubtractReturningHighNarrowUpper overloads, one per ACLE vsubhn_high_* intrinsic
// (A32 VSUBHN writing Dd+1; A64 SUBHN2). The signed and unsigned overloads share the same
// instruction form.
// `lower` is a Vector64 with the narrow element type; `left` and `right` are Vector128 with the
// wide element type, so Vn and Vm both use the wide arrangement. The result is a Vector128 with
// the narrow element type.
// NOTE(review): presumably `lower` is kept as the lower half of the result and the narrowed
// high halves of (left - right) fill the upper half - confirm against the ACLE.
// int8x16_t vsubhn_high_s16 (int8x8_t r, int16x8_t a, int16x8_t b)
// A32: VSUBHN.I16 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<sbyte> SubtractReturningHighNarrowUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
// int16x8_t vsubhn_high_s32 (int16x4_t r, int32x4_t a, int32x4_t b)
// A32: VSUBHN.I32 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<short> SubtractReturningHighNarrowUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
// int32x4_t vsubhn_high_s64 (int32x2_t r, int64x2_t a, int64x2_t b)
// A32: VSUBHN.I64 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<int> SubtractReturningHighNarrowUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
// uint8x16_t vsubhn_high_u16 (uint8x8_t r, uint16x8_t a, uint16x8_t b)
// A32: VSUBHN.I16 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<byte> SubtractReturningHighNarrowUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
// uint16x8_t vsubhn_high_u32 (uint16x4_t r, uint32x4_t a, uint32x4_t b)
// A32: VSUBHN.I32 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<ushort> SubtractReturningHighNarrowUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
// uint32x4_t vsubhn_high_u64 (uint32x2_t r, uint64x2_t a, uint64x2_t b)
// A32: VSUBHN.I64 Dd+1, Qn, Qm
// A64: SUBHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<uint> SubtractReturningHighNarrowUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// SubtractReturningRoundedHighNarrowLower overloads, one per ACLE vrsubhn_* intrinsic
// (A32 VRSUBHN; A64 RSUBHN). The signed and unsigned overloads share the same instruction form.
// Both inputs are Vector128; the result is a Vector64 whose element type is half the input width.
// NOTE(review): presumably the rounding variant of SubtractReturningHighNarrowLower, i.e. the
// high half of each (left - right) element after rounding - confirm against the ACLE.
// int8x8_t vrsubhn_s16 (int16x8_t a, int16x8_t b)
// A32: VRSUBHN.I16 Dd, Qn, Qm
// A64: RSUBHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<sbyte> SubtractReturningRoundedHighNarrowLower(Vector128<short> left, Vector128<short> right);
// int16x4_t vrsubhn_s32 (int32x4_t a, int32x4_t b)
// A32: VRSUBHN.I32 Dd, Qn, Qm
// A64: RSUBHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<short> SubtractReturningRoundedHighNarrowLower(Vector128<int> left, Vector128<int> right);
// int32x2_t vrsubhn_s64 (int64x2_t a, int64x2_t b)
// A32: VRSUBHN.I64 Dd, Qn, Qm
// A64: RSUBHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<int> SubtractReturningRoundedHighNarrowLower(Vector128<long> left, Vector128<long> right);
// uint8x8_t vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
// A32: VRSUBHN.I16 Dd, Qn, Qm
// A64: RSUBHN Vd.8B, Vn.8H, Vm.8H
public static Vector64<byte> SubtractReturningRoundedHighNarrowLower(Vector128<ushort> left, Vector128<ushort> right);
// uint16x4_t vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
// A32: VRSUBHN.I32 Dd, Qn, Qm
// A64: RSUBHN Vd.4H, Vn.4S, Vm.4S
public static Vector64<ushort> SubtractReturningRoundedHighNarrowLower(Vector128<uint> left, Vector128<uint> right);
// uint32x2_t vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
// A32: VRSUBHN.I64 Dd, Qn, Qm
// A64: RSUBHN Vd.2S, Vn.2D, Vm.2D
public static Vector64<uint> SubtractReturningRoundedHighNarrowLower(Vector128<ulong> left, Vector128<ulong> right);
// SubtractReturningRoundedHighNarrowUpper overloads, one per ACLE vrsubhn_high_* intrinsic
// (A32 VRSUBHN writing Dd+1; A64 RSUBHN2). The signed and unsigned overloads share the same
// instruction form.
// `lower` is a Vector64 with the narrow element type; `left` and `right` are Vector128 with the
// wide element type, so Vn and Vm both use the wide arrangement. The result is a Vector128 with
// the narrow element type.
// NOTE(review): presumably `lower` is kept as the lower half of the result and the rounded,
// narrowed high halves of (left - right) fill the upper half - confirm against the ACLE.
// int8x16_t vrsubhn_high_s16 (int8x8_t r, int16x8_t a, int16x8_t b)
// A32: VRSUBHN.I16 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<sbyte> SubtractReturningRoundedHighNarrowUpper(Vector64<sbyte> lower, Vector128<short> left, Vector128<short> right);
// int16x8_t vrsubhn_high_s32 (int16x4_t r, int32x4_t a, int32x4_t b)
// A32: VRSUBHN.I32 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<short> SubtractReturningRoundedHighNarrowUpper(Vector64<short> lower, Vector128<int> left, Vector128<int> right);
// int32x4_t vrsubhn_high_s64 (int32x2_t r, int64x2_t a, int64x2_t b)
// A32: VRSUBHN.I64 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<int> SubtractReturningRoundedHighNarrowUpper(Vector64<int> lower, Vector128<long> left, Vector128<long> right);
// uint8x16_t vrsubhn_high_u16 (uint8x8_t r, uint16x8_t a, uint16x8_t b)
// A32: VRSUBHN.I16 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.16B, Vn.8H, Vm.8H
public static Vector128<byte> SubtractReturningRoundedHighNarrowUpper(Vector64<byte> lower, Vector128<ushort> left, Vector128<ushort> right);
// uint16x8_t vrsubhn_high_u32 (uint16x4_t r, uint32x4_t a, uint32x4_t b)
// A32: VRSUBHN.I32 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.8H, Vn.4S, Vm.4S
public static Vector128<ushort> SubtractReturningRoundedHighNarrowUpper(Vector64<ushort> lower, Vector128<uint> left, Vector128<uint> right);
// uint32x4_t vrsubhn_high_u64 (uint32x2_t r, uint64x2_t a, uint64x2_t b)
// A32: VRSUBHN.I64 Dd+1, Qn, Qm
// A64: RSUBHN2 Vd.4S, Vn.2D, Vm.2D
public static Vector128<uint> SubtractReturningRoundedHighNarrowUpper(Vector64<uint> lower, Vector128<ulong> left, Vector128<ulong> right);
// SubtractSaturate overloads: the Vector64 forms map to the ACLE vqsub_* intrinsics and the
// Vector128 forms to vqsubq_* (A32 VQSUB; A64 SQSUB for signed and UQSUB for unsigned types).
// Both operands and the result share the same vector type.
// The Vector64 overloads cover 8-, 16- and 32-bit elements; the Vector128 overloads also cover
// 64-bit elements.
// NOTE(review): presumably computes left - right per element, clamping to the element type's
// range instead of wrapping - confirm against the ACLE.
// uint8x8_t vqsub_u8 (uint8x8_t a, uint8x8_t b)
// A32: VQSUB.U8 Dd, Dn, Dm
// A64: UQSUB Vd.8B, Vn.8B, Vm.8B
public static Vector64<byte> SubtractSaturate(Vector64<byte> left, Vector64<byte> right);
// int16x4_t vqsub_s16 (int16x4_t a, int16x4_t b)
// A32: VQSUB.S16 Dd, Dn, Dm
// A64: SQSUB Vd.4H, Vn.4H, Vm.4H
public static Vector64<short> SubtractSaturate(Vector64<short> left, Vector64<short> right);
// int32x2_t vqsub_s32 (int32x2_t a, int32x2_t b)
// A32: VQSUB.S32 Dd, Dn, Dm
// A64: SQSUB Vd.2S, Vn.2S, Vm.2S
public static Vector64<int> SubtractSaturate(Vector64<int> left, Vector64<int> right);
// int8x8_t vqsub_s8 (int8x8_t a, int8x8_t b)
// A32: VQSUB.S8 Dd, Dn, Dm
// A64: SQSUB Vd.8B, Vn.8B, Vm.8B
public static Vector64<sbyte> SubtractSaturate(Vector64<sbyte> left, Vector64<sbyte> right);
// uint16x4_t vqsub_u16 (uint16x4_t a, uint16x4_t b)
// A32: VQSUB.U16 Dd, Dn, Dm
// A64: UQSUB Vd.4H, Vn.4H, Vm.4H
public static Vector64<ushort> SubtractSaturate(Vector64<ushort> left, Vector64<ushort> right);
// uint32x2_t vqsub_u32 (uint32x2_t a, uint32x2_t b)
// A32: VQSUB.U32 Dd, Dn, Dm
// A64: UQSUB Vd.2S, Vn.2S, Vm.2S
public static Vector64<uint> SubtractSaturate(Vector64<uint> left, Vector64<uint> right);
// uint8x16_t vqsubq_u8 (uint8x16_t a, uint8x16_t b)
// A32: VQSUB.U8 Qd, Qn, Qm
// A64: UQSUB Vd.16B, Vn.16B, Vm.16B
public static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right);
// int16x8_t vqsubq_s16 (int16x8_t a, int16x8_t b)
// A32: VQSUB.S16 Qd, Qn, Qm
// A64: SQSUB Vd.8H, Vn.8H, Vm.8H
public static Vector128<short> SubtractSaturate(Vector128<short> left, Vector128<short> right);
// int32x4_t vqsubq_s32 (int32x4_t a, int32x4_t b)
// A32: VQSUB.S32 Qd, Qn, Qm
// A64: SQSUB Vd.4S, Vn.4S, Vm.4S
public static Vector128<int> SubtractSaturate(Vector128<int> left, Vector128<int> right);
// int64x2_t vqsubq_s64 (int64x2_t a, int64x2_t b)
// A32: VQSUB.S64 Qd, Qn, Qm
// A64: SQSUB Vd.2D, Vn.2D, Vm.2D
public static Vector128<long> SubtractSaturate(Vector128<long> left, Vector128<long> right);
// int8x16_t vqsubq_s8 (int8x16_t a, int8x16_t b)
// A32: VQSUB.S8 Qd, Qn, Qm
// A64: SQSUB Vd.16B, Vn.16B, Vm.16B
public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vqsubq_u16 (uint16x8_t a, uint16x8_t b)
// A32: VQSUB.U16 Qd, Qn, Qm
// A64: UQSUB Vd.8H, Vn.8H, Vm.8H
public static Vector128<ushort> SubtractSaturate(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vqsubq_u32 (uint32x4_t a, uint32x4_t b)
// A32: VQSUB.U32 Qd, Qn, Qm
// A64: UQSUB Vd.4S, Vn.4S, Vm.4S
public static Vector128<uint> SubtractSaturate(Vector128<uint> left, Vector128<uint> right);
// uint64x2_t vqsubq_u64 (uint64x2_t a, uint64x2_t b)
// A32: VQSUB.U64 Qd, Qn, Qm
// A64: UQSUB Vd.2D, Vn.2D, Vm.2D
public static Vector128<ulong> SubtractSaturate(Vector128<ulong> left, Vector128<ulong> right);
// SubtractSaturateScalar overloads for the single-element 64-bit ACLE intrinsics vqsub_s64 and
// vqsub_u64 (int64x1_t / uint64x1_t operands). A32 uses VQSUB on D registers; A64 uses the
// D-register form of SQSUB (signed) or UQSUB (unsigned).
// NOTE(review): presumably computes left - right on the one 64-bit element, clamping to the
// element type's range instead of wrapping - confirm against the ACLE.
// int64x1_t vqsub_s64 (int64x1_t a, int64x1_t b)
// A32: VQSUB.S64 Dd, Dn, Dm
// A64: SQSUB Dd, Dn, Dm
public static Vector64<long> SubtractSaturateScalar(Vector64<long> left, Vector64<long> right);
// uint64x1_t vqsub_u64 (uint64x1_t a, uint64x1_t b)
// A32: VQSUB.U64 Dd, Dn, Dm
// A64: UQSUB Dd, Dn, Dm
public static Vector64<ulong> SubtractSaturateScalar(Vector64<ulong> left, Vector64<ulong> right);
// SubtractWideningLower overloads. Two shapes share this name:
//   - vsubl_* (A32 VSUBL; A64 SSUBL/USUBL): both operands are Vector64 with the narrow element type.
//   - vsubw_* (A32 VSUBW; A64 SSUBW/USUBW): `left` is a Vector128 that already has the wide
//     element type, and only `right` is a Vector64 with the narrow element type.
// In both shapes the result is a Vector128 whose element type is twice the narrow width.
// NOTE(review): presumably computes left - right per element after widening the narrow
// operand(s) - confirm against the ACLE.
// uint16x8_t vsubl_u8 (uint8x8_t a, uint8x8_t b)
// A32: VSUBL.U8 Qd, Dn, Dm
// A64: USUBL Vd.8H, Vn.8B, Vm.8B
public static Vector128<ushort> SubtractWideningLower(Vector64<byte> left, Vector64<byte> right);
// int32x4_t vsubl_s16 (int16x4_t a, int16x4_t b)
// A32: VSUBL.S16 Qd, Dn, Dm
// A64: SSUBL Vd.4S, Vn.4H, Vm.4H
public static Vector128<int> SubtractWideningLower(Vector64<short> left, Vector64<short> right);
// int64x2_t vsubl_s32 (int32x2_t a, int32x2_t b)
// A32: VSUBL.S32 Qd, Dn, Dm
// A64: SSUBL Vd.2D, Vn.2S, Vm.2S
public static Vector128<long> SubtractWideningLower(Vector64<int> left, Vector64<int> right);
// int16x8_t vsubl_s8 (int8x8_t a, int8x8_t b)
// A32: VSUBL.S8 Qd, Dn, Dm
// A64: SSUBL Vd.8H, Vn.8B, Vm.8B
public static Vector128<short> SubtractWideningLower(Vector64<sbyte> left, Vector64<sbyte> right);
// uint32x4_t vsubl_u16 (uint16x4_t a, uint16x4_t b)
// A32: VSUBL.U16 Qd, Dn, Dm
// A64: USUBL Vd.4S, Vn.4H, Vm.4H
public static Vector128<uint> SubtractWideningLower(Vector64<ushort> left, Vector64<ushort> right);
// uint64x2_t vsubl_u32 (uint32x2_t a, uint32x2_t b)
// A32: VSUBL.U32 Qd, Dn, Dm
// A64: USUBL Vd.2D, Vn.2S, Vm.2S
public static Vector128<ulong> SubtractWideningLower(Vector64<uint> left, Vector64<uint> right);
// int16x8_t vsubw_s8 (int16x8_t a, int8x8_t b)
// A32: VSUBW.S8 Qd, Qn, Dm
// A64: SSUBW Vd.8H, Vn.8H, Vm.8B
public static Vector128<short> SubtractWideningLower(Vector128<short> left, Vector64<sbyte> right);
// int32x4_t vsubw_s16 (int32x4_t a, int16x4_t b)
// A32: VSUBW.S16 Qd, Qn, Dm
// A64: SSUBW Vd.4S, Vn.4S, Vm.4H
public static Vector128<int> SubtractWideningLower(Vector128<int> left, Vector64<short> right);
// int64x2_t vsubw_s32 (int64x2_t a, int32x2_t b)
// A32: VSUBW.S32 Qd, Qn, Dm
// A64: SSUBW Vd.2D, Vn.2D, Vm.2S
public static Vector128<long> SubtractWideningLower(Vector128<long> left, Vector64<int> right);
// uint16x8_t vsubw_u8 (uint16x8_t a, uint8x8_t b)
// A32: VSUBW.U8 Qd, Qn, Dm
// A64: USUBW Vd.8H, Vn.8H, Vm.8B
public static Vector128<ushort> SubtractWideningLower(Vector128<ushort> left, Vector64<byte> right);
// uint32x4_t vsubw_u16 (uint32x4_t a, uint16x4_t b)
// A32: VSUBW.U16 Qd, Qn, Dm
// A64: USUBW Vd.4S, Vn.4S, Vm.4H
public static Vector128<uint> SubtractWideningLower(Vector128<uint> left, Vector64<ushort> right);
// uint64x2_t vsubw_u32 (uint64x2_t a, uint32x2_t b)
// A32: VSUBW.U32 Qd, Qn, Dm
// A64: USUBW Vd.2D, Vn.2D, Vm.2S
public static Vector128<ulong> SubtractWideningLower(Vector128<ulong> left, Vector64<uint> right);
// SubtractWideningUpper overloads. Two shapes share this name:
//   - vsubl_high_* (A32 VSUBL with Dn+1/Dm+1; A64 SSUBL2/USUBL2): both operands are Vector128
//     with the narrow element type.
//   - vsubw_high_* (A32 VSUBW with Dm+1; A64 SSUBW2/USUBW2): `left` is a Vector128 that already
//     has the wide element type, and `right` carries the narrow element type.
// In both shapes the result is a Vector128 whose element type is twice the narrow width.
// NOTE(review): "Upper" presumably means the upper 64 bits of the narrow operand(s) are widened
// and subtracted - confirm against the ACLE.
// uint16x8_t vsubl_high_u8 (uint8x16_t a, uint8x16_t b)
// A32: VSUBL.U8 Qd, Dn+1, Dm+1
// A64: USUBL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<ushort> SubtractWideningUpper(Vector128<byte> left, Vector128<byte> right);
// int32x4_t vsubl_high_s16 (int16x8_t a, int16x8_t b)
// A32: VSUBL.S16 Qd, Dn+1, Dm+1
// A64: SSUBL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<int> SubtractWideningUpper(Vector128<short> left, Vector128<short> right);
// int16x8_t vsubw_high_s8 (int16x8_t a, int8x16_t b)
// A32: VSUBW.S8 Qd, Qn, Dm+1
// A64: SSUBW2 Vd.8H, Vn.8H, Vm.16B
public static Vector128<short> SubtractWideningUpper(Vector128<short> left, Vector128<sbyte> right);
// int32x4_t vsubw_high_s16 (int32x4_t a, int16x8_t b)
// A32: VSUBW.S16 Qd, Qn, Dm+1
// A64: SSUBW2 Vd.4S, Vn.4S, Vm.8H
public static Vector128<int> SubtractWideningUpper(Vector128<int> left, Vector128<short> right);
// int64x2_t vsubl_high_s32 (int32x4_t a, int32x4_t b)
// A32: VSUBL.S32 Qd, Dn+1, Dm+1
// A64: SSUBL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<long> SubtractWideningUpper(Vector128<int> left, Vector128<int> right);
// int64x2_t vsubw_high_s32 (int64x2_t a, int32x4_t b)
// A32: VSUBW.S32 Qd, Qn, Dm+1
// A64: SSUBW2 Vd.2D, Vn.2D, Vm.4S
// NOTE(review): `right` is declared as Vector64<int> here, but every other vsubw_high_* overload
// takes a Vector128 `right` (e.g. Vector128<uint> for vsubw_high_u32); this presumably should be
// Vector128<int> - confirm before approving the API.
public static Vector128<long> SubtractWideningUpper(Vector128<long> left, Vector64<int> right);
// int16x8_t vsubl_high_s8 (int8x16_t a, int8x16_t b)
// A32: VSUBL.S8 Qd, Dn+1, Dm+1
// A64: SSUBL2 Vd.8H, Vn.16B, Vm.16B
public static Vector128<short> SubtractWideningUpper(Vector128<sbyte> left, Vector128<sbyte> right);
// uint16x8_t vsubw_high_u8 (uint16x8_t a, uint8x16_t b)
// A32: VSUBW.U8 Qd, Qn, Dm+1
// A64: USUBW2 Vd.8H, Vn.8H, Vm.16B
public static Vector128<ushort> SubtractWideningUpper(Vector128<ushort> left, Vector128<byte> right);
// uint32x4_t vsubl_high_u16 (uint16x8_t a, uint16x8_t b)
// A32: VSUBL.U16 Qd, Dn+1, Dm+1
// A64: USUBL2 Vd.4S, Vn.8H, Vm.8H
public static Vector128<uint> SubtractWideningUpper(Vector128<ushort> left, Vector128<ushort> right);
// uint32x4_t vsubw_high_u16 (uint32x4_t a, uint16x8_t b)
// A32: VSUBW.U16 Qd, Qn, Dm+1
// A64: USUBW2 Vd.4S, Vn.4S, Vm.8H
public static Vector128<uint> SubtractWideningUpper(Vector128<uint> left, Vector128<ushort> right);
// uint64x2_t vsubl_high_u32 (uint32x4_t a, uint32x4_t b)
// A32: VSUBL.U32 Qd, Dn+1, Dm+1
// A64: USUBL2 Vd.2D, Vn.4S, Vm.4S
public static Vector128<ulong> SubtractWideningUpper(Vector128<uint> left, Vector128<uint> right);
// uint64x2_t vsubw_high_u32 (uint64x2_t a, uint32x4_t b)
// A32: VSUBW.U32 Qd, Qn, Dm+1
// A64: USUBW2 Vd.2D, Vn.2D, Vm.4S
public static Vector128<ulong> SubtractWideningUpper(Vector128<ulong> left, Vector128<uint> right);
} @TamarChristinaArm Can you please take a look and verify my assumptions about how the *Upper intrinsics should be implemented on A32? |
@echesakovMSFT Yeah those look correct to me. I'll make sure they're also corrected in the next ACLE release. |
The text was updated successfully, but these errors were encountered: